diff --git a/package/CHANGELOG b/package/CHANGELOG index 5fd704a9e87..6923b878d61 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -57,6 +57,9 @@ Fixes * Added parmed to setup.py Enhancements + * New analysis.hole2 module for HOLE interfacing. Contains a function (hole) + for running HOLE on singular PDB files and class (HoleAnalysis) for + trajectories (PR #2523) * Changed selection wildcards to support multiple wildcards (#2436) * Added coordinate reader and writer for NAMD binary coordinate format (PR #2485) * Improved ClusterCollection and Cluster string representations (Issue #2464) @@ -134,6 +137,7 @@ Changes function calls. (Issue #782) Deprecations + * analysis.hole is deprecated in 1.0 (remove in 2.0) * analysis.hbonds.HydrogenBondAnalysis is deprecated in 1.0 (remove in 2.0) diff --git a/package/MDAnalysis/analysis/hole.py b/package/MDAnalysis/analysis/hole.py index 4b050b2576a..b2a53195700 100644 --- a/package/MDAnalysis/analysis/hole.py +++ b/package/MDAnalysis/analysis/hole.py @@ -2,7 +2,7 @@ # vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 # # MDAnalysis --- https://www.mdanalysis.org -# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors +# Copyright (c) 2006-2020 The MDAnalysis Development Team and contributors # (see the file AUTHORS for the full list of names) # # Released under the GNU Public Licence, v2 or any higher version @@ -21,13 +21,18 @@ # J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 # -r"""Generation and Analysis of HOLE pore profiles --- :mod:`MDAnalysis.analysis.hole` -================================================================================= +r"""Generation and Analysis of HOLE pore profiles (Deprecated) --- :mod:`MDAnalysis.analysis.hole` +================================================================================================== :Author: Lukas Stelzl, Oliver Beckstein :Year: 2011-2012 :Copyright: GNU Public License v2 +.. warning: + + This module is deprecated and will be removed in version 2.0. + Please use :mod:`MDAnalysis.analysis.hole2` instead. + With the help of this module, the :program:`hole` program from the HOLE_ suite of tools [Smart1993]_ [Smart1996]_ can be run on frames in an MD trajectory or NMR ensemble in order to analyze an ion channel pore or transporter pathway @@ -399,7 +404,8 @@ def _process_plot_kwargs(self, kwargs): kw = {} frames = kwargs.pop('frames', None) if frames is None: - frames = np.sort(list(self.profiles.keys())[::kwargs.pop('step', 1)]) + frames = np.sort(list(self.profiles.keys()) + [::kwargs.pop('step', 1)]) else: frames = asiterable(frames) kw['frames'] = frames @@ -807,15 +813,18 @@ def __init__(self, filename, **kwargs): self.dcd = kwargs.pop('dcd', None) if self.dcd: self.dcd = self.check_and_fix_long_filename(self.dcd) - self.dcd_step = kwargs.pop("step", 1) - 1 # HOLE docs description is confusing: step or skip?? + # HOLE docs description is confusing: step or skip?? + self.dcd_step = kwargs.pop("step", 1) - 1 self.dcd_iniskip = 0 self.cpoint = kwargs.pop("cpoint", None) self.cvect = kwargs.pop("cvect", None) self.sample = float(kwargs.pop("sample", 0.20)) self.dotden = int(kwargs.pop("dotden", 15)) self.endrad = float(kwargs.pop("endrad", 22.)) - self.shorto = int(kwargs.pop("shorto", 0)) # look at using SHORTO 2 for minimum output - self.ignore_residues = kwargs.pop("ignore_residues", self.default_ignore_residues) + # look at using SHORTO 2 for minimum output + self.shorto = int(kwargs.pop("shorto", 0)) + self.ignore_residues = kwargs.pop( + "ignore_residues", self.default_ignore_residues) self.radius = self.check_and_fix_long_filename( realpath(kwargs.pop('radius', None) or write_simplerad2())) self.raseed = kwargs.pop('raseed', None) @@ -828,7 +837,8 @@ def __init__(self, filename, **kwargs): hole_exe_name = kwargs.pop('executable', 'hole') self.exe['hole'] = which(hole_exe_name) if self.exe['hole'] is None: - errmsg = "HOLE binary {hole_exe_name!r} not found.".format(**vars()) + errmsg = "HOLE binary {hole_exe_name!r} not found.".format( + **vars()) logger.fatal(errmsg) logger.fatal("%(hole_exe_name)r must be on the PATH or provided as keyword argument 'executable'.", vars()) @@ -931,7 +941,8 @@ def check_and_fix_long_filename(self, filename, tmpdir=os.path.curdir): # try a relative path newname = os.path.relpath(filename) if len(newname) <= self.HOLE_MAX_LENGTH: - logger.debug("path check: Using relative path: %r --> %r", filename, newname) + logger.debug( + "path check: Using relative path: %r --> %r", filename, newname) return newname # shorten path by creating a symlink inside a safe temp dir @@ -941,8 +952,10 @@ def check_and_fix_long_filename(self, filename, tmpdir=os.path.curdir): self.tempfiles.append(newname) self.tempdirs.append(dirname) if len(newname) > self.HOLE_MAX_LENGTH: - logger.fatal("path check: Failed to shorten filename %r --> %r", filename, newname) - raise RuntimeError("Failed to shorten filename %r --> %r", filename, newname) + logger.fatal( + "path check: Failed to shorten filename %r --> %r", filename, newname) + raise RuntimeError( + "Failed to shorten filename %r --> %r", filename, newname) os.symlink(filename, newname) logger.debug("path check: Using symlink: %r --> %r", filename, newname) return newname @@ -955,9 +968,12 @@ def run(self, **kwargs): # NOTE: If cvect and cpoint had been None in the constructor then they are # ignored here. Arguably a bug... but then again, the keywords for run() are # not even officially documented :-). - kwargs.setdefault("cvect_xyz", seq2str(kwargs.pop('cvect', self.cvect))) - kwargs.setdefault("cpoint_xyz", seq2str(kwargs.pop('cpoint', self.cpoint))) - kwargs.setdefault("ignore", seq2str(kwargs.pop('ignore_residues', self.ignore_residues))) + kwargs.setdefault("cvect_xyz", seq2str( + kwargs.pop('cvect', self.cvect))) + kwargs.setdefault("cpoint_xyz", seq2str( + kwargs.pop('cpoint', self.cpoint))) + kwargs.setdefault("ignore", seq2str( + kwargs.pop('ignore_residues', self.ignore_residues))) holeargs = vars(self).copy() holeargs.update(kwargs) @@ -967,10 +983,13 @@ def run(self, **kwargs): f.write(inp) logger.debug("Wrote HOLE input file %r for inspection", inpname) - logger.info("Starting HOLE on %(filename)r (trajectory: %(dcd)r)", holeargs) - logger.debug("%s <%s >%s", self.exe['hole'], (inpname if inpname else "(input)"), outname) + logger.info( + "Starting HOLE on %(filename)r (trajectory: %(dcd)r)", holeargs) + logger.debug( + "%s <%s >%s", self.exe['hole'], (inpname if inpname else "(input)"), outname) with open(outname, "w") as output: - hole = subprocess.Popen([self.exe['hole']], stdin=subprocess.PIPE, stdout=output) + hole = subprocess.Popen( + [self.exe['hole']], stdin=subprocess.PIPE, stdout=output) stdout, stderr = hole.communicate(inp.encode('utf-8')) with open(outname, "r") as output: # HOLE is not very good at setting returncodes so check ourselves @@ -979,10 +998,12 @@ def run(self, **kwargs): hole.returncode = 255 break if hole.returncode != 0: - logger.fatal("HOLE Failure (%d). Check output %r", hole.returncode, outname) + logger.fatal("HOLE Failure (%d). Check output %r", + hole.returncode, outname) if stderr is not None: logger.fatal(stderr) - raise ApplicationError(hole.returncode, "HOLE {0!r} failed. Check output {1!r}.".format(self.exe['hole'], outname)) + raise ApplicationError(hole.returncode, "HOLE {0!r} failed. Check output {1!r}.".format( + self.exe['hole'], outname)) logger.info("HOLE finished: output file %(outname)r", vars()) def create_vmd_surface(self, filename="hole.vmd", **kwargs): @@ -1015,7 +1036,8 @@ def create_vmd_surface(self, filename="hole.vmd", **kwargs): os.close(fd) try: output = subprocess.check_output([self.exe["sph_process"], "-sos", "-dotden", - str(kwargs['dotden']), "-color", self.sphpdb, + str(kwargs['dotden'] + ), "-color", self.sphpdb, tmp_sos], stderr=subprocess.STDOUT) except subprocess.CalledProcessError as err: os.unlink(tmp_sos) @@ -1029,13 +1051,14 @@ def create_vmd_surface(self, filename="hole.vmd", **kwargs): # Could check: os.devnull if subprocess.DEVNULL not available (>3.3) # Suppress stderr messages of sos_triangle with open(tmp_sos) as sos, open(filename, "w") as triangles, \ - open(os.devnull, 'w') as FNULL: + open(os.devnull, 'w') as FNULL: subprocess.check_call( [self.exe["sos_triangle"], "-s"], stdin=sos, stdout=triangles, stderr=FNULL) except subprocess.CalledProcessError as err: logger.fatal("sos_triangle failed ({0})".format(err.returncode)) - six.raise_from(OSError(err.returncode, "sos_triangle failed"), None) + six.raise_from( + OSError(err.returncode, "sos_triangle failed"), None) finally: os.unlink(tmp_sos) @@ -1067,10 +1090,10 @@ def collect(self, **kwargs): """ # cenxyz.cvec radius cen_line_D sum{s/(area point sourc - #0123456789.0123456789.0123456789.0123456789.0123456789.0123456789. + # 0123456789.0123456789.0123456789.0123456789.0123456789.0123456789. # 11 22 33 44 - #123456789.123456789.123456789.123456789.123456789.123456789.123456789. - #1 13 + # 123456789.123456789.123456789.123456789.123456789.123456789.123456789. + # 1 13 # 3F12 # -27.17082 15.77469 -73.19195 0.00013 (sampled) # -27.07082 12.91103 -69.39840 0.00032 (mid-point) @@ -1089,18 +1112,24 @@ def collect(self, **kwargs): filenames = glob.glob(self.filename) length = len(filenames) if length == 0: - logger.error("Glob pattern %r did not find any files.", self.filename) - raise ValueError("Glob pattern {0!r} did not find any files.".format(self.filename)) - logger.info("Found %d input files based on glob pattern %s", length, self.filename) + logger.error( + "Glob pattern %r did not find any files.", self.filename) + raise ValueError( + "Glob pattern {0!r} did not find any files.".format(self.filename)) + logger.info( + "Found %d input files based on glob pattern %s", length, self.filename) if self.dcd: u = Universe(self.filename, self.dcd) - length = int((u.trajectory.n_frames - self.dcd_iniskip) / (self.dcd_step + 1)) - logger.info("Found %d input frames in DCD trajectory %r", length, self.dcd) + length = int((u.trajectory.n_frames - + self.dcd_iniskip) / (self.dcd_step + 1)) + logger.info( + "Found %d input frames in DCD trajectory %r", length, self.dcd) # one recarray for each frame, indexed by frame number self.profiles = OrderedDict() - logger.info("Run %s: Reading %d HOLE profiles from %r", run, length, hole_output) + logger.info("Run %s: Reading %d HOLE profiles from %r", + run, length, hole_output) hole_profile_no = 0 records = [] with open(hole_output, "r") as hole: @@ -1122,8 +1151,10 @@ def collect(self, **kwargs): try: rxncoord, radius, cenlineD = holeformat.read(line) except: - logger.critical("Run %d: Problem parsing line %r", run, line.strip()) - logger.exception("Check input file %r.", hole_output) + logger.critical( + "Run %d: Problem parsing line %r", run, line.strip()) + logger.exception( + "Check input file %r.", hole_output) raise records.append((hole_profile_no, rxncoord, radius)) continue @@ -1131,7 +1162,7 @@ def collect(self, **kwargs): # end of records (empty line) read_data = False frame_hole_output = np.rec.fromrecords(records, formats="i4,f8,f8", - names="frame,rxncoord,radius") + names="frame,rxncoord,radius") # store the profile self.profiles[hole_profile_no] = frame_hole_output logger.debug("Collected HOLE profile for frame %d (%d datapoints)", @@ -1142,13 +1173,16 @@ def collect(self, **kwargs): rundir = os.path.join(outdir, "run_" + str(run)) if not os.path.exists(rundir): os.makedirs(rundir) - frame_hole_txt = os.path.join(rundir, "radii_{0!s}_{1:04d}.dat.gz".format(run, hole_profile_no)) + frame_hole_txt = os.path.join( + rundir, "radii_{0!s}_{1:04d}.dat.gz".format(run, hole_profile_no)) np.savetxt(frame_hole_txt, frame_hole_output) - logger.debug("Finished with frame %d, saved as %r", hole_profile_no, frame_hole_txt) + logger.debug( + "Finished with frame %d, saved as %r", hole_profile_no, frame_hole_txt) continue # if we get here then we haven't found anything interesting if len(self.profiles) == length: - logger.info("Collected HOLE radius profiles for %d frames", len(self.profiles)) + logger.info( + "Collected HOLE radius profiles for %d frames", len(self.profiles)) else: logger.warning("Missing data: Found %d HOLE profiles from %d frames.", len(self.profiles), length) @@ -1230,13 +1264,15 @@ def __init__(self, universe, **kwargs): self.cpoint = kwargs.pop('cpoint', None) if self.cpoint is True: self.cpoint = self.guess_cpoint(select=self.selection) - logger.info("Guessed CPOINT = %r from selection %r", self.cpoint, self.selection) + logger.info("Guessed CPOINT = %r from selection %r", + self.cpoint, self.selection) kwargs['cpoint'] = self.cpoint self.hole_kwargs = kwargs # processing - self.orderparameters = self._process_orderparameters(self.orderparametersfile) + self.orderparameters = self._process_orderparameters( + self.orderparametersfile) def guess_cpoint(self, select="protein", **kwargs): """Guess a point inside the pore. @@ -1313,9 +1349,11 @@ def run(self, start=None, stop=None, step=None): # (although the file renaming might create problems...) protein = self.universe.select_atoms(self.selection) for q, ts in zip(self.orderparameters[start:stop:step], self.universe.trajectory[start:stop:step]): - logger.info("HOLE analysis frame %4d (orderparameter %g)", ts.frame, q) + logger.info( + "HOLE analysis frame %4d (orderparameter %g)", ts.frame, q) fd, pdbfile = tempfile.mkstemp(suffix=".pdb") - os.close(fd) # only need an empty file that can be overwritten, close right away (Issue 129) + # only need an empty file that can be overwritten, close right away (Issue 129) + os.close(fd) try: protein.write(pdbfile) hole_profiles = self.run_hole(pdbfile, **hole_kw) diff --git a/package/MDAnalysis/analysis/hole2/__init__.py b/package/MDAnalysis/analysis/hole2/__init__.py new file mode 100644 index 00000000000..8c371e766e6 --- /dev/null +++ b/package/MDAnalysis/analysis/hole2/__init__.py @@ -0,0 +1,41 @@ +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 +# +# MDAnalysis --- https://www.mdanalysis.org +# Copyright (c) 2006-2020 The MDAnalysis Development Team and contributors +# (see the file AUTHORS for the full list of names) +# +# Released under the GNU Public Licence, v2 or any higher version +# +# Please cite your use of MDAnalysis in published work: +# +# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, +# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. +# MDAnalysis: A Python package for the rapid analysis of molecular dynamics +# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th +# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. +# doi: 10.25080/majora-629e541a-00e +# +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. +# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 +# + +from __future__ import absolute_import +from ...due import due, Doi +from .hole import hole, HoleAnalysis +from .utils import create_vmd_surface + +due.cite(Doi("10.1016/S0006-3495(93)81293-1"), + description="HOLE program", + path="MDAnalysis.analysis.hole", + cite_module=True) +due.cite(Doi("10.1016/S0263-7855(97)00009-X"), + description="HOLE program", + path="MDAnalysis.analysis.hole", + cite_module=True) +due.cite(Doi("10.1016/j.jmb.2013.10.024"), + description="HOLE trajectory analysis with orderparameters", + path="MDAnalysis.analysis.hole", + cite_module=True) +del Doi diff --git a/package/MDAnalysis/analysis/hole2/hole.py b/package/MDAnalysis/analysis/hole2/hole.py new file mode 100644 index 00000000000..9bffe34ed78 --- /dev/null +++ b/package/MDAnalysis/analysis/hole2/hole.py @@ -0,0 +1,1464 @@ +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 +# +# MDAnalysis --- https://www.mdanalysis.org +# Copyright (c) 2006-2020 The MDAnalysis Development Team and contributors +# (see the file AUTHORS for the full list of names) +# +# Released under the GNU Public Licence, v2 or any higher version +# +# Please cite your use of MDAnalysis in published work: +# +# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, +# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. +# MDAnalysis: A Python package for the rapid analysis of molecular dynamics +# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th +# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. +# doi: 10.25080/majora-629e541a-00e +# +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. +# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 +# + +"""HOLE Analysis --- :mod:`MDAnalysis.analysis.hole2.hole` +===================================================================================== + +:Author: Lily Wang +:Year: 2020 +:Copyright: GNU Public License v3 + +.. versionadded:: 1.0.0 + +This module contains the tools to interface with HOLE_ [Smart1993]_ +[Smart1996]_ to analyse an ion channel pore or transporter pathway [Stelzl2014]_ . + +Using HOLE on a PDB file +------------------------ + +Use the :func:``hole`` function to run `HOLE`_ on a single PDB file. For example, +the code below runs the `HOLE`_ program installed at '~/hole2/exe/hole' :: + + from MDAnalysis.tests.datafiles import PDB_HOLE + from MDAnalysis.analysis import hole2 + profiles = hole2.hole(PDB_HOLE, executable='~/hole2/exe/hole') + # to create a VMD surface of the pore + hole2.create_vmd_surface(filename='hole.vmd') + +``profiles`` is a dictionary of HOLE profiles, indexed by the frame number. If only +a PDB file is passed to the function, there will only be one profile at frame 0. +You can visualise the pore by loading your PDB file into VMD, and in +Extensions > Tk Console, type:: + + source hole.vmd + +You can also pass a DCD trajectory with the same atoms in the same order as +your PDB file with the ``dcd`` keyword argument. In that case, ``profiles`` will +contain multiple HOLE profiles, indexed by frame. + +The HOLE program will create some output files: + + * an output file (default name: hole.out) + * an sphpdb file (default name: hole.sph) + * a file of van der Waals' radii + (if not specified with ``vdwradii_file``. Default name: simple2.rad) + * a symlink of your PDB or DCD files (if the original name is too long) + * the input text (if you specify ``infile``) + +By default (`keep_files=True`), these files are kept. If you would like to +delete the files after the function is wrong, set `keep_files=False`. Keep in +mind that if you delete the sphpdb file, you cannot then create a VMD surface. + + +Using HOLE on a trajectory +-------------------------- + +You can also run HOLE on a trajectory through the :class:`HoleAnalysis` class. +This behaves similarly to the ``hole`` function, although arguments such as ``cpoint`` +and ``cvect`` become runtime arguments for the :meth:`~HoleAnalysis.run` function. + +The class can be set-up and run like a normal MDAnalysis analysis class:: + + import MDAnalysis as mda + from MDAnalysis.tests.datafiles import MULTIPDB_HOLE + from MDAnalysis.analysis import hole2 + + ha = hole2.HoleAnalysis(u, executable='~/hole2/exe/hole') as h2: + ha.run() + ha.create_vmd_surface(filename='hole.vmd') + +The VMD surface created by the class updates the pore for each frame of the trajectory. +Use it as normal by loading your trajectory in VMD and sourcing the file in the Tk Console. + +Again, HOLE writes out files for each frame. If you would like to delete these files +after the analysis, you can call :meth:`~HoleAnalysis.delete_temporary_files`:: + + ha.delete_temporary_files() + +Alternatively, you can use HoleAnalysis as a context manager that deletes temporary +files when you are finished with the context manager:: + + import MDAnalysis as mda + from MDAnalysis.tests.datafiles import MULTIPDB_HOLE + from MDAnalysis.analysis import hole2 + + with hole2.HoleAnalysis(u, executable='~/hole2/exe/hole') as h2: + h2.run() + h2.create_vmd_surface() + + +.. _HOLE: http://www.holeprogram.org + + +Functions and classes +--------------------- + +.. autofunction:: hole + +.. autoclass:: HoleAnalysis + :members: + +""" + +from __future__ import absolute_import, division + +from six.moves import zip, cPickle +import six + +import os +import errno +import tempfile +import textwrap +import logging +import itertools +import warnings + +import numpy as np +import matplotlib +import matplotlib.pyplot as plt +from collections import OrderedDict + +from ...exceptions import ApplicationError +from ..base import AnalysisBase +from ...lib import util +from .utils import (check_and_fix_long_filename, write_simplerad2, + set_up_hole_input, run_hole, collect_hole, + create_vmd_surface) +from .templates import (hole_input, hole_lines, vmd_script_array, + vmd_script_function, + IGNORE_RESIDUES) + +logger = logging.getLogger(__name__) + + +def hole(pdbfile, + infile_text=None, + infile=None, + outfile='hole.out', + sphpdb_file='hole.sph', + vdwradii_file=None, + executable='hole', + tmpdir=os.path.curdir, + sample=0.2, + end_radius=22.0, + cpoint=None, + cvect=None, + random_seed=None, + ignore_residues=IGNORE_RESIDUES, + output_level=0, + dcd=None, + dcd_iniskip=0, + dcd_step=1, + keep_files=True): + """Run :program:`hole` on a single frame or a DCD trajectory. + + :program:`hole` is part of the HOLE_ suite of programs. It is used to + analyze channels and cavities in proteins, especially ion channels. + + Only a subset of all `HOLE control parameters `_ + is supported and can be set with keyword arguments. + + Parameters + ---------- + + pdbfile : str + The `filename` is used as input for HOLE in the "COORD" card of the + input file. It specifies the name of a PDB coordinate file to be + used. This must be in Brookhaven protein databank format or + something closely approximating this. Both ATOM and HETATM records + are read. + infile_text: str, optional + HOLE input text or template. If set to ``None``, the function will + create the input text from the other parameters. + infile: str, optional + File to write the HOLE input text for later inspection. If set to + ``None``, the input text is not written out. + outfile : str, optional + file name of the file collecting HOLE's output (which can be + parsed using :meth:`collect_hole(outfile)`. + sphpdb_file : str, optional + path to the HOLE sph file, a PDB-like file containing the + coordinates of the pore centers. + The coordinates are set to the sphere centres and the occupancies + are the sphere radii. All centres are assigned the atom name QSS and + residue name SPH and the residue number is set to the storage + number of the centre. In VMD, sph + objects are best displayed as "Points". Displaying .sph objects + rather than rendered or dot surfaces can be useful to analyze the + distance of particular atoms from the sphere-centre line. + .sph files can be used to produce molecular graphical + output from a hole run, by using the + :program:`sph_process` program to read the .sph file. + vdwradii_file: str, optional + path to the file specifying van der Waals radii for each atom. If + set to ``None``, then a set of default radii, + :data:`SIMPLE2_RAD`, is used (an extension of ``simple.rad`` from + the HOLE distribution). + executable: str, optional + Path to the :program:`hole` executable. + (e.g. ``~/hole2/exe/hole``). If + :program:`hole` is found on the :envvar:`PATH`, then the bare + executable name is sufficient. + tmpdir: str, optional + The temporary directory that files can be symlinked to, to shorten + the path name. HOLE can only read filenames up to a certain length. + sample : float, optional + distance of sample points in Å. + Specifies the distance between the planes used in the HOLE + procedure. The default value should be reasonable for most + purposes. However, if you wish to visualize a very tight + constriction then specify a smaller value. + This value determines how many points in the pore profile are + calculated. + end_radius : float, optional + Radius in Å, which is considered to be the end of the pore. This + keyword can be used to specify the radius above which the + program regards a result as indicating that the end of the pore + has been reached. This may need to be increased for large channels, + or reduced for small channels. + cpoint : array_like, 'center_of_geometry' or None, optional + coordinates of a point inside the pore, e.g. ``[12.3, 0.7, + 18.55]``. If set to ``None`` (the default) then HOLE's own search + algorithm is used. + ``cpoint`` specifies a point which lies within the channel. For + simple channels (e.g. gramicidin), results do not show great + sensitivity to the exact point taken. An easy way to produce an + initial point is to use molecular graphics to find two atoms which + lie either side of the pore and to average their coordinates. Or + if the channel structure contains water molecules or counter ions + then take the coordinates of one of these (and use the + ``ignore_residues`` keyword to ignore them in the pore radius + calculation). + If this card is not specified, then HOLE (from version 2.2) + attempts to guess where the channel will be. The procedure + assumes the channel is reasonably symmetric. The initial guess on + cpoint will be the centroid of all alpha carbon atoms (name 'CA' + in pdb file). This is then refined by a crude grid search up to 5 + Å from the original position. This procedure works most of the + time but is far from infallible — results should be + carefully checked (with molecular graphics) if it is used. + cvect : array_like, optional + Search direction, should be parallel to the pore axis, + e.g. ``[0,0,1]`` for the z-axis. + If this keyword is ``None`` (the default), then HOLE attempts to guess + where the channel will be. The procedure assumes that the channel is + reasonably symmetric. The guess will be either along the X axis + (1,0,0), Y axis (0,1,0) or Z axis (0,0,1). If the structure is not + aligned on one of these axis the results will clearly be + approximate. If a guess is used then results should be carefully + checked. + random_seed : int, optional + integer number to start the random number generator. + By default, + :program:`hole` will use the time of the day. + For reproducible runs (e.g., for testing) set ``random_seed`` + to an integer. + ignore_residues : array_like, optional + sequence of three-letter residues that are not taken into + account during the calculation; wildcards are *not* + supported. Note that all residues must have 3 letters. Pad + with space on the right-hand side if necessary. + output_level : int, optional + Determines the output of output in the ``outfile``. + For automated processing, this must be < 3. + 0: Full text output, + 1: All text output given except "run in progress" (i.e., + detailed contemporary description of what HOLE is doing). + 2: Ditto plus no graph type output - only leaving minimum + radius and conductance calculations. + 3: All text output other than input card mirroring and error messages + turned off. + dcd : str, optional + File name of CHARMM-style DCD trajectory (must be supplied together with a + matching PDB file `filename`) and then HOLE runs its analysis on + each frame. HOLE can *not* read DCD trajectories written by MDAnalysis, + which are NAMD-style. + ignored. Note that structural parameters determined for each + individual structure are written in a tagged format so that it is + possible to extract the information from the text output file using + a :program:`grep` command. The reading of the file can be + controlled by the ``dcd_step`` keyword and/or setting + ``dcd_iniskip`` to the number of frames to be skipped + initially. + dcd_step : int, optional + step size for going through the trajectory (skips ``dcd_step-1`` + frames). + keep_files : bool, optional + Whether to keep the HOLE output files and possible temporary + symlinks after running the function. Default: ``True`` + + + Returns + ------- + + dict + A dictionary of :class:`numpy.recarray`\ s, indexed by frame. + + + .. versionadded:: 1.0 + + """ + + if output_level > 3: + msg = 'output_level ({}) needs to be < 3 in order to extract a HOLE profile!' + warnings.warn(msg.format(output_level)) + + # get executable + exe = util.which(executable) + if exe is None: + raise OSError(errno.ENOENT, exe_err.format(name=executable, + kw='executable')) + + # get temp files + tmp_files = [outfile, sphpdb_file] + + short_filename = check_and_fix_long_filename(pdbfile, tmpdir=tmpdir) + if os.path.islink(short_filename): + tmp_files.append(short_filename) + + if dcd is not None: + dcd = check_and_fix_long_filename(dcd, tmpdir=tmpdir) + if os.path.islink(dcd): + tmp_files.append(dcd) + + if vdwradii_file is not None: + vdwradii_file = check_and_fix_long_filename(vdwradii_file, + tmpdir=tmpdir) + else: + vdwradii_file = write_simplerad2() + tmp_files.append(vdwradii_file) + + infile_text = set_up_hole_input(short_filename, + infile_text=infile_text, + infile=infile, + sphpdb_file=sphpdb_file, + vdwradii_file=vdwradii_file, + tmpdir=tmpdir, sample=sample, + end_radius=end_radius, + cpoint=cpoint, cvect=cvect, + random_seed=random_seed, + ignore_residues=ignore_residues, + output_level=output_level, + dcd=dcd, + dcd_iniskip=dcd_iniskip, + dcd_step=dcd_step-1) + + run_hole(outfile=outfile, infile_text=infile_text, executable=exe) + recarrays = collect_hole(outfile=outfile) + + if not keep_files: + for file in tmp_files: + try: + os.unlink(file) + except OSError: + pass + + return recarrays + + +class HoleAnalysis(AnalysisBase): + + """ + Run :program:`hole` on a trajectory. + + :program:`hole` is part of the HOLE_ suite of programs. It is used to + analyze channels and cavities in proteins, especially ion channels. + + Only a subset of all `HOLE control parameters `_ + is supported and can be set with keyword arguments. + + This class creates temporary PDB files for each frame and runs HOLE on + the frame. It can be used normally, or as a context manager. If used as a + context manager, the class will try to delete any temporary files created + by HOLE, e.g. sphpdb files and logfiles. :: + + with hole2.HoleAnalysis(u, executable='~/hole2/exe/hole') as h2: + h2.run() + h2.create_vmd_surface() + + Parameters + ---------- + + universe: Universe or AtomGroup + The Universe or AtomGroup to apply the analysis to. + select: string, optional + The selection string to create an atom selection that the HOLE + analysis is applied to. + vdwradii_file: str, optional + path to the file specifying van der Waals radii for each atom. If + set to ``None``, then a set of default radii, + :data:`SIMPLE2_RAD`, is used (an extension of ``simple.rad`` from + the HOLE distribution). + executable: str, optional + Path to the :program:`hole` executable. + (e.g. ``~/hole2/exe/hole``). If + :program:`hole` is found on the :envvar:`PATH`, then the bare + executable name is sufficient. + tmpdir: str, optional + The temporary directory that files can be symlinked to, to shorten + the path name. HOLE can only read filenames up to a certain length. + cpoint : array_like, 'center_of_geometry' or None, optional + coordinates of a point inside the pore, e.g. ``[12.3, 0.7, + 18.55]``. If set to ``None`` (the default) then HOLE's own search + algorithm is used. + ``cpoint`` specifies a point which lies within the channel. For + simple channels (e.g. gramicidin), results do not show great + sensitivity to the exact point taken. An easy way to produce an + initial point is to use molecular graphics to find two atoms which + lie either side of the pore and to average their coordinates. Or + if the channel structure contains water molecules or counter ions + then take the coordinates of one of these (and use the + ``ignore_residues`` keyword to ignore them in the pore radius + calculation). + If this card is not specified, then HOLE (from version 2.2) + attempts to guess where the channel will be. The procedure + assumes the channel is reasonably symmetric. The initial guess on + cpoint will be the centroid of all alpha carbon atoms (name 'CA' + in pdb file). This is then refined by a crude grid search up to 5 + Å from the original position. This procedure works most of the + time but is far from infallible — results should be + carefully checked (with molecular graphics) if it is used. + cvect : array_like, optional + Search direction, should be parallel to the pore axis, + e.g. ``[0,0,1]`` for the z-axis. + If this keyword is ``None`` (the default), then HOLE attempts to guess + where the channel will be. The procedure assumes that the channel is + reasonably symmetric. The guess will be either along the X axis + (1,0,0), Y axis (0,1,0) or Z axis (0,0,1). If the structure is not + aligned on one of these axis the results will clearly be + approximate. If a guess is used then results should be carefully + checked. + sample : float, optional + distance of sample points in Å. + Specifies the distance between the planes used in the HOLE + procedure. The default value should be reasonable for most + purposes. However, if you wish to visualize a very tight + constriction then specify a smaller value. + This value determines how many points in the pore profile are + calculated. + end_radius : float, optional + Radius in Å, which is considered to be the end of the pore. This + keyword can be used to specify the radius above which the + program regards a result as indicating that the end of the pore + has been reached. This may need to be increased for large channels, + or reduced for small channels. + output_level : int, optional + Determines the output of output in the ``outfile``. + For automated processing, this must be < 3. + 0: Full text output, + 1: All text output given except "run in progress" (i.e., + detailed contemporary description of what HOLE is doing). + 2: Ditto plus no graph type output - only leaving minimum + radius and conductance calculations. + 3: All text output other than input card mirroring and error messages + turned off. + ignore_residues : array_like, optional + sequence of three-letter residues that are not taken into + account during the calculation; wildcards are *not* + supported. Note that all residues must have 3 letters. Pad + with space on the right-hand side if necessary. + prefix: str, optional + Prefix for HOLE output files. + write_input_files: bool, optional + Whether to write out the input HOLE text as files. + Files are called `hole.inp`. + + + Returns + ------- + dict + A dictionary of :class:`numpy.recarray`\ s, indexed by frame. + + + .. versionadded:: 1.0 + + """ + + input_file = '{prefix}hole{i:03d}.inp' + output_file = '{prefix}hole{i:03d}.out' + sphpdb_file = '{prefix}hole{i:03d}.sph' + + input_file = '{prefix}hole{i:03d}.inp' + output_file = '{prefix}hole{i:03d}.out' + sphpdb_file = '{prefix}hole{i:03d}.sph' + + hole_header = textwrap.dedent(""" + ! Input file for Oliver Smart's HOLE program + ! written by MDAnalysis.analysis.hole2.HoleAnalysis + ! for a Universe + ! u = mda.Universe({} + ! ) + ! Frame {{i}} + """) + + hole_body = textwrap.dedent(""" + COORD {{coordinates}} + RADIUS {radius} + SPHPDB {{sphpdb}} + SAMPLE {sample:f} + ENDRAD {end_radius:f} + IGNORE {ignore} + SHORTO {output_level:d} + """) + + _guess_cpoint = False + + sphpdbs = None + outfiles = None + frames = None + profiles = None + + def __init__(self, universe, + select='protein', + verbose=False, + ignore_residues=IGNORE_RESIDUES, + vdwradii_file=None, + executable='hole', + sos_triangle='sos_triangle', + sph_process='sph_process', + tmpdir=os.path.curdir, + cpoint=None, + cvect=None, + sample=0.2, + end_radius=22, + output_level=0, + prefix=None, + write_input_files=False): + super(HoleAnalysis, self).__init__(universe.universe.trajectory, + verbose=verbose) + if output_level > 3: + msg = 'output_level ({}) needs to be < 3 in order to extract a HOLE profile!' + warnings.warn(msg.format(output_level)) + + if prefix is None: + prefix = '' + + if isinstance(cpoint, str): + if 'geometry' in cpoint.lower(): + self._guess_cpoint = True + self.cpoint = '{cpoint[0]:.10f} {cpoint[1]:.10f} {cpoint[2]:.10f}' + else: + self._guess_cpoint = False + self.cpoint = cpoint + + self.prefix = prefix + self.cvect = cvect + self.sample = sample + self.end_radius = end_radius + self.output_level = output_level + self.write_input_files = write_input_files + self.select = select + self.ag = universe.select_atoms(select, updating=True) + self.universe = universe + self.tmpdir = tmpdir + self.ignore_residues = ignore_residues + + # --- finding executables ---- + hole = util.which(executable) + if hole is None: + raise OSError(errno.ENOENT, exe_err.format(name=hole, + kw='executable')) + self.base_path = os.path.dirname(hole) + + sos_triangle_path = util.which(sos_triangle) + if sos_triangle_path is None: + path = os.path.join(self.base_path, sos_triangle) + sos_triangle_path = util.which(path) + if sos_triangle_path is None: + raise OSError(errno.ENOENT, exe_err.format(name=sos_triangle, + kw='sos_triangle')) + sph_process_path = util.which(sph_process) + if sph_process_path is None: + path = os.path.join(self.base_path, 'sph_process') + sph_process_path = util.which(path) + if sph_process_path is None: + raise OSError(errno.ENOENT, exe_err.format(name=sph_process, + kw='sph_process')) + + self.exe = { + 'hole': hole, + 'sos_triangle': sos_triangle_path, + 'sph_process': sph_process_path + } + + # --- setting up temp files ---- + self.tmp_files = [] + if vdwradii_file is not None: + self.vdwradii_file = check_and_fix_long_filename(vdwradii_file, + tmpdir=self.tmpdir) + if os.path.islink(self.vdwradii_file): + self.tmp_files.append(self.vdwradii_file) + else: + self.vdwradii_file = write_simplerad2() + self.tmp_files.append(self.vdwradii_file) + + # --- setting up input header ---- + filenames = [universe.filename] + try: + filenames.extend(universe.trajectory.filenames) + except AttributeError: + filenames.append(universe.trajectory.filename) + hole_filenames = '\n! '.join(filenames) + self._input_header = self.hole_header.format(hole_filenames) + + def run(self, start=None, stop=None, step=None, verbose=None, + random_seed=None): + """ + Perform the calculation + + Parameters + ---------- + start : int, optional + start frame of analysis + + stop : int, optional + stop frame of analysis + + step : int, optional + number of frames to skip between each analysed frame + + verbose : bool, optional + Turn on verbosity + + random_seed : int, optional + integer number to start the random number generator. + By default, + :program:`hole` will use the time of the day. + For reproducible runs (e.g., for testing) set ``random_seed`` + to an integer. Default: ``None`` + """ + self.random_seed = random_seed + return super(HoleAnalysis, self).run(start=start, stop=stop, + step=step, verbose=verbose) + + def _prepare(self): + """Set up containers and generate input file text""" + # set up containers + self.sphpdbs = np.zeros(self.n_frames, dtype=object) + self.outfiles = np.zeros(self.n_frames, dtype=object) + self.frames = np.zeros(self.n_frames, dtype=int) + self.profiles = {} + + # generate input file + body = set_up_hole_input('', + infile_text=self.hole_body, + infile=None, + vdwradii_file=self.vdwradii_file, + tmpdir=self.tmpdir, + sample=self.sample, + end_radius=self.end_radius, + cpoint=self.cpoint, + cvect=self.cvect, + random_seed=self.random_seed, + ignore_residues=self.ignore_residues, + output_level=self.output_level, + dcd=None) + + self.infile_text = self._input_header + body + + def guess_cpoint(self): + """Guess a point inside the pore. + + This method simply uses the center of geometry of the selection as a + guess. + + Returns + ------- + float: + center of geometry of selected AtomGroup + """ + return self.ag.center_of_geometry() + + def _single_frame(self): + """Run HOLE analysis and collect profiles""" + # set up files + frame = self._ts.frame + i = self._frame_index + outfile = self.output_file.format(prefix=self.prefix, i=frame) + sphpdb = self.sphpdb_file.format(prefix=self.prefix, i=frame) + self.sphpdbs[i] = sphpdb + self.outfiles[i] = outfile + if outfile not in self.tmp_files: + self.tmp_files.append(outfile) + if sphpdb not in self.tmp_files: + self.tmp_files.append(sphpdb) + else: + self.tmp_files.append(sphpdb + '.old') + self.frames[i] = frame + + # temp pdb + logger.info('HOLE analysis frame {}'.format(frame)) + fd, pdbfile = tempfile.mkstemp(suffix='.pdb') + os.close(fd) # close immediately (Issue 129) + + # get infile text + fmt_kwargs = {'i': frame, 'coordinates': pdbfile, 'sphpdb': sphpdb} + if self._guess_cpoint: + fmt_kwargs['cpoint'] = self.guess_cpoint() + infile_text = self.infile_text.format(**fmt_kwargs) + + if self.write_input_files: + infile = self.input_file.format(prefix=self.prefix, i=frame) + with open(infile, 'w') as f: + f.write(infile_text) + + try: + self.ag.write(pdbfile) + run_hole(outfile=outfile, infile_text=infile_text, + executable=self.exe['hole']) + finally: + try: + os.unlink(pdbfile) + except OSError: + pass + + recarrays = collect_hole(outfile=outfile) + try: + self.profiles[frame] = recarrays[0] + except KeyError: + msg = 'No profile found in HOLE output. Output level: {}' + logger.info(msg.format(self.output_level)) + + def create_vmd_surface(self, filename='hole.vmd', dot_density=15, + no_water_color='red', one_water_color='green', + double_water_color='blue'): + """Process HOLE output to create a smooth pore surface suitable for VMD. + + Takes the ``sphpdb`` file for each frame and feeds it to `sph_process `_ + and `sos_triangle `_ as described under `Visualization of HOLE + results `_. + + Load the output file *filename* into VMD in Extensions > Tk Console :: + + source hole.vmd + + The level of detail is determined by ``dot_density``. + The surface will be colored by ``no_water_color``, ``one_water_color``, and + ``double_water_color``. You can change these in the + Tk Console:: + + set no_water_color blue + + + Parameters + ---------- + filename: str, optional + file to write the pore surfaces to. + + dot_density: int, optional + density of facets for generating a 3D pore representation. + The number controls the density of dots that will be used. + A sphere of dots is placed on each centre determined in the + Monte Carlo procedure. The actual number of dots written is + controlled by ``dot_density`` and the ``sample`` level of the + original analysis. ``dot_density`` should be set between 5 + (few dots per sphere) and 35 (many dots per sphere). + + no_water_color: str, optional + Color of the surface where the pore radius is too tight for a + water molecule. + + one_water_color: str, optional + Color of the surface where the pore can fit one water molecule. + + double_water_color: str, optional + Color of the surface where the radius is at least double the + minimum radius for one water molecule. + + + Returns + ------- + str + ``filename`` with the pore surfaces. + + """ + if self.sphpdbs is None or len(self.sphpdbs) == 0: + raise ValueError('No sphpdb files to read. Try calling run()') + + frames = [] + for i, sphpdb in zip(self.frames, self.sphpdbs[self.frames]): + tmp_tri = create_vmd_surface(sphpdb=sphpdb, + sph_process=self.exe['sph_process'], + sos_triangle=self.exe['sos_triangle'], + dot_density=dot_density) + + shapes = [[], [], []] + with open(tmp_tri) as f: + for line in f: + if line.startswith('draw color'): + color = line.split()[-1].lower() + if color == 'red': + dest = shapes[0] + elif color == 'green': + dest = shapes[1] + elif color == 'blue': + dest = shapes[2] + else: + msg = 'Encountered unknown color {}' + raise ValueError(msg.format(color)) + + if line.startswith('draw trinorm'): + line = line.strip('draw trinorm').strip() + dest.append('{{ {} }}'.format(line)) + try: + os.unlink(tmp_tri) + except OSError: + pass + + tri = '{ { ' + ' } { '.join(list(map(' '.join, shapes))) + ' } }' + frames.append('set triangles({i}) '.format(i=i) + tri) + + trinorms = '\n'.join(frames) + vmd_1 = vmd_script_array.format(no_water_color=no_water_color, + one_water_color=one_water_color, + double_water_color=double_water_color) + vmd_text = vmd_1 + trinorms + vmd_script_function + + with open(filename, 'w') as f: + f.write(vmd_text) + + return filename + + def min_radius(self): + """Return the minimum radius over all profiles as a function of q""" + if not self.profiles: + raise ValueError('No profiles available. Try calling run()') + return np.array([[q, p.radius.min()] for q, p in self.profiles.items()]) + + def min_radius(self): + """Return the minimum radius over all profiles as a function of q""" + if not self.profiles: + raise ValueError('No profiles available. Try calling run()') + return np.array([[q, p.radius.min()] for q, p in self.profiles.items()]) + + def delete_temporary_files(self): + """Delete temporary files""" + for f in self.tmp_files: + try: + os.unlink(f) + except OSError: + pass + self.tmp_files = [] + self.outfiles = [] + self.sphpdbs = [] + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + """Delete temporary files on exit""" + self.delete_temporary_files() + + def _process_plot_kwargs(self, frames=None, + color=None, cmap='viridis', + linestyle='-'): + """Process the colors and linestyles for plotting + + Parameters + ---------- + frames: array-like, optional + Frames to plot. If ``None``, plots all of them. + Default: ``None`` + color: str or array-like, optional + Color or colors for the plot. If ``None``, colors are + drawn from ``cmap``. Default: ``None`` + cmap: str, optional + color map to make colors for the plot if ``color`` is + not given. Names should be from the ``matplotlib.pyplot.cm`` + module. Default: 'viridis' + linestyle: str or array-like, optional + Line style for the plot. Default: '-' + + + Returns + ------- + (array-like, array-like, array-like) + frames, colors, linestyles + """ + + if frames is None: + frames = self.frames + else: + frames = util.asiterable(frames) + + if color is None: + colormap = plt.cm.get_cmap(cmap) + norm = matplotlib.colors.Normalize(vmin=min(frames), + vmax=max(frames)) + colors = colormap(norm(frames)) + else: + colors = itertools.cycle(util.asiterable(color)) + + linestyles = itertools.cycle(util.asiterable(linestyle)) + + return frames, colors, linestyles + + def plot(self, frames=None, + color=None, cmap='viridis', + linestyle='-', y_shift=0.0, + label=True, ax=None, + legend_loc='best', **kwargs): + """Plot HOLE profiles :math:`R(\zeta)` in a 1D graph. + + Lines are colored according to the specified ``color`` or + drawn from the color map ``cmap``. One line is + plotted for each trajectory frame. + + Parameters + ---------- + frames: array-like, optional + Frames to plot. If ``None``, plots all of them. + Default: ``None`` + color: str or array-like, optional + Color or colors for the plot. If ``None``, colors are + drawn from ``cmap``. Default: ``None`` + cmap: str, optional + color map to make colors for the plot if ``color`` is + not given. Names should be from the ``matplotlib.pyplot.cm`` + module. Default: 'viridis' + linestyle: str or array-like, optional + Line style for the plot. Default: '-' + y_shift : float, optional + displace each :math:`R(\zeta)` profile by ``y_shift`` in the + :math:`y`-direction for clearer visualization. Default: 0.0 + label : bool or string, optional + If ``False`` then no legend is + displayed. Default: ``True`` + ax : :class:`matplotlib.axes.Axes` + If no `ax` is supplied or set to ``None`` then the plot will + be added to the current active axes. Default: ``None`` + legend_loc : str, optional + Location of the legend. Default: 'best' + kwargs : `**kwargs` + All other `kwargs` are passed to :func:`matplotlib.pyplot.plot`. + + + Returns + ------- + ax : :class:`~matplotlib.axes.Axes` + Axes with the plot, either `ax` or the current axes. + + """ + + if not self.profiles: + raise ValueError('No profiles available. Try calling run()') + + if ax is None: + fig, ax = plt.subplots() + + fcl = self._process_plot_kwargs(frames=frames, + color=color, cmap=cmap, linestyle=linestyle) + + for i, (frame, c, ls) in enumerate(zip(*fcl)): + profile = self.profiles[frame] + dy = i*y_shift + ax.plot(profile.rxn_coord, profile.radius+dy, color=c, + linestyle=ls, zorder=-frame, label=str(frame), + **kwargs) + + ax.set_xlabel(r"Pore coordinate $\zeta$ ($\AA$)") + ax.set_ylabel(r"HOLE radius $R$ ($\AA$)") + if label == True: + ax.legend(loc=legend_loc) + return ax + + def plot3D(self, frames=None, + color=None, cmap='viridis', + linestyle='-', ax=None, r_max=None, + ylabel='Frames', **kwargs): + """Stacked 3D graph of profiles :math:`R(\zeta)`. + + Lines are colored according to the specified ``color`` or + drawn from the color map ``cmap``. One line is + plotted for each trajectory frame. + + Parameters + ---------- + frames: array-like, optional + Frames to plot. If ``None``, plots all of them. + Default: ``None`` + color: str or array-like, optional + Color or colors for the plot. If ``None``, colors are + drawn from ``cmap``. Default: ``None`` + cmap: str, optional + color map to make colors for the plot if ``color`` is + not given. Names should be from the ``matplotlib.pyplot.cm`` + module. Default: 'viridis' + linestyle: str or array-like, optional + Line style for the plot. Default: '-' + r_max : float, optional + only display radii up to ``r_max``. If ``None``, all radii are + plotted. Default: ``None`` + ax : :class:`matplotlib.axes.Axes` + If no `ax` is supplied or set to ``None`` then the plot will + be added to the current active axes. Default: ``None`` + ylabel : str, optional + Y-axis label. Default: 'Frames' + **kwargs : + All other `kwargs` are passed to :func:`matplotlib.pyplot.plot`. + + + Returns + ------- + ax : :class:`~mpl_toolkits.mplot3d.Axes3D` + Axes with the plot, either `ax` or the current axes. + + """ + + if not self.profiles: + raise ValueError('No profiles available. Try calling run()') + + from mpl_toolkits.mplot3d import Axes3D + + if ax is None: + fig = plt.figure() + ax = fig.add_subplot(111, projection='3d') + + fcl = self._process_plot_kwargs(frames=frames, + color=color, cmap=cmap, + linestyle=linestyle) + + for frame, c, ls in zip(*fcl): + profile = self.profiles[frame] + if r_max is None: + radius = profile.radius + rxn_coord = profile.rxn_coord + else: + # does not seem to work with masked arrays but with nan hack! + # http://stackoverflow.com/questions/4913306/python-matplotlib-mplot3d-how-do-i-set-a-maximum-value-for-the-z-axis + rxn_coord = profile.rxn_coord + radius = profile.radius.copy() + radius[radius > r_max] = np.nan + ax.plot(rxn_coord, frame*np.ones_like(rxn_coord), radius, + color=c, linestyle=ls, zorder=-frame, label=str(frame), + **kwargs) + + ax.set_xlabel(r"Pore coordinate $\zeta$ ($\AA$)") + ax.set_ylabel(ylabel) + ax.set_zlabel(r"HOLE radius $R$ ($\AA$)") + plt.tight_layout() + + return ax + + def over_order_parameters(self, order_parameters, frames=None): + """Get HOLE profiles sorted over order parameters ``order_parameters``. + + Parameters + ---------- + order_parameters: array-like or string + Sequence or text file containing order parameters (float + numbers) corresponding to the frames in the trajectory. Must + be same length as trajectory. + frames: array-like, optional + Selected frames to return. If ``None``, returns all of them. + Default: ``None`` + + Returns + ------- + collections.OrderedDict + sorted dictionary of {order_parameter:profile} + + """ + if not self.profiles: + raise ValueError('No profiles available. Try calling run()') + if isinstance(order_parameters, six.string_types): + try: + order_parameters = np.loadtxt(order_parameters) + except IOError: + raise ValueError('Data file not found: {}'.format(order_parameters)) + except (ValueError, TypeError): + msg = ('Could not parse given file: {}. ' + '`order_parameters` must be array-like ' + 'or a filename with array data ' + 'that can be read by np.loadtxt') + raise ValueError(msg.format(order_parameters)) + + + order_parameters = np.asarray(order_parameters) + + if len(order_parameters) != len(self._trajectory): + msg = ('The number of order parameters ({}) must match the ' + 'length of the trajectory ({} frames)') + raise ValueError(msg.format(len(order_parameters), + len(self._trajectory))) + + if frames is None: + frames = self.frames + else: + frames = np.asarray(util.asiterable(frames)) + + idx = np.argsort(order_parameters[frames]) + sorted_frames = frames[idx] + + profiles = OrderedDict() + for frame in sorted_frames: + profiles[order_parameters[frame]] = self.profiles[frame] + + return profiles + + def plot_order_parameters(self, order_parameters, + aggregator=min, + frames=None, + color='blue', + linestyle='-', ax=None, + ylabel=r'Minimum HOLE pore radius $r$ ($\AA$)', + xlabel='Order parameter', + **kwargs): + r"""Plot HOLE radii over order parameters. This function needs + an ``aggregator`` function to reduce the ``radius`` array to a + single value, e.g. ``min``, ``max``, or ``np.mean``. + + Parameters + ---------- + order_parameters: array-like or string + Sequence or text file containing order parameters (float + numbers) corresponding to the frames in the trajectory. Must + be same length as trajectory. + aggregator: callable, optional + Function applied to the radius array of each profile to + reduce it to one representative value. Default: ``min`` + frames: array-like, optional + Frames to plot. If ``None``, plots all of them. + Default: ``None`` + color: str or array-like, optional + Color for the plot. Default: 'blue' + linestyle: str or array-like, optional + Line style for the plot. Default: '-' + ax : :class:`matplotlib.axes.Axes` + If no `ax` is supplied or set to ``None`` then the plot will + be added to the current active axes. Default: ``None`` + xlabel : str, optional + X-axis label. Default: 'Order parameter' + ylabel : str, optional + Y-axis label. Default: 'Minimum HOLE pore radius $r$ ($\AA$)' + **kwargs : + All other `kwargs` are passed to :func:`matplotlib.pyplot.plot`. + + + Returns + ------- + ax : :class:`~matplotlib.axes.Axes` + Axes with the plot, either `ax` or the current axes. + + """ + + op_profiles = self.over_order_parameters(order_parameters, + frames=frames) + + if ax is None: + fig, ax = plt.subplots() + + data = [[x, aggregator(p.radius)] for x, p in op_profiles.items()] + arr = np.array(data) + ax.plot(arr[:, 0], arr[:, 1], color=color, linestyle=linestyle) + + ax.set_xlabel(xlabel) + ax.set_ylabel(ylabel) + return ax + + def gather(self, frames=None, flat=False): + """Gather the fields of each profile recarray together. + + Parameters + ---------- + frames: int or iterable of ints, optional + Profiles to include by frame. If ``None``, includes + all frames. Default: ``None`` + + flat: bool, optional + Whether to flatten the list of field arrays into a + single array. Default: ``False`` + + + Returns + ------- + dict + dictionary of fields + """ + if frames is None: + frames = self.frames + frames = util.asiterable(frames) + profiles = [self.profiles[k] for k in frames] + + rxncoords = [p.rxn_coord for p in profiles] + radii = [p.radius for p in profiles] + cen_line_Ds = [p.cen_line_D for p in profiles] + + if flat: + rxncoords = np.concatenate(rxncoords) + radii = np.concatenate(radii) + cen_line_Ds = np.concatenate(cen_line_Ds) + + dct = {'rxn_coord': rxncoords, + 'radius': radii, + 'cen_line_D': cen_line_Ds} + return dct + + def bin_radii(self, frames=None, bins=100, range=None): + """Collects the pore radii into bins by reaction coordinate. + + Parameters + ---------- + frames: int or iterable of ints, optional + Profiles to include by frame. If ``None``, includes + all frames. Default: ``None`` + + bins: int or iterable of edges, optional + If bins is an int, it defines the number of equal-width bins in the given range. If bins is a sequence, it defines a monotonically increasing array of bin edges, including the rightmost edge, allowing for non-uniform bin widths. Default: 100 + + range : (float, float), optional + The lower and upper range of the bins. + If not provided, ``range`` is simply ``(a.min(), a.max())``, + where ``a`` is the array of reaction coordinates. + Values outside the range are ignored. The first element of the range must be less than or equal to the second. + + + Returns + ------- + list of arrays of floats + List of radii present in each bin + array of (float, float) + Edges of each bin + """ + agg = self.gather(frames=frames, flat=True) + coords = agg['rxn_coord'] + + if not util.iterable(bins): + if range is None: + range = (coords.min(), coords.max()) + xmin, xmax = range + if xmin == xmax: + xmin -= 0.5 + xmax += 0.5 + bins = np.linspace(xmin, xmax, bins+1, endpoint=True) + else: + bins = np.asarray(bins) + bins = bins[np.argsort(bins)] + + idx = np.argsort(coords) + coords = coords[idx] + radii = agg['radius'][idx] + # left: inserts at i where coords[:i] < edge + # right: inserts at i where coords[:i] <= edge + # r_ concatenates + bin_idx = np.r_[coords.searchsorted(bins, side='right')] + binned = [radii[i:j] for i, j in zip(bin_idx[:-1], bin_idx[1:])] + return binned, bins + + def histogram_radii(self, aggregator=np.mean, frames=None, + bins=100, range=None): + """Histograms the pore radii into bins by reaction coordinate, + aggregate the radii with an `aggregator` function, and returns the + aggregated radii and bin edges. + + Parameters + ---------- + aggregator: callable, optional + this function must take an iterable of floats and return a + single value. Default: np.mean + + frames: int or iterable of ints, optional + Profiles to include by frame. If ``None``, includes + all frames. Default: ``None`` + + bins: int or iterable of edges, optional + If bins is an int, it defines the number of equal-width bins in the given range. If bins is a sequence, it defines a monotonically increasing array of bin edges, including the rightmost edge, allowing for non-uniform bin widths. Default: 100 + + range : (float, float), optional + The lower and upper range of the bins. + If not provided, ``range`` is simply ``(a.min(), a.max())``, + where ``a`` is the array of reaction coordinates. + Values outside the range are ignored. The first element of the range must be less than or equal to the second. + + + Returns + ------- + array of floats + histogrammed, aggregate value of radii + array of (float, float) + Edges of each bin + """ + binned, bins = self.bin_radii(frames=frames, bins=bins, range=range) + return np.array(list(map(aggregator, binned))), bins + + def plot_mean_profile(self, bins=100, range=None, + frames=None, color='blue', + linestyle='-', ax=None, + xlabel='Frame', fill_alpha=0.3, + n_std=1, legend=True, + legend_loc='best', + **kwargs): + """Collects the pore radii into bins by reaction coordinate. + + Parameters + ---------- + frames: int or iterable of ints, optional + Profiles to include by frame. If ``None``, includes + all frames. Default: ``None`` + + bins: int or iterable of edges, optional + If bins is an int, it defines the number of equal-width bins in the given range. If bins is a sequence, it defines a monotonically increasing array of bin edges, including the rightmost edge, allowing for non-uniform bin widths. Default: 100 + + range : (float, float), optional + The lower and upper range of the bins. + If not provided, ``range`` is simply ``(a.min(), a.max())``, + where ``a`` is the array of reaction coordinates. + Values outside the range are ignored. The first element of the range must be less than or equal to the second. + + color: str or array-like, optional + Color for the plot. Default: 'blue' + + linestyle: str or array-like, optional + Line style for the plot. Default: '-' + + ax : :class:`matplotlib.axes.Axes` + If no `ax` is supplied or set to ``None`` then the plot will + be added to the current active axes. Default: ``None`` + + xlabel : str, optional + X-axis label. Default: 'Order parameter' + + fill_alpha: float, optional + Opacity of filled standard deviation area Default: 0.3 + + n_std: int, optional + Number of standard deviations from the mean to fill between. + Default: 1 + + legend: bool, optional + Whether to plot a legend. Default: True + + legend_loc: str, optional + Location of legend. Default: 'best' + + **kwargs : + All other `kwargs` are passed to :func:`matplotlib.pyplot.plot`. + + + Returns + ------- + ax : :class:`~matplotlib.axes.Axes` + Axes with the plot, either `ax` or the current axes. + + """ + + binned, bins = self.bin_radii(frames=frames, bins=bins, range=range) + mean = np.array(list(map(np.mean, binned))) + midpoints = 0.5 * bins[1:] + bins[:-1] + + fig, ax = plt.subplots() + if n_std: + std = np.array(list(map(np.std, binned))) + ax.fill_between(midpoints, mean-(n_std*std), mean+(n_std*std), + color=color, alpha=fill_alpha, + label='{} std'.format(n_std)) + ax.plot(midpoints, mean, color=color, + linestyle=linestyle, label='mean', **kwargs) + ax.set_xlabel(r"Pore coordinate $\zeta$ ($\AA$)") + ax.set_ylabel(r"HOLE radius $R$ ($\AA$)") + if legend: + ax.legend(loc=legend_loc) + return ax + + def plot3D_order_parameters(self, order_parameters, + frames=None, + color=None, + cmap='viridis', + linestyle='-', ax=None, + r_max=None, + ylabel=r'Order parameter', + **kwargs): + """Plot HOLE radii over order parameters as a 3D graph. + + Lines are colored according to the specified ``color`` or + drawn from the color map ``cmap``. One line is + plotted for each trajectory frame. + + Parameters + ---------- + order_parameters: array-like or string + Sequence or text file containing order parameters(float + numbers) corresponding to the frames in the trajectory. Must + be same length as trajectory. + frames: array-like, optional + Frames to plot. If ``None``, plots all of them. + Default: ``None`` + color: str or array-like, optional + Color or colors for the plot. If ``None``, colors are + drawn from ``cmap``. Default: ``None`` + cmap: str, optional + color map to make colors for the plot if ``color`` is + not given. Names should be from the ``matplotlib.pyplot.cm`` + module. Default: 'viridis' + linestyle: str or array-like, optional + Line style for the plot. Default: '-' + ax: : class: `matplotlib.axes.Axes` + If no `ax` is supplied or set to ``None`` then the plot will + be added to the current active axes. Default: ``None`` + r_max: float, optional + only display radii up to ``r_max``. If ``None``, all radii are + plotted. Default: ``None`` + ylabel: str, optional + Y-axis label. Default: 'Order parameter' + **kwargs: + All other `kwargs` are passed to: func: `matplotlib.pyplot.plot`. + + Returns + ------- + ax: : class: `~mpl_toolkits.mplot3d.Axes3D` + Axes with the plot, either `ax` or the current axes. + + """ + op_profiles = self.over_order_parameters(order_parameters, + frames=frames) + + from mpl_toolkits.mplot3d import Axes3D + + if ax is None: + fig = plt.figure() + ax = fig.add_subplot(111, projection='3d') + + ocl = self._process_plot_kwargs(frames=list(op_profiles.keys()), + color=color, cmap=cmap, + linestyle=linestyle) + + for op, c, ls in zip(*ocl): + profile = op_profiles[op] + if r_max is None: + radius = profile.radius + rxn_coord = profile.rxn_coord + else: + # does not seem to work with masked arrays but with nan hack! + # http://stackoverflow.com/questions/4913306/python-matplotlib-mplot3d-how-do-i-set-a-maximum-value-for-the-z-axis + rxn_coord = profile.rxn_coord + radius = profile.radius.copy() + radius[radius > r_max] = np.nan + ax.plot(rxn_coord, op*np.ones_like(rxn_coord), radius, + color=c, linestyle=ls, zorder=int(-op), label=str(op), + **kwargs) + + ax.set_xlabel(r"Pore coordinate $\zeta$ ($\AA$)") + ax.set_ylabel(ylabel) + ax.set_zlabel(r"HOLE radius $R$ ($\AA$)") + plt.tight_layout() + return ax diff --git a/package/MDAnalysis/analysis/hole2/templates.py b/package/MDAnalysis/analysis/hole2/templates.py new file mode 100644 index 00000000000..855104aaddb --- /dev/null +++ b/package/MDAnalysis/analysis/hole2/templates.py @@ -0,0 +1,139 @@ +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 +# +# MDAnalysis --- https://www.mdanalysis.org +# Copyright (c) 2006-2020 The MDAnalysis Development Team and contributors +# (see the file AUTHORS for the full list of names) +# +# Released under the GNU Public Licence, v2 or any higher version +# +# Please cite your use of MDAnalysis in published work: +# +# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, +# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. +# MDAnalysis: A Python package for the rapid analysis of molecular dynamics +# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th +# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. +# doi: 10.25080/majora-629e541a-00e +# +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. +# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 +# + +"""HOLE Analysis --- :mod:`MDAnalysis.analysis.hole2.templates` +===================================================================================== + +:Author: Lily Wang +:Year: 2020 +:Copyright: GNU Public License v3 + +.. versionadded:: 1.0 + +Templates used in :mod:`MDAnalysis.analysis.hole2.hole` +""" + +exe_err = ('HOLE binary {name} not found. {name} must be on the ' + 'PATH, or the path must provided with the keyword ' + 'argument: {kw}') + +IGNORE_RESIDUES = ["SOL", "WAT", "TIP", "HOH", "K ", "NA ", "CL "] + + +#: Built-in HOLE radii (based on ``simple.rad`` from the HOLE_ distribution): +#: van der Waals radii are AMBER united atom from Weiner et al. (1984), JACS, vol 106 pp765-768. +#: *Simple* - Only use one value for each element C O H etc. +#: Added radii for K+, NA+, CL- (Pauling hydration radius from Hille 2002). +#: The data file can be written with the convenience function :func:`write_simplerad2`. +SIMPLE2_RAD = """ +remark: Time-stamp: <2005-11-21 13:57:55 oliver> [OB] +remark: van der Waals radii: AMBER united atom +remark: from Weiner et al. (1984), JACS, vol 106 pp765-768 +remark: Simple - Only use one value for each element C O H etc. +remark: van der Waals radii +remark: general last +VDWR C??? ??? 1.85 +VDWR O??? ??? 1.65 +VDWR S??? ??? 2.00 +VDWR N??? ??? 1.75 +VDWR H??? ??? 1.00 +VDWR H? ??? 1.00 +VDWR P??? ??? 2.10 +remark: ASN, GLN polar H (odd names for these atoms in xplor) +VDWR E2? GLN 1.00 +VDWR D2? ASN 1.00 +remark: amber lone pairs on sulphurs +VDWR LP?? ??? 0.00 +remark: for some funny reason it wants radius for K even though +remark: it is on the exclude list +remark: Use Pauling hydration radius (Hille 2001) [OB] +VDWR K? ??? 1.33 +VDWR NA? ??? 0.95 +VDWR CL? ??? 1.81 +remark: funny hydrogens in gA structure [OB] +VDWR 1H? ??? 1.00 +VDWR 2H? ??? 1.00 +VDWR 3H? ??? 1.00 +remark: need bond rad for molqpt option +BOND C??? 0.85 +BOND N??? 0.75 +BOND O??? 0.7 +BOND S??? 1.1 +BOND H??? 0.5 +BOND P??? 1.0 +BOND ???? 0.85 +""" + +hole_input = """ +! Input file for Oliver Smart's HOLE program +! written by MDAnalysis.analysis.hole2.hole +! filename = {filename} +COORD {coordinates} +RADIUS {radius} +SPHPDB {sphpdb} +SAMPLE {sample:f} +ENDRAD {end_radius:f} +IGNORE {ignore} +SHORTO {output_level:d} +""" + +hole_lines = { + 'cpoint': 'CPOINT {:.10f} {:.10f} {:.10f}\n', + 'cvect': 'CVECT {:.10f} {:.10f} {:.10f}\n', + 'random_seed': 'RASEED {}\n', + 'dcd': 'CHARMD {dcd}\nCHARMS {iniskip:d} {step:d}\n', +} + +vmd_script_array = """\ +set no_water_color {no_water_color} +set one_water_color {one_water_color} +set double_water_color {double_water_color} +array set triangles {{}} +""" + +vmd_script_function = """ +global vmd_frame; +trace add variable vmd_frame([molinfo top]) write drawFrame + +proc drawFrame { name element op } { + global vmd_frame triangles no_water_color one_water_color double_water_color; + set frame $vmd_frame([molinfo top]) + draw delete all; + + draw color $no_water_color; + foreach shape [lindex $triangles($frame) 0] { + draw trinorm {*}$shape + } + draw color $one_water_color; + foreach shape [lindex $triangles($frame) 1] { + draw trinorm {*}$shape + } + draw color $double_water_color; + foreach shape [lindex $triangles($frame) 2] { + draw trinorm {*}$shape + } + } + +drawFrame 0 0 0 +""" + diff --git a/package/MDAnalysis/analysis/hole2/utils.py b/package/MDAnalysis/analysis/hole2/utils.py new file mode 100644 index 00000000000..544c55d86fd --- /dev/null +++ b/package/MDAnalysis/analysis/hole2/utils.py @@ -0,0 +1,562 @@ +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 +# +# MDAnalysis --- https://www.mdanalysis.org +# Copyright (c) 2006-2020 The MDAnalysis Development Team and contributors +# (see the file AUTHORS for the full list of names) +# +# Released under the GNU Public Licence, v2 or any higher version +# +# Please cite your use of MDAnalysis in published work: +# +# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, +# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. +# MDAnalysis: A Python package for the rapid analysis of molecular dynamics +# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th +# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. +# doi: 10.25080/majora-629e541a-00e +# +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. +# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 +# + + +"""HOLE Analysis --- :mod:`MDAnalysis.analysis.hole2.helper` +===================================================================================== + +:Author: Lily Wang +:Year: 2020 +:Copyright: GNU Public License v3 + +.. versionadded:: 1.0 + +Helper functions used in :mod:`MDAnalysis.analysis.hole2.hole` +""" + +from __future__ import absolute_import + +import logging +import tempfile +import subprocess +import os +import numpy as np +import six +import errno + +from ...lib import util +from ...exceptions import ApplicationError +from .templates import (SIMPLE2_RAD, IGNORE_RESIDUES, hole_input, + hole_lines, exe_err) + +logger = logging.getLogger(__name__) + + +def write_simplerad2(filename="simple2.rad"): + """Write the built-in radii in :data:`SIMPLE2_RAD` to `filename`. + + Does nothing if `filename` already exists. + + Parameters + ---------- + filename : str, optional + output file name; the default is "simple2.rad" + + Returns + ------- + filename : str + returns the name of the data file + """ + + if not os.path.exists(filename): + with open(filename, "w") as rad: + rad.write(SIMPLE2_RAD + "\n") + logger.debug("Created simple radii file {}".format(filename)) + return filename + + +def check_and_fix_long_filename(filename, tmpdir=os.path.curdir, + max_length=70, + make_symlink=True): + """Return a file name suitable for HOLE. + + HOLE is limited to filenames <= ``max_length``. This method + + 1. returns `filename` if HOLE can process it + 2. returns a relative path (see :func:`os.path.relpath`) if that shortens the + path sufficiently + 3. creates a symlink to `filename` (:func:`os.symlink`) in a safe temporary + directory and returns the path of the symlink. + + Parameters + ---------- + filename : str + file name to be processed + tmpdir : str, optional + By default the temporary directory is created inside the current + directory in order to keep that path name short. This can be changed + with the `tmpdir` keyword (e.g. one can use "/tmp"). The default is + the current directory :data:`os.path.curdir`. + + Returns + ------- + str + path to the file that has a length less than + ``max_length`` + + Raises + ------ + RuntimeError + If none of the tricks for filename shortening worked. In this case, + manually rename the file or recompile your version of HOLE. + """ + + if len(filename) <= max_length: + return filename + + msg = ('HOLE will not read {} ' + 'because it has more than {} characters.') + logger.debug(msg.format(filename, max_length)) + + # try a relative path + new_name = os.path.relpath(filename) + if len(new_name) <= max_length: + msg = 'Using relative path: {} -> {}' + logger.debug(msg.format(filename, new_name)) + return new_name + + if make_symlink: + # shorten path by creating a symlink inside a safe temp dir + _, ext = os.path.splitext(filename) + dirname = tempfile.mkdtemp(dir=tmpdir) + newname = os.path.join(dirname, os.path.basename(filename)) + if len(newname) > max_length: + fd, newname = tempfile.mkstemp(suffix=ext, dir=dirname) + os.close(fd) + os.unlink(newname) + + if len(newname) > max_length: + newname = os.path.relpath(newname) + + if len(newname) <= max_length: + os.symlink(filename, newname) + msg = 'Using symlink: {} -> {}' + logger.debug(msg.format(filename, newname)) + return newname + + msg = 'Failed to shorten filename {}' + raise RuntimeError(msg.format(filename)) + + +def set_up_hole_input(pdbfile, + infile_text=None, + infile=None, + sphpdb_file='hole.sph', + vdwradii_file=None, + tmpdir=os.path.curdir, + sample=0.2, + end_radius=22, + cpoint=None, + cvect=None, + random_seed=None, + ignore_residues=IGNORE_RESIDUES, + output_level=0, + dcd=None, + dcd_iniskip=0, + dcd_step=1): + """ + Generate HOLE input for the parameters. + + Parameters + ---------- + pdbfile : str + The `filename` is used as input for HOLE in the "COORD" card of the + input file. It specifies the name of a PDB coordinate file to be + used. This must be in Brookhaven protein databank format or + something closely approximating this. Both ATOM and HETATM records + are read. + + infile_text: str, optional + HOLE input text or template. If set to ``None``, the function will + create the input text from the other parameters. + Default: ``None`` + + infile: str, optional + File to write the HOLE input text for later inspection. If set to + ``None``, the input text is not written out. + Default: ``None`` + + sphpdb_file : str, optional + path to the HOLE sph file, a PDB-like file containing the + coordinates of the pore centers. + The coordinates are set to the sphere centres and the occupancies + are the sphere radii. All centres are assigned the atom name QSS and + residue name SPH and the residue number is set to the storage + number of the centre. In VMD, sph + objects are best displayed as "Points". Displaying .sph objects + rather than rendered or dot surfaces can be useful to analyze the + distance of particular atoms from the sphere-centre line. + .sph files can be used to produce molecular graphical + output from a hole run, by using the + :program:`sph_process` program to read the .sph file. + Default: 'hole.sph' + + vdwradii_file: str, optional + path to the file specifying van der Waals radii for each atom. If + set to ``None``, then a set of default radii, + :data:`SIMPLE2_RAD`, is used (an extension of ``simple.rad`` from + the HOLE distribution). Default: ``None`` + + tmpdir: str, optional + The temporary directory that files can be symlinked to, to shorten + the path name. HOLE can only read filenames up to a certain length. + Default: current working directory + + sample : float, optional + distance of sample points in Å. + Specifies the distance between the planes used in the HOLE + procedure. The default value should be reasonable for most + purposes. However, if you wish to visualize a very tight + constriction then specify a smaller value. + This value determines how many points in the pore profile are + calculated. Default: 0.2 + + end_radius : float, optional + Radius in Å, which is considered to be the end of the pore. This + keyword can be used to specify the radius above which the + program regards a result as indicating that the end of the pore + has been reached. This may need to be increased for large channels, + or reduced for small channels. Default: 22.0 + + cpoint : array_like, 'center_of_geometry' or None, optional + coordinates of a point inside the pore, e.g. ``[12.3, 0.7, + 18.55]``. If set to ``None`` (the default) then HOLE's own search + algorithm is used. + ``cpoint`` specifies a point which lies within the channel. For + simple channels (e.g. gramicidin), results do not show great + sensitivity to the exact point taken. An easy way to produce an + initial point is to use molecular graphics to find two atoms which + lie either side of the pore and to average their coordinates. Or + if the channel structure contains water molecules or counter ions + then take the coordinates of one of these (and use the + `ignore_residues` keyword to ignore them in the pore radius + calculation). + If this card is not specified, then HOLE (from version 2.2) + attempts to guess where the channel will be. The procedure + assumes the channel is reasonably symmetric. The initial guess on + cpoint will be the centroid of all alpha carbon atoms (name 'CA' + in pdb file). This is then refined by a crude grid search up to 5 + Å from the original position. This procedure works most of the + time but is far from infallible — results should be + carefully checked (with molecular graphics) if it is used. + Default: None + + cvect : array_like, optional + Search direction, should be parallel to the pore axis, + e.g. ``[0,0,1]`` for the z-axis. + If this keyword is ``None`` (the default), then HOLE attempts to guess + where the channel will be. The procedure assumes that the channel is + reasonably symmetric. The guess will be either along the X axis + (1,0,0), Y axis (0,1,0) or Z axis (0,0,1). If the structure is not + aligned on one of these axis the results will clearly be + approximate. If a guess is used then results should be carefully + checked. Default: None + + random_seed : int, optional + integer number to start the random number generator. + By default, + :program:`hole` will use the time of the day. + For reproducible runs (e.g., for testing) set ``random_seed`` + to an integer. Default: ``None`` + + ignore_residues : array_like, optional + sequence of three-letter residues that are not taken into + account during the calculation; wildcards are *not* + supported. Note that all residues must have 3 letters. Pad + with space on the right-hand side if necessary. + Default: {}. + + output_level : int, optional + Determines the output of output in the ``outfile``. + For automated processing, this must be < 3. + 0: Full text output, + 1: All text output given except "run in progress" (i.e., + detailed contemporary description of what HOLE is doing). + 2: Ditto plus no graph type output - only leaving minimum + radius and conductance calculations. + 3: All text output other than input card mirroring and error messages + turned off. + Default: 0 + + dcd : str, optional + File name of DCD trajectory (must be supplied together with a + matching PDB file `filename`) and then HOLE runs its analysis on + each frame. + It does multiple HOLE runs on positions taken from a CHARMM binary + dynamics format DCD trajectory file. The ``dcd`` file must have + exactly the same number of atoms in exactly the same order as the + pdb file specified by ``pdbfile``. Note that if this option is used + the pdb file is used as a template only - the coordinates are + ignored. Note that structural parameters determined for each + individual structure are written in a tagged format so that it is + possible to extract the information from the text output file using + a :program:`grep` command. The reading of the file can be + controlled by the ``dcd_step`` keyword and/or setting + ``dcd_iniskip`` to the number of frames to be skipped + initially. + + dcd_step : int, optional + step size for going through the trajectory (skips ``dcd_step-1`` + frames). Default: 1 + + Returns + ------- + str + input text to run HOLE + + + .. versionadded:: 1.0 + + """.format(IGNORE_RESIDUES) + + short_filename = check_and_fix_long_filename(pdbfile, tmpdir=tmpdir) + if vdwradii_file is not None: + vdwradii_file = check_and_fix_long_filename(vdwradii_file, + tmpdir=tmpdir) + else: + vdwradii_file = write_simplerad2() + + if dcd is not None: + dcd = check_and_fix_long_filename(dcd, tmpdir=tmpdir) + + if infile_text is None: + infile_text = hole_input + + residues = ' '.join(ignore_residues) + + infile_text = infile_text.format(filename=pdbfile, + coordinates=short_filename, + radius=vdwradii_file, + sphpdb=sphpdb_file, + sample=sample, + end_radius=end_radius, + ignore=residues, + output_level=output_level) + + if random_seed is not None: + random_seed = int(random_seed) + infile_text += hole_lines['random_seed'].format(random_seed) + logger.info("Fixed random number seed {} for reproducible " + "runs.".format(random_seed)) + + if cpoint is not None: + if isinstance(cpoint, str): + infile_text += 'CPOINT ' + cpoint + '\n' + else: + infile_text += hole_lines['cpoint'].format(*cpoint) + else: + logger.info("HOLE will guess CPOINT") + + if cvect is not None: + infile_text += hole_lines['cvect'].format(*cvect) + else: + logger.info("HOLE will guess CVECT") + + if dcd is not None: + infile_text += hole_lines['dcd'].format(dcd=dcd, + iniskip=dcd_iniskip, + step=dcd_step) + + if infile is not None: + with open(infile, 'w') as f: + f.write(infile_text) + msg = 'Wrote HOLE input file {} for inspection' + logger.debug(msg.format(infile)) + + return infile_text + + +def run_hole(outfile, infile_text, executable): + """Run the HOLE program. + + Parameters + ---------- + outfile: str + Output file name + infile_text: str + HOLE input text + (typically generated by :func:`set_up_hole_input`) + executable: str + HOLE executable + + + Returns + ------- + str + Output file name + """ + with open(outfile, 'w') as output: + proc = subprocess.Popen(executable, stdin=subprocess.PIPE, + stdout=output) + stdout, stderr = proc.communicate(infile_text.encode('utf-8')) + + # check output in case of errors + with open(outfile, 'r') as output: + for line in output: + if line.strip().startswith(('*** ERROR ***', 'ERROR')): + proc.returncode = 255 + break + + # die in case of error + if proc.returncode != 0: + msg = 'HOLE failure ({}). Check output {}' + logger.fatal(msg.format(proc.returncode, outfile)) + if stderr is not None: + logger.fatal(stderr) + raise ApplicationError(proc.returncode, + msg.format(executable, outfile)) + + logger.info('HOLE finished. Output: {}'.format(outfile)) + return outfile + + +def collect_hole(outfile='hole.out'): + """Collect data from HOLE output + + Parameters + ---------- + outfile: str, optional + HOLE output file to read. Default: 'hole.out' + + + Returns + ------- + dict + Dictionary of HOLE profiles as record arrays + """ + fmt = util.FORTRANReader('3F12') + recarrays = {} + + with open(outfile, 'r') as output: + toggle_read = False + profile = 0 + records = [] + for line in output: + line = line.rstrip() # preserve columns in FORTRAN output + # multiple frames from dcd in? + if line.startswith(" Starting calculation for position number"): + fields = line.split() + profile = int(fields[5]) + records = [] + logger.debug('Started reading profile {}'.format(profile)) + continue + + # found data + if line.startswith(' cenxyz.cvec'): + toggle_read = True + logger.debug('Started reading data') + continue + + if toggle_read: + if len(line.strip()) != 0: + try: + rxncoord, radius, cenlineD = fmt.read(line) + except: + msg = 'Problem parsing line: {}. Check output file {}' + logger.exception(msg.format(line, outfile)) + raise + records.append((rxncoord, radius, cenlineD)) + continue + # end of data + else: + toggle_read = False + names = ['rxn_coord', 'radius', 'cen_line_D'] + recarr = np.rec.fromrecords(records, + formats='f8, f8, f8', + names=names) + recarrays[profile] = recarr + + return recarrays + + +def create_vmd_surface(sphpdb='hole.sph', + filename=None, + sph_process='sph_process', + sos_triangle='sos_triangle', + dot_density=15): + """Create VMD surface file from sphpdb file. + + Parameters + ---------- + sphpdb: str, optional + sphpdb file to read. Default: 'hole.sph' + filename: str, optional + output VMD surface file. If ``None``, a temporary file + is generated. Default: ``None`` + sph_process: str, optional + Executable for ``sph_process`` program. Default: 'sph_process' + sos_triangle: str, optional + Executable for ``sos_triangle`` program. Default: 'sos_triangle' + dot_density: int, optional + density of facets for generating a 3D pore representation. + The number controls the density of dots that will be used. + A sphere of dots is placed on each centre determined in the + Monte Carlo procedure. The actual number of dots written is + controlled by ``dot_density`` and the ``sample`` level of the + original analysis. ``dot_density`` should be set between 5 + (few dots per sphere) and 35 (many dots per sphere). + Default: 15 + + + Returns + ------- + str + the output filename for the VMD surface + + """ + fd, tmp_sos = tempfile.mkstemp(suffix=".sos", text=True) + os.close(fd) + + sph_process_path = util.which(sph_process) + if sph_process_path is None: + raise OSError(errno.ENOENT, exe_err.format(name=sph_process, + kw='sph_process')) + base_path = os.path.dirname(sph_process_path) + + sos_triangle_path = util.which(sos_triangle) + if sos_triangle_path is None: + path = os.path.join(base_path, sos_triangle) + sos_triangle_path = util.which(path) + if sos_triangle_path is None: + raise OSError(errno.ENOENT, exe_err.format(name=sos_triangle, + kw='sos_triangle')) + try: + output = subprocess.check_output([sph_process_path, "-sos", "-dotden", + str(dot_density), "-color", sphpdb, + tmp_sos], stderr=subprocess.STDOUT) + except subprocess.CalledProcessError as err: + os.unlink(tmp_sos) + logger.fatal("sph_process failed ({0})".format(err.returncode)) + six.raise_from(OSError(err.returncode, "sph_process failed"), None) + except: + os.unlink(tmp_sos) + raise + + if filename is None: + fd, filename = tempfile.mkstemp(suffix=".sos", text=True) + os.close(fd) + try: + # Could check: os.devnull if subprocess.DEVNULL not available (>3.3) + # Suppress stderr messages of sos_triangle + with open(tmp_sos) as sos, open(filename, "w") as triangles, \ + open(os.devnull, 'w') as FNULL: + subprocess.check_call( + [sos_triangle_path, "-s"], stdin=sos, stdout=triangles, + stderr=FNULL) + except subprocess.CalledProcessError as err: + logger.fatal("sos_triangle failed ({0})".format(err.returncode)) + six.raise_from(OSError(err.returncode, "sos_triangle failed"), None) + finally: + os.unlink(tmp_sos) + + return filename diff --git a/package/doc/sphinx/source/documentation_pages/analysis/hole2.rst b/package/doc/sphinx/source/documentation_pages/analysis/hole2.rst new file mode 100644 index 00000000000..856f84ec21c --- /dev/null +++ b/package/doc/sphinx/source/documentation_pages/analysis/hole2.rst @@ -0,0 +1,35 @@ +=================================================== + HOLE analysis --- :mod:`MDAnalysis.analysis.hole2` +=================================================== + +:Author: Lily Wang +:Year: 2020 +:Copyright: GNU Public License v3 + +.. versionadded:: 1.0.0 + +This module provides an updated interface for the HOLE_ suite of tools [Smart1993]_ +[Smart1996]_ to analyse an ion channel pore or transporter pathway [Stelzl2014]_ +as a function of time or arbitrary order parameters. It replaces :mod:`MDAnalysis.analysis.hole`. + +HOLE_ must be installed separately and can be obtained in binary form +from http://www.holeprogram.org/ or as source from +https://github.com/osmart/hole2. (HOLE is open source and available +under the Apache v2.0 license.) + +Module +------ + +.. automodule:: MDAnalysis.analysis.hole2.hole + :members: + + +Utility functions and templates +------------------------------- + +.. automodule:: MDAnalysis.analysis.hole2.utils + :members: + + +.. automodule:: MDAnalysis.analysis.hole2.templates + :members: \ No newline at end of file diff --git a/package/doc/sphinx/source/documentation_pages/analysis_modules.rst b/package/doc/sphinx/source/documentation_pages/analysis_modules.rst index ad145f0cec1..19081f1b5dd 100644 --- a/package/doc/sphinx/source/documentation_pages/analysis_modules.rst +++ b/package/doc/sphinx/source/documentation_pages/analysis_modules.rst @@ -78,6 +78,7 @@ Membranes and membrane proteins :maxdepth: 1 analysis/hole + analysis/hole2 analysis/leaflet Nucleic acids diff --git a/testsuite/MDAnalysisTests/analysis/test_hole2.py b/testsuite/MDAnalysisTests/analysis/test_hole2.py new file mode 100644 index 00000000000..d7a02a7baf6 --- /dev/null +++ b/testsuite/MDAnalysisTests/analysis/test_hole2.py @@ -0,0 +1,733 @@ +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 fileencoding=utf-8 +# +# MDAnalysis --- https://www.mdanalysis.org +# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors +# (see the file AUTHORS for the full list of names) +# +# Released under the GNU Public Licence, v2 or any higher version +# +# Please cite your use of MDAnalysis in published work: +# +# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, +# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. +# MDAnalysis: A Python package for the rapid analysis of molecular dynamics +# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th +# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. +# doi: 10.25080/majora-629e541a-00e +# +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. +# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 +# +from __future__ import print_function, absolute_import, division + +from six.moves import range +import pytest +import glob +import os +import sys +import textwrap + +import numpy as np +import matplotlib +import mpl_toolkits.mplot3d +import errno +from numpy.testing import ( + assert_almost_equal, + assert_equal, +) + +import MDAnalysis as mda +from MDAnalysis.analysis import hole2 +from MDAnalysis.analysis.hole2.utils import check_and_fix_long_filename +from MDAnalysis.exceptions import ApplicationError +from MDAnalysisTests.datafiles import PDB_HOLE, MULTIPDB_HOLE, PSF, DCD +from MDAnalysisTests import executable_not_found + + +def rlimits_missing(): + # return True if resources module not accesible (ie setting of rlimits) + try: + # on Unix we can manipulate our limits: http://docs.python.org/2/library/resource.html + import resource + + soft_max_open_files, hard_max_open_files = resource.getrlimit( + resource.RLIMIT_NOFILE) + except ImportError: + return True + return False + +class TestCheckAndFixLongFilename(object): + + max_length = 70 + filename = 'a'*(max_length-4) + '.pdb' + + def test_short(self): + fixed = check_and_fix_long_filename(self.filename) + assert self.filename == fixed + + def test_relative(self): + abspath = os.path.abspath(self.filename) + if len(abspath) > self.max_length: + fixed = check_and_fix_long_filename(abspath) + assert fixed == self.filename + + def test_symlink_dir(self, tmpdir): + dirname = 'really_'*20 +'long_name' + short_name = self.filename[-20:] + path = os.path.join(dirname, short_name) + with tmpdir.as_cwd(): + os.makedirs(dirname) + u = mda.Universe(PDB_HOLE) + u.atoms.write(path) + + fixed = check_and_fix_long_filename(path) + assert os.path.islink(fixed) + assert fixed.endswith(short_name) + + def test_symlink_file(self, tmpdir): + long_name = 'a'*10 + self.filename + + with tmpdir.as_cwd(): + fixed = check_and_fix_long_filename(long_name) + assert os.path.islink(fixed) + assert not fixed.endswith(long_name) + + +@pytest.mark.skipif(executable_not_found("hole"), + reason="Test skipped because HOLE not found") +class TestHole(object): + filename = PDB_HOLE + random_seed = 31415 + profile_length = 425 + rxn_coord_mean = -1.41225 + radius_min = 1.19707 + + def test_correct_input(self, tmpdir): + with tmpdir.as_cwd(): + hole2.hole(self.filename, random_seed=self.random_seed, + infile='hole.inp') + + infile = str(tmpdir.join('hole.inp')) + with open(infile, 'r') as f: + contents = f.read() + + hole_input = textwrap.dedent(""" + RADIUS simple2.rad + SPHPDB hole.sph + SAMPLE 0.200000 + ENDRAD 22.000000 + IGNORE SOL WAT TIP HOH K NA CL + SHORTO 0 + RASEED 31415 + """) + + # don't check filenames + assert contents.endswith(hole_input) + + def test_input_options(self, tmpdir): + u = mda.Universe(PDB_HOLE) + cog = u.select_atoms('protein').center_of_geometry() + + with tmpdir.as_cwd(): + hole2.hole(self.filename, random_seed=self.random_seed, + infile='hole.inp', cpoint=cog, + ignore_residues=[]) + + infile = str(tmpdir.join('hole.inp')) + with open(infile, 'r') as f: + contents = f.read() + + hole_input = textwrap.dedent(""" + RADIUS simple2.rad + SPHPDB hole.sph + SAMPLE 0.200000 + ENDRAD 22.000000 + IGNORE + SHORTO 0 + RASEED 31415 + CPOINT -0.0180961507 -0.0122730583 4.1497999943 + """) + + # don't check filenames + assert contents.endswith(hole_input) + + def test_correct_profile_values(self, tmpdir): + with tmpdir.as_cwd(): + profiles = hole2.hole(self.filename, random_seed=self.random_seed) + + values = list(profiles.values()) + assert_equal(len(values), 1, + err_msg='Should only have 1 HOLE profile') + profile = values[0] + assert_equal(len(profile), self.profile_length, + err_msg='Wrong number of points in HOLE profile') + assert_almost_equal(profile.rxn_coord.mean(), + self.rxn_coord_mean, + err_msg='Wrong mean HOLE rxn_coord') + assert_almost_equal(profile.radius.min(), + self.radius_min, + err_msg='Wrong minimum HOLE radius') + + # HOLE cannot read NAMD CHARMM files as is written by MDAnalysis + # fails with Linux HOLE? + # def test_dcd(self, tmpdir): + # with tmpdir.as_cwd(): + # u = mda.Universe(PSF, DCD) + # n_frames = len(u.trajectory) + # keep_frames = 12 + # step = 3 + # skip = n_frames-keep_frames + # filename = 'temp.pdb' + # u.atoms.write(filename) + + # profiles = hole2.hole(filename, random_seed=self.random_seed, dcd=DCD, + # dcd_iniskip=skip, + # dcd_step=step, infile='hole.inp') + # with open('hole.inp', 'r') as f: + # contents = f.read() + # assert contents.endswith('CHARMS {} {}\n'.format(skip, step-1)) + + # assert_equal(len(profiles), int(keep_frames/step)) + + def test_application_error(self, tmpdir): + with tmpdir.as_cwd(): + with pytest.raises(ApplicationError): + hole2.hole(self.filename, dcd=DCD) + + def test_output_level(self, tmpdir): + with tmpdir.as_cwd(): + with pytest.warns(UserWarning) as rec: + profiles = hole2.hole(self.filename, random_seed=self.random_seed, + output_level=100) + assert len(rec) == 1 + assert 'needs to be < 3' in rec[0].message.args[0] + # no profiles + assert len(profiles) == 0 + + def test_keep_files(self, tmpdir): + with tmpdir.as_cwd(): + hole2.hole(self.filename, random_seed=self.random_seed, + infile='hole.inp', + keep_files=False) + sphpdbs = tmpdir.join('*.sph') + assert len(glob.glob(str(sphpdbs))) == 0 + outfiles = tmpdir.join('*.out') + assert len(glob.glob(str(outfiles))) == 0 + vdwradii = tmpdir.join('simple2.rad') + assert len(glob.glob(str(vdwradii))) == 0 + pdbfiles = tmpdir.join('*.pdb') + assert len(glob.glob(str(pdbfiles))) == 0 + oldfiles = tmpdir.join('*.old') + assert len(glob.glob(str(oldfiles))) == 0 + + +@pytest.mark.skipif(executable_not_found("hole"), + reason="Test skipped because HOLE not found") +class BaseTestHole(object): + filename = MULTIPDB_HOLE + start = 5 + stop = 7 + random_seed = 31415 + + # HOLE is so slow so we only run it once and keep it in + # the class; note that you may not change universe.trajectory + # (eg iteration) because this is not safe in parallel + @pytest.fixture() + def universe(self): + return mda.Universe(self.filename) + + @pytest.fixture() + def hole(self, universe, tmpdir): + with tmpdir.as_cwd(): + h = hole2.HoleAnalysis(universe) + h.run(start=self.start, stop=self.stop, + random_seed=self.random_seed) + return h + + @pytest.fixture() + def frames(self, universe): + return [ts.frame for ts in universe.trajectory[self.start:self.stop]] + + @pytest.fixture() + def profiles(self, hole, frames): + return [hole.profiles[f] for f in frames] + + +class TestHoleAnalysis(BaseTestHole): + + def test_correct_profile_values(self, hole, frames): + assert_equal(sorted(hole.profiles.keys()), frames, + err_msg="hole.profiles.keys() should contain the frame numbers") + assert_equal(list(hole.frames), frames, + err_msg="hole.frames should contain the frame numbers") + data = np.transpose([(len(p), p.rxn_coord.mean(), p.radius.min()) + for p in hole.profiles.values()]) + assert_equal(data[0], [401, 399], err_msg="incorrect profile lengths") + assert_almost_equal(data[1], [1.98767, 0.0878], + err_msg="wrong mean HOLE rxn_coord") + assert_almost_equal(data[2], [1.19819, 1.29628], + err_msg="wrong minimum radius") + + def test_min_radius(self, hole): + values = np.array([[5., 1.19819], + [6., 1.29628]]) + assert_almost_equal(hole.min_radius(), values, + err_msg="min_radius() array not correct") + + def test_context_manager(self, universe, tmpdir): + with tmpdir.as_cwd(): + with hole2.HoleAnalysis(universe) as h: + h.run() + h.run() # run again for *.old files + h.create_vmd_surface(filename='hole.vmd') + + sphpdbs = tmpdir.join('*.sph') + assert len(glob.glob(str(sphpdbs))) == 0 + outfiles = tmpdir.join('*.out') + assert len(glob.glob(str(outfiles))) == 0 + vdwradii = tmpdir.join('simple2.rad') + assert len(glob.glob(str(vdwradii))) == 0 + pdbfiles = tmpdir.join('*.pdb') + assert len(glob.glob(str(pdbfiles))) == 0 + oldfiles = tmpdir.join('*.old') + assert len(glob.glob(str(oldfiles))) == 0 + vmd_file = tmpdir.join('hole.vmd') + assert len(glob.glob(str(vmd_file))) == 1 + + def test_output_level(self, tmpdir, universe): + with tmpdir.as_cwd(): + with pytest.warns(UserWarning) as rec: + h = hole2.HoleAnalysis(universe, + output_level=100) + h.run(start=self.start, + stop=self.stop, random_seed=self.random_seed) + assert len(rec) == 1 + assert 'needs to be < 3' in rec[0].message.args[0] + # no profiles + assert len(h.profiles) == 0 + + def test_cpoint_geometry(self, tmpdir, universe): + protein = universe.select_atoms('protein') + cogs = [protein.center_of_geometry() for ts in universe.trajectory] + with tmpdir.as_cwd(): + h = hole2.HoleAnalysis(universe, + select='protein', + cpoint='center_of_geometry', + write_input_files=True) + h.run(start=self.start, + stop=self.stop, random_seed=self.random_seed) + + infiles = sorted(glob.glob(str(tmpdir.join('hole*.inp')))) + for file, cog in zip(infiles, cogs[self.start:self.stop]): + with open(file, 'r') as f: + line = f.read().split('CPOINT')[1].split('\n')[0] + arr = np.array(list(map(float, line.split()))) + assert_almost_equal(arr, cog) + + # plotting + def test_plot(self, hole, frames, profiles): + ax = hole.plot(label=True, frames=None, y_shift=1) + err_msg = "HoleAnalysis.plot() did not produce an Axes instance" + assert isinstance(ax, matplotlib.axes.Axes), err_msg + lines = ax.get_lines()[:] + assert len(lines) == hole.n_frames + for i, (line, frame, profile) in enumerate(zip(lines, frames, profiles)): + x, y = line.get_data() + assert_almost_equal(x, profile.rxn_coord) + assert_almost_equal(y, profile.radius + i) + assert line.get_label() == str(frame) + + def test_plot_mean_profile(self, hole, frames, profiles): + binned, bins = hole.bin_radii(bins=100) + mean = np.array(list(map(np.mean, binned))) + stds = np.array(list(map(np.std, binned))) + midpoints = 0.5 * bins[1:] + bins[:-1] + ylow = list(mean-(2*stds)) + yhigh = list(mean+(2*stds)) + + ax = hole.plot_mean_profile(bins=100, n_std=2) + + # test fillbetween standard deviation + children = ax.get_children() + poly = [] + for x in children: + if isinstance(x, matplotlib.collections.PolyCollection): + poly.append(x) + assert len(poly) == 1 + xp, yp = poly[0].get_paths()[0].vertices.T + assert_almost_equal(np.unique(xp), np.unique(midpoints)) + assert_almost_equal(np.unique(yp), np.unique(ylow+yhigh)) + + # test mean line + lines = ax.get_lines() + assert len(lines) == 1 + xl, yl = lines[0].get_data() + assert_almost_equal(xl, midpoints) + assert_almost_equal(yl, mean) + + @pytest.mark.skipif(sys.version_info < (3, 1), + reason="get_data_3d requires 3.1 or higher") + def test_plot3D(self, hole, frames, profiles): + ax = hole.plot3D(frames=None, r_max=None) + err_msg = "HoleAnalysis.plot3D() did not produce an Axes3D instance" + assert isinstance(ax, mpl_toolkits.mplot3d.Axes3D), err_msg + lines = ax.get_lines()[:] + assert len(lines) == hole.n_frames + + for line, frame, profile in zip(lines, frames, profiles): + x, y, z = line.get_data_3d() + assert_almost_equal(x, profile.rxn_coord) + assert_almost_equal(np.unique(y), [frame]) + assert_almost_equal(z, profile.radius) + assert line.get_label() == str(frame) + + @pytest.mark.skipif(sys.version_info < (3, 1), + reason="get_data_3d requires 3.1 or higher") + def test_plot3D_rmax(self, hole, frames, profiles): + ax = hole.plot3D(r_max=2.5) + err_msg = "HoleAnalysis.plot3D(rmax=float) did not produce an Axes3D instance" + assert isinstance(ax, mpl_toolkits.mplot3d.Axes3D), err_msg + + lines = ax.get_lines()[:] + + for line, frame, profile in zip(lines, frames, profiles): + x, y, z = line.get_data_3d() + assert_almost_equal(x, profile.rxn_coord) + assert_almost_equal(np.unique(y), [frame]) + radius = np.where(profile.radius>2.5, np.nan, profile.radius) + assert_almost_equal(z, radius) + assert line.get_label() == str(frame) + + @pytest.mark.skipif(sys.version_info > (3, 1), + reason="get_data_3d requires 3.1 or higher") + def test_plot3D(self, hole, frames, profiles): + ax = hole.plot3D(frames=None, r_max=None) + err_msg = "HoleAnalysis.plot3D() did not produce an Axes3D instance" + assert isinstance(ax, mpl_toolkits.mplot3d.Axes3D), err_msg + lines = ax.get_lines()[:] + assert len(lines) == hole.n_frames + + for line, frame, profile in zip(lines, frames, profiles): + x, y = line.get_data() + assert_almost_equal(x, profile.rxn_coord) + assert_almost_equal(np.unique(y), [frame]) + assert line.get_label() == str(frame) + + @pytest.mark.skipif(sys.version_info > (3, 1), + reason="get_data_3d requires 3.1 or higher") + def test_plot3D_rmax(self, hole, frames, profiles): + ax = hole.plot3D(r_max=2.5) + err_msg = "HoleAnalysis.plot3D(rmax=float) did not produce an Axes3D instance" + assert isinstance(ax, mpl_toolkits.mplot3d.Axes3D), err_msg + + lines = ax.get_lines()[:] + + for line, frame, profile in zip(lines, frames, profiles): + x, y = line.get_data() + assert_almost_equal(x, profile.rxn_coord) + assert_almost_equal(np.unique(y), [frame]) + assert line.get_label() == str(frame) + + +class TestHoleAnalysisLong(BaseTestHole): + + start = 0 + stop = 11 + + rmsd = np.array([6.10501252e+00, 4.88398472e+00, 3.66303524e+00, 2.44202454e+00, + 1.22100521e+00, 1.67285541e-07, 1.22100162e+00, 2.44202456e+00, + 3.66303410e+00, 4.88398478e+00, 6.10502262e+00]) + + @pytest.fixture + def order_parameter_keys_values(self, hole): + op = hole.over_order_parameters(self.rmsd, frames=None) + return op.keys(), op.values() + + def test_gather(self, hole): + gd = hole.gather(flat=False) + for i, p in enumerate(hole.profiles.values()): + assert_almost_equal(p.rxn_coord, gd['rxn_coord'][i]) + assert_almost_equal(p.radius, gd['radius'][i]) + assert_almost_equal(p.cen_line_D, gd['cen_line_D'][i]) + + def test_gather_flat(self, hole): + gd = hole.gather(flat=True) + i = 0 + for p in hole.profiles.values(): + j = i+len(p.rxn_coord) + assert_almost_equal(p.rxn_coord, gd['rxn_coord'][i:j]) + assert_almost_equal(p.radius, gd['radius'][i:j]) + assert_almost_equal(p.cen_line_D, gd['cen_line_D'][i:j]) + i = j + assert_equal(i, len(gd['rxn_coord'])) + + def test_min_radius(self, hole): + rad = hole.min_radius() + for (f1, p), (f2, r) in zip(hole.profiles.items(), rad): + assert_equal(f1, f2) + assert_almost_equal(min(p.radius), r) + + def test_over_order_parameters(self, hole): + op = self.rmsd + profiles = hole.over_order_parameters(op, frames=None) + assert len(op) == len(profiles) + + for key, rmsd in zip(profiles.keys(), np.sort(op)): + assert key == rmsd + + idx = np.argsort(op) + arr = np.array(list(hole.profiles.values())) + for op_prof, arr_prof in zip(profiles.values(), arr[idx]): + assert op_prof is arr_prof + + def test_over_order_parameters_file(self, hole, tmpdir): + op = self.rmsd + with tmpdir.as_cwd(): + np.savetxt('rmsd.dat', self.rmsd) + profiles = hole.over_order_parameters('rmsd.dat', frames=None) + + assert len(op) == len(profiles) + + for key, rmsd in zip(profiles.keys(), np.sort(op)): + assert key == rmsd + + idx = np.argsort(op) + arr = np.array(list(hole.profiles.values())) + for op_prof, arr_prof in zip(profiles.values(), arr[idx]): + assert op_prof is arr_prof + + def test_over_order_parameters_missing_file(self, hole): + with pytest.raises(ValueError) as exc: + prof = hole.over_order_parameters('missing.dat') + assert 'not found' in str(exc.value) + + def test_over_order_parameters_invalid_file(self, hole): + with pytest.raises(ValueError) as exc: + prof = hole.over_order_parameters(PDB_HOLE) + assert 'Could not parse' in str(exc.value) + + def test_over_order_parameters_frames(self, hole): + op = self.rmsd + n_frames = 7 + profiles = hole.over_order_parameters(op, frames=np.arange(n_frames)) + assert len(profiles) == n_frames + for key, rmsd in zip(profiles.keys(), np.sort(op[:n_frames])): + assert key == rmsd + + idx = np.argsort(op[:n_frames]) + values = list(hole.profiles.values())[:n_frames] + arr = np.array(values) + for op_prof, arr_prof in zip(profiles.values(), arr[idx]): + assert op_prof is arr_prof + + def test_bin_radii(self, hole): + radii, bins = hole.bin_radii(bins=100) + dct = hole.gather(flat=True) + coords = dct['rxn_coord'] + + assert len(bins) == 101 + assert_almost_equal(bins[0], coords.min()) + assert_almost_equal(bins[-1], coords.max()) + assert len(radii) == (len(bins)-1) + + # check first frame profile + first = hole.profiles[0] + for row in first: + coord = row.rxn_coord + rad = row.radius + for i, (lower, upper) in enumerate(zip(bins[:-1], bins[1:])): + if coord > lower and coord <= upper: + assert rad in radii[i] + break + else: + raise AssertionError('Radius not in binned radii') + + @pytest.mark.parametrize('midpoint', [1.5, 1.8, 2.0, 2.5]) + def test_bin_radii_range(self, hole, midpoint): + radii, bins = hole.bin_radii(bins=100, + range=(midpoint, midpoint)) + dct = hole.gather(flat=True) + coords = dct['rxn_coord'] + + assert len(bins) == 101 + low = midpoint - 0.5 + high = midpoint + 0.5 + assert_almost_equal(bins[0], low) + assert_almost_equal(bins[-1], high) + assert len(radii) == (len(bins)-1) + + # check first frame profile + first = hole.profiles[0] + for row in first: + coord = row.rxn_coord + rad = row.radius + if coord > low and coord <= high: + for i, (lower, upper) in enumerate(zip(bins[:-1], bins[1:])): + if coord > lower and coord <= upper: + assert rad in radii[i] + break + else: + raise AssertionError('Radius not in binned radii') + else: + assert not any([rad in x for x in radii]) + + def test_bin_radii_edges(self, hole): + brange = list(np.linspace(1.0, 2.0, num=101, endpoint=True)) + moved = brange[30:] + brange[10:30] + brange[:10] + e_radii, e_bins = hole.bin_radii(bins=moved, range=(0.0, 0.0)) + r_radii, r_bins = hole.bin_radii(bins=100, range=(1.5, 1.5)) + assert_almost_equal(e_bins, r_bins) + for e, r in zip(e_radii, r_radii): + assert_almost_equal(e, r) + + def test_histogram_radii(self, hole): + means, _ = hole.histogram_radii(aggregator=np.mean, + bins=100) + radii, _ = hole.bin_radii(bins=100) + assert means.shape == (100,) + for r, m in zip(radii, means): + assert_almost_equal(r.mean(), m) + + # plotting + + def test_plot_select_frames(self, hole, frames, profiles): + ax = hole.plot(label=True, frames=[2, 3], y_shift=1) + err_msg = "HoleAnalysis.plot() did not produce an Axes instance" + assert isinstance(ax, matplotlib.axes.Axes), err_msg + lines = ax.get_lines()[:] + assert len(lines) == 2 + for i, (line, frame, profile) in enumerate(zip(lines, frames[2:4], profiles[2:4])): + x, y = line.get_data() + assert_almost_equal(x, profile.rxn_coord) + assert_almost_equal(y, profile.radius + i) + assert line.get_label() == str(frame) + + @pytest.mark.parametrize('agg', [np.max, np.mean, np.std, np.min]) + def test_plot_order_parameters(self, hole, order_parameter_keys_values, + agg): + opx = np.array(list(order_parameter_keys_values[0])) + opy = np.array([agg(p.radius) for p in order_parameter_keys_values[1]]) + ax = hole.plot_order_parameters(self.rmsd, aggregator=agg, frames=None) + err_msg = ("HoleAnalysis.plot_order_parameters()" + "did not produce an Axes instance") + assert isinstance(ax, matplotlib.axes.Axes), err_msg + + lines = ax.get_lines() + assert len(lines) == 1 + x, y = lines[0].get_data() + assert_almost_equal(x, opx) + assert_almost_equal(y, opy) + + @pytest.mark.skipif(sys.version_info < (3, 1), + reason="get_data_3d requires 3.1 or higher") + def test_plot3D_order_parameters(self, hole, order_parameter_keys_values): + opx = np.array(list(order_parameter_keys_values[0])) + profiles = np.array(list(order_parameter_keys_values[1])) + ax = hole.plot3D_order_parameters(self.rmsd, frames=None) + err_msg = ("HoleAnalysis.plot3D_order_parameters() " + "did not produce an Axes3D instance") + assert isinstance(ax, mpl_toolkits.mplot3d.Axes3D), err_msg + + lines = ax.get_lines() + assert len(lines) == hole.n_frames + for line, opx_, profile in zip(lines, opx, profiles): + x, y, z = line.get_data_3d() + assert_almost_equal(x, profile.rxn_coord) + assert_almost_equal(np.unique(y), np.array([opx_])) + assert_almost_equal(z, profile.radius) + + @pytest.mark.skipif(sys.version_info > (3, 1), + reason="get_data_3d requires 3.1 or higher") + def test_plot3D_order_parameters(self, hole, order_parameter_keys_values): + opx = np.array(list(order_parameter_keys_values[0])) + profiles = np.array(list(order_parameter_keys_values[1])) + ax = hole.plot3D_order_parameters(self.rmsd, frames=None) + err_msg = ("HoleAnalysis.plot3D_order_parameters() " + "did not produce an Axes3D instance") + assert isinstance(ax, mpl_toolkits.mplot3d.Axes3D), err_msg + + lines = ax.get_lines() + assert len(lines) == hole.n_frames + for line, opx_, profile in zip(lines, opx, profiles): + x, y = line.get_data() + assert_almost_equal(x, profile.rxn_coord) + assert_almost_equal(np.unique(y), np.array([opx_])) + +@pytest.mark.skipif(executable_not_found("hole"), + reason="Test skipped because HOLE not found") +class TestHoleModule(object): + try: + # on Unix we can manipulate our limits: http://docs.python.org/2/library/resource.html + import resource + soft_max_open_files, hard_max_open_files = resource.getrlimit( + resource.RLIMIT_NOFILE) + except ImportError: + pass + + @staticmethod + @pytest.fixture() + def universe(): + return mda.Universe(MULTIPDB_HOLE) + + @pytest.mark.skipif(rlimits_missing, + reason="Test skipped because platform does not allow setting rlimits") + def test_hole_module_fd_closure(self, universe, tmpdir): + """test open file descriptors are closed (MDAnalysisTests.analysis.test_hole.TestHoleModule): Issue 129""" + # If Issue 129 isn't resolved, this function will produce an OSError on + # the system, and cause many other tests to fail as well. + # + # Successful test takes ~10 s, failure ~2 s. + + # Hasten failure by setting "ulimit -n 64" (can't go too low because of open modules etc...) + import resource + + # ----- temporary hack ----- + # on Mac OS X (on Travis) we run out of open file descriptors + # before even starting this test (see + # https://github.com/MDAnalysis/mdanalysis/pull/901#issuecomment-231938093); + # if this issue is solved by #363 then revert the following + # hack: + # + import platform + if platform.platform() == "Darwin": + max_open_files = 512 + else: + max_open_files = 64 + # + # -------------------------- + + resource.setrlimit(resource.RLIMIT_NOFILE, + (max_open_files, self.hard_max_open_files)) + + with tmpdir.as_cwd(): + try: + H = hole2.HoleAnalysis(universe, cvect=[0, 1, 0], sample=20.0) + finally: + self._restore_rlimits() + + # pretty unlikely that the code will get through 2 rounds if the MDA + # issue 129 isn't fixed, although this depends on the file descriptor + # open limit for the machine in question + try: + for i in range(2): + # will typically get an OSError for too many files being open after + # about 2 seconds if issue 129 isn't resolved + H.run() + except OSError as err: + if err.errno == errno.EMFILE: + raise pytest.fail( + "hole2.HoleAnalysis does not close file descriptors (Issue 129)") + raise + finally: + # make sure to restore open file limit !! + self._restore_rlimits() + + def _restore_rlimits(self): + try: + import resource + resource.setrlimit(resource.RLIMIT_NOFILE, + (self.soft_max_open_files, self.hard_max_open_files)) + except ImportError: + pass