From 7563a706cf00dd70f3f3b5d1de635dd5a6f92e66 Mon Sep 17 00:00:00 2001 From: Irfan Alibay Date: Fri, 17 Jul 2020 19:18:07 +0100 Subject: [PATCH] fixes run's single method (#85) * Fixes #82 * Changes: * input arguments now reflects the changes made to read_molecule_file in #84 * Writing of pKa file is now optional (default behaviour has been kept). This will be particularly useful downstream where we would just want to have access to the MoleculeContainer object. * new test_run file specific for testing run. * add tests * add docs --- docs/source/conf.py | 3 ++ propka/input.py | 27 ++++++------ propka/run.py | 88 ++++++++++++++++++++++++++++++-------- tests/test_run.py | 101 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 188 insertions(+), 31 deletions(-) create mode 100644 tests/test_run.py diff --git a/docs/source/conf.py b/docs/source/conf.py index 1ef14ee..a3d22ec 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -73,3 +73,6 @@ # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ['_static'] + +# Enable intersphinx mapping +intersphinx_mapping = {'python': ('https://docs.python.org/3', None)} diff --git a/propka/input.py b/propka/input.py index de17149..4d12c19 100644 --- a/propka/input.py +++ b/propka/input.py @@ -7,7 +7,6 @@ from pathlib import Path from pkg_resources import resource_filename from propka.lib import protein_precheck -from propka.output import write_propka from propka.atom import Atom from propka.conformation_container import ConformationContainer from propka.group import initialize_atom_group @@ -41,34 +40,36 @@ def read_molecule_file(filename: str, mol_container, stream=None): Args: filename(str): name of input file. If not using a filestream via the ``stream`` argument, should be a path to the file to be read. - mol_container: MolecularContainer object. 
+ mol_container: :class:`~propka.molecular_container.MolecularContainer` + object. stream: optional filestream handle. If ``None``, then open ``filename`` as a local file for reading. Returns: - updated MolecularContainer object + updated :class:`~propka.molecular_container.MolecularContainer` object. Raises: - ValuError if invalid input given + ValueError: if invalid input given Examples: There are two main cases for using ``read_molecule_file``. The first (and most common) is to pass the input file (``filename``) as a string which gives the path of the molecule file to be read (here we - also pass a ``MoleculeContainer`` object named ``mol_container``). + also pass a :class:`~propka.molecular_container.MolecularContainer` + object named ``mol_container``). >>> read_molecule_file('test.pdb', mol_container) The other use case is when passing a file-like object, e.g. a - ``StringIO`` class, instance. This is done by passing the object via - the ``stream`` argument. Since file-like objects do not usually have - an associated file name, an appropirate file name should be passed to - the ``filename`` argument. In this case, ``filename`` is not opened for - reading, but instead is used to help recognise the file type (based on - the extension being either `.pdb` or `.propka_input`) and also uses - that given ``filename`` to assign a name to the input - MolecularContainer object. + :class:`io.StringIO` class instance. This is done by passing the + object via the ``stream`` argument. Since file-like objects do not + usually have an associated file name, an appropriate file name should + be passed to the ``filename`` argument. In this case, ``filename`` is + not opened for reading, but instead is used to help recognise the file + type (based on the extension being either `.pdb` or `.propka_input`) + and also uses that given ``filename`` to assign a name to the input + :class:`~propka.molecular_container.MolecularContainer` object. 
>>> read_molecule_file('test.pdb', mol_container, stream=string_io_object) diff --git a/propka/run.py b/propka/run.py index a012c52..eed95f2 100644 --- a/propka/run.py +++ b/propka/run.py @@ -36,34 +36,86 @@ def main(optargs=None): my_molecule.write_propka() -def single(pdbfile, optargs=None): - """Run a single PROPKA calculation using *pdbfile* as input. +def single(filename: str, optargs: tuple = (), stream=None, + write_pka: bool = True): + """Run a single PROPKA calculation using ``filename`` as input. - Commandline options can be passed as a **list** in *optargs*. + Args: + filename (str): name of input file. If filestream is not passed via + ``stream``, should be a path to the file to be read. + optargs (tuple): Optional, commandline options for propka. Extra files + passed via ``optargs`` will be ignored, see Notes. + stream : optional filestream handle. If ``None``, then ``filename`` + will be used as path to input file for reading. + write_pka (bool): Controls if the pKa file should be written to disk. - Example - ------- - Given an input file "protein.pdb", run the equivalent of ``propka3 - --mutation=N25R/N181D -v --pH=7.2 protein.pdb`` as:: + Returns: + :class:`~propka.molecular_container.MolecularContainer` object. - propka.run.single("protein.pdb", - optargs=["--mutation=N25R/N181D", "-v", "--pH=7.2"]) + Examples: + Given an input file "protein.pdb", run the equivalent of ``propka3 + --mutation=N25R/N181D -v --pH=7.2 protein.pdb`` as:: + propka.run.single("protein.pdb", + optargs=["--mutation=N25R/N181D", "-v", "--pH=7.2"]) - .. todo:: - Test :func:`single`, not sure if it is correctly processing ``pdbfile``. + By default, a pKa file will be written. However in some cases one may + wish to not output this file and just have access to the + :class:`~propka.molecular_container.MolecularContainer` object. 
If so, + then pass ``False`` to ``write_pka``:: + + mol = propka.run.single("protein.pdb", write_pka=False) + + In some cases, one may also want to pass a file-like (e.g. + :class:`io.StringIO`) object instead of a file path as a string. In + these cases the file-like object should be passed to the ``stream`` + argument and a string indicating the file type in the ``filename`` + argument; this string only has to look like a valid file name, it does + not need to exist because the data are actually read from ``stream``. + This approach is necessary because file-like objects do not usually + have names, and propka uses the ``filename`` argument to determine the + input file type, and assigns the file name for the + :class:`~propka.molecular_container.MolecularContainer` object:: + + mol = propka.run.single('input.pdb', stream=string_io_file) + + In this case, a PDB file-like object was passed as `string_io_file`. + The resultant pKa file will be written out as `input.pka`. + + Notes: + * Only a single input structure file will be processed, defined by + ``filename`` (and ``stream`` if passing a file-like object). Any + additional files passed via the `-f` or `--file` flag to optargs will + be ignored. + + + .. seealso:: + + :func:`propka.input.read_molecule_file` """ - optargs = optargs if optargs is not None else [] - options = loadOptions(*optargs) - pdbfile = options.filenames.pop(0) + # Deal with input optarg options + optargs = tuple(optargs) + optargs += (filename,) + options = loadOptions(optargs) + parameters = read_parameter_file(options.parameters, Parameters()) - if len(options.filenames) > 0: - _LOGGER.warning("Ignoring filenames: {0:s}".format(options.filenames)) + + # Only filename present should be the one passed via the arguments + # Anything else will probably have been passed using optargs' `-f` flag. 
+ ignored_list = [i for i in options.filenames if i != filename] + if ignored_list: + _LOGGER.warning(f"Ignoring extra filenames passed: {ignored_list}") + options.filenames = [filename] + my_molecule = MolecularContainer(parameters, options) - my_molecule = read_molecule_file(pdbfile, my_molecule) + my_molecule = read_molecule_file(filename, my_molecule, stream=stream) my_molecule.calculate_pka() - my_molecule.write_pka() + + # write outputs if options.generate_propka_input: my_molecule.write_propka() + if write_pka: + my_molecule.write_pka() + return my_molecule diff --git a/tests/test_run.py b/tests/test_run.py new file mode 100644 index 0000000..98af702 --- /dev/null +++ b/tests/test_run.py @@ -0,0 +1,101 @@ +"""Tests for PROPKA's run module""" +import logging +import os +from pathlib import Path +from io import StringIO +import pytest +import propka.run as pkrun + +from .test_basic_regression import compare_output +from .test_streamio import get_paths + + +_LOGGER = logging.getLogger(__name__) + + +@pytest.mark.parametrize("pdb, options", [ + pytest.param("1FTJ-Chain-A", (), id="1FTJ-Chain-A: no options"), + pytest.param('3SGB-subset', ( + "--titrate_only", + "E:17,E:18,E:19,E:29,E:44,E:45,E:46,E:118,E:119,E:120,E:139"), + id="3SGB: --titrate_only"), + pytest.param('1HPX-warn', ('--quiet',), id="1HPX-warn: --quiet"), +]) +def test_single_file(tmpdir, pdb, options): + """Basic regression test using propka.run.single and local file for the + input PDB file""" + ref_path, pdb_path = get_paths(pdb) + filename = str(pdb_path) + + with tmpdir.as_cwd(): + pkrun.single(filename, options) + compare_output(pdb, Path.cwd(), ref_path) + + +@pytest.mark.parametrize("pdb, options", [ + pytest.param("1FTJ-Chain-A", (), id="1FTJ-Chain-A: no options"), + pytest.param('3SGB-subset', ( + "--titrate_only", + "E:17,E:18,E:19,E:29,E:44,E:45,E:46,E:118,E:119,E:120,E:139"), + id="3SGB: --titrate_only"), + pytest.param('1HPX-warn',('--quiet',), id="1HPX-warn: --quiet"), +]) +def 
test_single_filestream(tmpdir, pdb, options): + """Basic regression test using StringIO streams for the input PDB file""" + ref_path, pdb_path = get_paths(pdb) + filename = f"{pdb}.pdb" + + with open(pdb_path, 'r') as writer: + filestream = StringIO(writer.read()) + + with tmpdir.as_cwd(): + pkrun.single(filename, options, stream=filestream) + compare_output(pdb, Path.cwd(), ref_path) + + filestream.close() + + +def test_single_nopka(tmpdir): + """Basic test to check that the pKa file is not written when write_pka is + `False`""" + pdb = "1FTJ-Chain-A" + ref_path, pdb_path = get_paths(pdb) + filename = f"{pdb}.pdb" + + with open(pdb_path, 'r') as writer: + filestream = StringIO(writer.read()) + + pkrun.single(filename, stream=filestream, write_pka=False) + assert not os.path.isfile(f"{pdb}.pka") + + +def test_single_propka_input(tmpdir): + """Basic test to check that the propka_input file is written when + `--generate-propka-input` is passed""" + pdb = "1FTJ-Chain-A" + options = ('--generate-propka-input',) + ref_path, pdb_path = get_paths(pdb) + filename = f"{pdb}.pdb" + + with open(pdb_path, 'r') as writer: + filestream = StringIO(writer.read()) + + with tmpdir.as_cwd(): + pkrun.single(filename, options, stream=filestream) + assert os.path.isfile(f"{pdb}.propka_input") + + +def test_single_extra_files_logwarn(tmpdir, caplog): + """Tests that a logging warning is thrown if passing files via optargs""" + pdb = "1FTJ-Chain-A" + options = ('-f foo.pdb bar.pdb', '-f test.pdb test2.pdb', + '--generate-propka-input') + ref_path, pdb_path = get_paths(pdb) + filename = str(pdb_path) + + with tmpdir.as_cwd(): + pkrun.single(filename, options) + + wmsg = ("Ignoring extra filenames passed: [' foo.pdb bar.pdb', " + "' test.pdb test2.pdb']") + assert wmsg in caplog.records[0].message