Skip to content

Commit

Permalink
Merge pull request #9 from Electrostatics/nathan/csv
Browse files Browse the repository at this point in the history
Add CSV/Excel output.
  • Loading branch information
sobolevnrm authored Dec 17, 2020
2 parents 1ebb646 + 7d75d90 commit c731995
Show file tree
Hide file tree
Showing 7 changed files with 98 additions and 19 deletions.
14 changes: 12 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,16 @@
# osmolytes

## 1.0.0
## 1.1.0 (forthcoming)

### Additions

* CSV and Excel output

### Changes

* Refactored tests to also test main driver (where relevant)

## 1.0.0 (15-Dec-2020)

### Changes

Expand All @@ -21,6 +31,6 @@ Replaced with `importlib` suggestion from [PyPA](https://packaging.python.org/gu

Fixed problem with insertion codes and added tests for problems in the future.

## 0.0.1
## 0.0.1 (26-Nov-2020)

Initial release.
1 change: 1 addition & 0 deletions osmolytes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,5 @@
https://doi.org/10.1073/pnas.0507053102
"""
from importlib import metadata

__version__ = metadata.version("osmolytes")
41 changes: 35 additions & 6 deletions osmolytes/main.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Main driver for module."""
import argparse
import logging
from pathlib import Path
from sys import version_info
import osmolytes
from osmolytes.pqr import parse_pqr_file, count_residues
Expand Down Expand Up @@ -47,24 +48,31 @@ def build_parser():
)
parser.add_argument(
"--surface-output",
nargs=1,
default=None,
help="path for output of surface in XYZ format",
)
parser.add_argument(
"--solvent-radius",
nargs=1,
default=1.4,
type=float,
help="radius of solvent molecule (in Å)",
)
parser.add_argument(
"--surface-points",
nargs=1,
default=2000,
type=int,
help="number of points per atom for constructing surface",
)
parser.add_argument(
"--output",
choices=["xlsx", "csv"],
help="output m-value results in Excel (xlsx) or CSV (csv) format",
)
parser.add_argument(
"--output-dir",
default=".",
help="directory for m-value output"
)
parser.add_argument(
"pqr_path",
help=(
Expand All @@ -75,10 +83,15 @@ def build_parser():
return parser


def main():
"""Main driver."""
def main(args=None):
"""Main entry point.
:param list(str) args: list of strings to parse
:returns: dictionary of results
:rtype: dict
"""
parser = build_parser()
args = parser.parse_args()
args = parser.parse_args(args)
logging.basicConfig(level=getattr(logging, args.log_level))
_LOGGER.info(f"Osmolytes version {osmolytes.__version__}")
_LOGGER.debug(f"Got command-line arguments: {args}")
Expand All @@ -103,3 +116,19 @@ def main():
_LOGGER.info(f"Detailed energies (kcal/mol/M):\n{energy_df.to_string()}")
energies = energy_df.sum(axis=0).sort_values()
_LOGGER.info(f"Summary energies (kcal/mol/M):\n{energies}")
output_dir = Path(args.output_dir)
if args.output == "xlsx":
output_path = output_dir / f"{Path(args.pqr_path).stem}-mvalues.xlsx"
_LOGGER.info(f"Writing energies (kcal/mol/M) to {output_path}")
energy_df.to_excel(output_path)
elif args.output == "csv":
output_path = output_dir / f"{Path(args.pqr_path).stem}-mvalues.csv"
_LOGGER.info(f"Writing energies (kcal/mol/M) to {output_path}")
energy_df.to_csv(output_path)
return {
"args": args,
"atoms": atoms,
"sas": sas,
"energy_df": energy_df,
"energies": energies,
}
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ pandas
scipy
pyyaml
pytest
openpyxl
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

setuptools.setup(
name="osmolytes",
version="1.0.0",
version="1.1.0",
description=(
"This code attempts to predict the influence of osmolytes on protein "
"stability"
Expand All @@ -30,7 +30,7 @@
package_data={
"": ["data/*.yaml", "tests/data/*.json", "tests/data/*.yaml"]
},
install_requires=["numpy", "scipy", "pyyaml", "pandas"],
install_requires=["numpy", "scipy", "pyyaml", "pandas", "openpyxl"],
tests_require=["pytest"],
entry_points={"console_scripts": ["mvalue=osmolytes.main:main"]},
keywords="science chemistry biophysics biochemistry",
Expand Down
22 changes: 13 additions & 9 deletions tests/test_energy.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,7 @@
import pandas as pd
import numpy as np
from scipy.stats import linregress
from osmolytes.pqr import parse_pqr_file
from osmolytes.sasa import SolventAccessibleSurface
from osmolytes.energy import transfer_energy
from osmolytes.main import main


_LOGGER = logging.getLogger(__name__)
Expand All @@ -32,15 +30,21 @@ def test_energy(tmp_path):
test_results = {}
for protein in ["1A6F", "1STN", "2BU4"]:
pqr_path = PROTEIN_PATH / f"{protein}.pqr"
with open(pqr_path, "rt") as pqr_file:
atoms = parse_pqr_file(pqr_file)
xyz_path = Path(tmp_path) / f"{protein}.xyz"
sas = SolventAccessibleSurface(
atoms, probe_radius=1.4, num_points=2000, xyz_path=xyz_path
results = main(
[
"--solvent-radius",
"1.4",
"--surface-points",
"2000",
"--surface-output",
str(xyz_path),
str(pqr_path),
]
)
energy_df = transfer_energy(atoms, sas)
energy_df = results["energy_df"]
_LOGGER.info(f"{protein} detailed energies:\n{energy_df.to_string()}")
energies = energy_df.sum(axis=0).sort_values()
energies = results["energies"]
_LOGGER.info(f"{protein} m-values\n{energies}")
for osmolyte, value in energies.iteritems():
key = f"{protein} {osmolyte}"
Expand Down
34 changes: 34 additions & 0 deletions tests/test_output.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
"""Test CSV/Excel output of m-value results."""
import logging
from pathlib import Path
import pytest
from osmolytes.main import main


_LOGGER = logging.getLogger(__name__)
PROTEIN_PATH = Path("tests/data/proteins")


@pytest.mark.parametrize(
    "protein,output_fmt", [("1A6F", "csv"), ("2BU4", "xlsx")]
)
def test_output(protein, output_fmt, tmp_path):
    """Test that the main driver writes the requested m-value output file.

    :param str protein:  name of the test protein (stem of its PQR file in
        ``PROTEIN_PATH``)
    :param str output_fmt:  value passed to ``--output`` (``csv`` or ``xlsx``)
    :param tmp_path:  temporary directory (pytest fixture) that receives the
        surface and m-value output files
    """
    pqr_path = PROTEIN_PATH / f"{protein}.pqr"
    xyz_path = Path(tmp_path) / f"{protein}.xyz"
    output_dir = Path(tmp_path)
    main(
        [
            "--solvent-radius",
            "1.4",
            "--surface-points",
            "2000",
            "--surface-output",
            str(xyz_path),
            "--output",
            output_fmt,
            "--output-dir",
            str(output_dir),
            str(pqr_path),
        ]
    )
    # The driver names its output "<PQR stem>-mvalues.<format>" inside the
    # directory given by --output-dir; without these checks the test would
    # pass even if nothing was written.
    output_path = output_dir / f"{protein}-mvalues.{output_fmt}"
    assert output_path.is_file(), f"Expected output file {output_path}"
    assert output_path.stat().st_size > 0, f"Output file {output_path} is empty"

0 comments on commit c731995

Please sign in to comment.