diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..36915ff --- /dev/null +++ b/.gitignore @@ -0,0 +1,104 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib64/ +parts/ +sdist/ +var/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*,cover +.hypothesis/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# IPython Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# dotenv +.env + +# virtualenv +.venv/ +venv/ +ENV/ + +# Spyder project settings +.spyderproject + +# IDEs +.idea/ +.vscode/ + +# Rope project settings +.ropeproject + +# mypy +.mypy_cache/ + +# pytest +.pytest_cache/ + +# data +/distributions.png +/G1_DATA/.aa_traj.xtc_offsets.npz +/G1_DATA/.cg_traj.xtc_offsets.npz diff --git a/CODEOWNERS b/CODEOWNERS new file mode 100644 index 0000000..816ce1c --- /dev/null +++ b/CODEOWNERS @@ -0,0 +1,37 @@ +# This is a comment. +# Each line is a file pattern followed by one or more owners. + +# These owners will be the default owners for everything in +# the repo. Unless a later match takes precedence, +# @global-owner1 and @global-owner2 will be requested for +# review when someone opens a pull request. +* @CharlyEmpereurmot + +# Order is important; the last matching pattern takes the most +# precedence. 
When someone opens a pull request that only +# modifies JS files, only @js-owner and not the global +# owner(s) will be requested for a review. +*.py @CharlyEmpereurmot @giovannidoni + +# You can also use email addresses if you prefer. They'll be +# used to look up users just like we do for commit author +# emails. +# *.go docs@example.com + +# In this example, @doctocat owns any files in the build/logs +# directory at the root of the repository and any of its +# subdirectories. +# /build/logs/ @doctocat + +# The `docs/*` pattern will match files like +# `docs/getting-started.md` but not further nested files like +# `docs/build-app/troubleshooting.md`. +# docs/* docs@example.com + +# In this example, @octocat owns any file in an apps directory +# anywhere in your repository. +# apps/ @octocat + +# In this example, @doctocat owns any file in the `/docs` +# directory in the root of your repository. +# /docs/ @doctocat \ No newline at end of file diff --git a/G1_DATA/cg_map.ndx b/G1_DATA/cg_map.ndx index f7521ed..58eb904 100644 --- a/G1_DATA/cg_map.ndx +++ b/G1_DATA/cg_map.ndx @@ -3,7 +3,7 @@ [ N0 ] 5 8 57 74 [ Nda ] -13 16 18 19 20 +13 16 18 19 20 [ N0 ] 9 10 77 97 [ Nda ] @@ -15,7 +15,7 @@ [ Qd ] 110 113 [ Nda ] -30 33 35 36 37 +30 33 35 36 37 [ N0 ] 26 27 117 137 [ Nda ] diff --git a/G1_DATA/cg_model.itp b/G1_DATA/cg_model.itp index c7c2461..3865f0b 100644 --- a/G1_DATA/cg_model.itp +++ b/G1_DATA/cg_model.itp @@ -7,35 +7,35 @@ G1 1 ; id type resnr residue atom cgnr charge 1 N0 1 G1 A1 1 0.00000 -2 N0 2 G1 A2 2 0.00000 -3 Nda 3 G1 A3 3 0.00000 -4 N0 4 G1 A4 4 0.00000 -5 Nda 5 G1 A5 5 0.00000 -6 Qd 6 G1 A6 6 1.00000 -7 Nda 7 G1 A7 7 0.00000 -8 Qd 8 G1 A8 8 1.00000 -9 Nda 9 G1 A9 9 0.00000 -10 N0 10 G1 A10 10 0.00000 -11 Nda 11 G1 A11 11 0.00000 -12 Qd 12 G1 A12 12 1.00000 -13 Nda 13 G1 A13 13 0.00000 -14 Qd 14 G1 A14 14 1.00000 -15 Nda 15 G1 A15 15 0.00000 -16 N0 16 G1 A16 16 0.00000 -17 Nda 17 G1 A17 17 0.00000 -18 Qd 18 G1 A18 18 1.00000 -19 Nda 19 G1 A19 19 
0.00000 -20 Qd 20 G1 A20 20 1.00000 -21 Nda 21 G1 A21 21 0.00000 -22 N0 22 G1 A22 22 0.00000 -23 Nda 23 G1 A23 23 0.00000 -24 Qd 24 G1 A24 24 1.00000 -25 Nda 25 G1 A25 25 0.00000 -26 Qd 26 G1 A26 26 1.00000 +2 N0 1 G1 A2 2 0.00000 +3 Nda 1 G1 A3 3 0.00000 +4 N0 1 G1 A4 4 0.00000 +5 Nda 1 G1 A5 5 0.00000 +6 Qd 1 G1 A6 6 1.00000 +7 Nda 1 G1 A7 7 0.00000 +8 Qd 1 G1 A8 8 1.00000 +9 Nda 1 G1 A9 9 0.00000 +10 N0 1 G1 A10 10 0.00000 +11 Nda 1 G1 A11 11 0.00000 +12 Qd 1 G1 A12 12 1.00000 +13 Nda 1 G1 A13 13 0.00000 +14 Qd 1 G1 A14 14 1.00000 +15 Nda 1 G1 A15 15 0.00000 +16 N0 1 G1 A16 16 0.00000 +17 Nda 1 G1 A17 17 0.00000 +18 Qd 1 G1 A18 18 1.00000 +19 Nda 1 G1 A19 19 0.00000 +20 Qd 1 G1 A20 20 1.00000 +21 Nda 1 G1 A21 21 0.00000 +22 N0 1 G1 A22 22 0.00000 +23 Nda 1 G1 A23 23 0.00000 +24 Qd 1 G1 A24 24 1.00000 +25 Nda 1 G1 A25 25 0.00000 +26 Qd 1 G1 A26 26 1.00000 [ bonds ] -; i j funct length force.c. +; i j funct length force.c. ; bond group 1 1 2 1 0 0 ; B1 @@ -72,7 +72,7 @@ G1 1 [ angles ] -; i j k funct angle force.c. +; i j k funct angle force.c. 
; angle group 1 1 2 15 2 120 0 ; A1 @@ -114,3 +114,4 @@ G1 1 22 23 24 2 180 0 ; A5 22 25 26 2 180 0 ; A5 + diff --git a/G1_DATA/cg_topol.tpr b/G1_DATA/cg_topol.tpr new file mode 100755 index 0000000..0ce7463 Binary files /dev/null and b/G1_DATA/cg_topol.tpr differ diff --git a/G1_DATA/cg_traj.xtc b/G1_DATA/cg_traj.xtc new file mode 100755 index 0000000..d968ae3 Binary files /dev/null and b/G1_DATA/cg_traj.xtc differ diff --git a/G1_DATA/start_conf.gro b/G1_DATA/start_conf.gro index 2999091..93198e8 100644 --- a/G1_DATA/start_conf.gro +++ b/G1_DATA/start_conf.gro @@ -1,5 +1,5 @@ g1_GMX.gro created by acpype (Rev: 10101) on Thu Sep 12 17:34:03 2019 t= 0.00000 step= 0 - 1281 + 1282 1PG1 N1 1 2.936 2.913 2.508 1PG1 C2 2 2.896 2.600 2.705 1PG1 H7 3 3.134 3.247 2.318 @@ -26,6 +26,7 @@ g1_GMX.gro created by acpype (Rev: 10101) on Thu Sep 12 17:34:03 2019 t= 0.000 1PG1 H9 24 2.384 3.105 3.680 1PG1 H11 25 3.353 3.308 3.377 1PG1 N7 26 3.705 3.490 3.432 + 1PG1 N7 26 3.705 3.490 3.432 2W W 27 0.084 3.595 3.359 3W W 28 0.460 2.488 2.882 4W W 29 3.165 2.218 0.652 diff --git a/G1_DATA/system.top b/G1_DATA/system.top index dc8a43f..9b934d7 100644 --- a/G1_DATA/system.top +++ b/G1_DATA/system.top @@ -10,5 +10,5 @@ G1 (PAMAM) in water [ molecules ] ; Compound #mols G1 1 -W 1247 -CL- 8 +W 1248 +CL- 8 \ No newline at end of file diff --git a/README.md b/README.md index 5aafd7e..384c9a6 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Swarm-CG -Swarm-CG is designed for automatically optimizing the bonded terms of a coarse-grained (CG) molecular model, in explicit or implicit solvent, with respect to a reference all-atom (AA) trajectory and starting from a preliminary CG model (topology and non-bonded parameters). 
The package is designed for usage with Gromacs and contains 3 modules for: +Swarm-CG is designed for automatically optimizing the bonded terms of a MARTINI-based coarse-grained (CG) molecular model, in explicit or implicit solvent, with respect to a reference all-atom (AA) trajectory and starting from a preliminary CG model (topology and non-bonded parameters). The package is designed for usage with Gromacs and contains 3 modules for: 1. Evaluating the bonded parametrization of a CG model 2. Optimizing bonded terms of a CG model @@ -8,9 +8,11 @@ Swarm-CG is designed for automatically optimizing the bonded terms of a coarse-g ![Swarm-CG](https://raw.githubusercontent.com/GMPavanLab/Swarm-CG/master/images/TOC_Swarm-CG_paper.png) +Swarm-CG works with MARTINI version 2 or 3. Virtual sites are correctly handled and the AA-to-CG mapping can be interpreted as Center of Mass (COM) or Center of Geometry (COG). + ### Publication -> Empereur-mot, C.; Pesce, L.; Bochicchio, D.; Perego, C.; Pavan, G.M. (2020) Swarm-CG: Automatic Parametrization of Bonded Terms in Coarse-Grained Models of Simple to Complex Molecules via Fuzzy Self-Tuning Particle Swarm Optimization. [ChemRxiv. Preprint](https://doi.org/10.26434/chemrxiv.12613427) +> Empereur-mot, C.; Pesce, L.; Bochicchio, D.; Perego, C.; Pavan, G.M. (2020) Swarm-CG: Automatic Parametrization of Bonded Terms in MARTINI-based Coarse-Grained Models of Simple to Complex Molecules via Fuzzy Self-Tuning Particle Swarm Optimization. [ChemRxiv. Preprint](https://doi.org/10.26434/chemrxiv.12613427) ### Installation & Usage @@ -41,7 +43,7 @@ Here is an ITP file extract from the demonstration data of [PAMAM G1](https://gi The module `scg_evaluate` enables quick evaluation of the fit of bond, angle and dihedral distributions between a CG model trajectory and a reference AA model trajectory of an identical molecule, by producing a single comprehensive figure. 
- scg_evaluate -aa_tpr G1_DATA/aa_topol.tpr -aa_traj G1_DATA/aa_traj.xtc -cg_map G1_DATA/cg_map.ndx -cg_itp G1_DATA/cg_model.itp -cg_tpr your_cg.tpr -cg_traj your_cg.xtc + scg_evaluate -aa_tpr G1_DATA/aa_topol.tpr -aa_traj G1_DATA/aa_traj.xtc -cg_map G1_DATA/cg_map.ndx -cg_itp G1_DATA/cg_model.itp -cg_tpr G1_DATA/cg_topol.tpr -cg_traj G1_DATA/cg_traj.xtc It can also be used for inspecting AA-mapped distributions exclusively. diff --git a/environment.yml b/environment.yml new file mode 100644 index 0000000..728e713 --- /dev/null +++ b/environment.yml @@ -0,0 +1,24 @@ +name: swarmcg +channels: + - conda-forge +dependencies: + - cycler=0.10.0 + - dscribe=0.4.0 # 0.3.5 available for linux + - matplotlib=3.2.0 # 3.2.1 available + - mdanalysis>=1.0.0 + - mypy=0.770 + - numpy=1.19.1 + - pandas=1.0.1 + - pip + - pybind11=2.4.3 # 2.5.0 available + - pyemd=0.5.1 + - pylint=2.4.4 + - pytest=5.4.1 + - pytest-runner=5.2 + - python=3.7.6 + - scikit-learn=0.22.2.post1 + - scipy=1.4.1 + - setuptools + - wheel + - pip: + - fst-pso diff --git a/setup.py b/setup.py index 1d85fd6..5865596 100644 --- a/setup.py +++ b/setup.py @@ -1,19 +1,21 @@ from setuptools import setup, find_packages +import swarmcg + with open('README.md') as readme_file: README = readme_file.read() setup( name='swarm-cg', - version='v1.1.2', + version=swarmcg.__version__, description='Tools for automatic parametrization of bonded terms in coarse-grained molecular models, with respect to an all-atom trajectory', - author='Charly Empereur-mot', - author_email='charly.empereur@gmail.com', + author='Charly Empereur-mot', + author_email='charly.empereur@gmail.com', license='MIT', - url = 'https://github.com/GMPavanLab/Swarm-CG', + url = 'https://github.com/GMPavanLab/Swarm-CG', long_description=README, long_description_content_type="text/markdown", - keywords = ['gromacs', 'coarse-grain', 'molecular model', 'optimization', 'force field'], + keywords=['gromacs', 'coarse-grain', 'molecular model', 'optimization', 'force 
field'], packages=find_packages(include=['swarmcg', 'swarmcg.*']), install_requires=[ 'numpy>=1.16.4', @@ -25,9 +27,9 @@ ], entry_points={ 'console_scripts': [ - 'scg_optimize=swarmcg.optimize_model:main', - 'scg_evaluate=swarmcg.evaluate_model:main', - 'scg_monitor=swarmcg.analyze_optimization:main' + 'scg_optimize=swarmcg.optimize_model:main', + 'scg_evaluate=swarmcg.evaluate_model:main', + 'scg_monitor=swarmcg.analyze_optimization:main' ] } ) diff --git a/swarmcg/__init__.py b/swarmcg/__init__.py index e69de29..7bb021e 100644 --- a/swarmcg/__init__.py +++ b/swarmcg/__init__.py @@ -0,0 +1 @@ +__version__ = '1.1.3' diff --git a/swarmcg/analyze_optimization.py b/swarmcg/analyze_optimization.py index 2083889..a8a36d4 100644 --- a/swarmcg/analyze_optimization.py +++ b/swarmcg/analyze_optimization.py @@ -1,20 +1,24 @@ -import warnings - # some numpy version have this ufunc warning at import + many packages call numpy and display annoying warnings +import warnings warnings.filterwarnings("ignore") -import matplotlib.pyplot as plt -from matplotlib.ticker import MaxNLocator -import numpy as np -# from pylab import polyfit import os, sys from argparse import ArgumentParser, RawTextHelpFormatter, SUPPRESS from shlex import quote as cmd_quote -from . import config -from . 
import swarmCG as scg + +import matplotlib.pyplot as plt +import numpy as np +from matplotlib.ticker import MaxNLocator + +import swarmcg.shared.styling +from swarmcg import config +from swarmcg.shared import exceptions +from swarmcg.shared.styling import ANALYSE_DESCR +from swarmcg.shared.utils import forward_fill + warnings.resetwarnings() -def main(): +def run(ns): # TODO: print some text to tell user if opti run finished or not -- then we can only look at the results files, not the running processes on the machine @@ -30,61 +34,6 @@ def main(): plt.rcParams['axes.axisbelow'] = True - print(scg.header_package(' Module: Optimization run analysis\n')) - - args_parser = ArgumentParser(description='''\ -This module produces a visual summary (big plot) of an optimization procedure started with -module 'scg_optimize' to refine the bonded terms of a coarse-grained (CG) molecular model. -It works whether the optimization is ongoing or finished. The plot will be produced in the -directory provided via argument -opti_dir. - -Top row displays bonded terms score (global and breakdown) together with radius of gyration -(Rg) and solvent accessible surface area (SASA) estimations. We call these estimations because -they are calculated on short simulations used during optimization (time depends on parameters -used for optimization), therefore one should always run a long simulation at the end of the -optimizaton process, from which one can calculate the real Rg and SASA values for your model. - -Other rows display bond, angle and dihedral parameters tested together with their independant -score (distance from the AA distributions using EMD/Wasserstein). This allows to diagnose -issues, notably related to the topology defined in the ITP file, for example if the score -cannot go down for a specific group of bonds, angles or dihedrals. The optimization procedure -is in principle robust, as demonstrated in the paper, however problems can arise from the CG -representation used (e.g. 
if topology is too restrictive or incorrectly defined) and non-bonded -parameters (e.g. strong intra-molecular attractions that would not allow the molecule to adopt -extended conformations). -''', formatter_class=lambda prog: RawTextHelpFormatter(prog, width=135, max_help_position=52), add_help=False, usage=SUPPRESS) - - args_header = config.sep_close+'\n| ARGUMENTS |\n'+config.sep_close - # bullet = '❭' - # bullet = '★' - # bullet = '|' - bullet = ' ' - - required_args = args_parser.add_argument_group(args_header+'\n\n'+bullet+'INPUT/OUTPUT') - required_args.add_argument('-opti_dir', dest='opti_dirname', help='Directory created by module \'scg_optimize\' that contains all files\ngenerated during the optimization procedure', type=str, metavar='') - required_args.add_argument('-o', dest='plot_filename', help='Filename for the output plot, produced in directory -opti_dir.\nExtension/format can be one of: eps, pdf, pgf, png, ps, raw, rgba,\nsvg, svgz', type=str, default='opti_summary.png', metavar=' (opti_summary.png)') - - optional_args = args_parser.add_argument_group(bullet+'OTHERS') - optional_args.add_argument('-plot_scale', dest='plot_scale', help='Scale factor of the plot', type=float, default=1.0, metavar=' (1.0)') - optional_args.add_argument('-h', '--help', help='Show this help message and exit', action='help') - - # display help if script was called without arguments - if len(sys.argv) == 1: - args_parser.print_help() - sys.exit() - - # arguments handling, display command line if help or no arguments provided - # argcomplete.autocomplete(parser) - ns = args_parser.parse_args() - input_cmdline = ' '.join(map(cmd_quote, sys.argv)) - print('Working directory:', os.getcwd()) - print('Command line:', input_cmdline) - print() - print(config.sep_close) - print('| SUMMARIZING OPTIMIZATION PROCEDURE |') - print(config.sep_close) - print() - # parameters read_offset = 15 # nb of trailing fields that have static lengths in the recap file (i.e. 
NOT dependent on number of bonds, angles, etc.) min_nb_cols = 9 # to be sure we have enough columns for opti process plots, even if number of bonds/angles/dihedrals is less than this @@ -96,9 +45,14 @@ def main(): try: used_dihedrals = iter_indep_scores[:,0] for i in range(1, iter_indep_scores.shape[1]): - scg.forward_fill(iter_indep_scores[:,i], config.sim_crash_EMD_indep_score) - except IndexError: - sys.exit(config.header_error+'The optimization recap file seems empty, please wait for your optimization process to start or check for errors during execution') + iter_indep_scores[:, i] = forward_fill(iter_indep_scores[:,i], config.sim_crash_EMD_indep_score) + except IndexError as e: + msg = ( + "The optimization recap file seems empty, please wait for your optimization process " + "to start or check for errors during execution" + ) + raise exceptions.IncompleteOptimisationFile(msg) + # process files and plot with open(ns.opti_dirname+'/'+config.opti_perf_recap_file, 'r') as fp: @@ -139,10 +93,10 @@ def main(): all_eval_scores, all_eval_times, all_total_times = [], [], [] # worst_fit_score = round((nb_constraints+nb_bonds+nb_angles+nb_dihedrals) * config.sim_crash_EMD_indep_score, 3) - worst_fit_score = round(\ - np.sqrt((nb_constraints+nb_bonds) * config.sim_crash_EMD_indep_score) + \ - np.sqrt(nb_angles * config.sim_crash_EMD_indep_score) + \ - np.sqrt(nb_dihedrals * config.sim_crash_EMD_indep_score) \ + worst_fit_score = round( + np.sqrt((nb_constraints+nb_bonds) * config.sim_crash_EMD_indep_score) + + np.sqrt(nb_angles * config.sim_crash_EMD_indep_score) + + np.sqrt(nb_dihedrals * config.sim_crash_EMD_indep_score) , 3) all_fit_score_total, all_fit_score_constraints_bonds, all_fit_score_angles, all_fit_score_dihedrals = np.array([]), np.array([]), np.array([]), np.array([]) all_gyr_aa_mapped, all_gyr_aa_mapped_std, all_gyr_cg, all_gyr_cg_std = np.array([]), np.array([]), np.array([]), np.array([]) @@ -283,22 +237,22 @@ def main(): # display indicator when 
simulation(s) crashed for any reason -- check for None gyr_cg to identify a simulation as crashed crashes_ids = np.where(all_gyr_cg == None)[0]+1 - scg.forward_fill(all_eval_scores, None) - scg.forward_fill(all_fit_score_total, None) - scg.forward_fill(all_fit_score_constraints_bonds, None) - scg.forward_fill(all_fit_score_angles, None) - scg.forward_fill(all_fit_score_dihedrals, None) - scg.forward_fill(all_gyr_aa_mapped, None) - scg.forward_fill(all_gyr_aa_mapped_std, None) + all_eval_scores = forward_fill(all_eval_scores, None) + all_fit_score_total = forward_fill(all_fit_score_total, None) + all_fit_score_constraints_bonds = forward_fill(all_fit_score_constraints_bonds, None) + all_fit_score_angles = forward_fill(all_fit_score_angles, None) + all_fit_score_dihedrals = forward_fill(all_fit_score_dihedrals, None) + all_gyr_aa_mapped = forward_fill(all_gyr_aa_mapped, None) + all_gyr_aa_mapped_std = forward_fill(all_gyr_aa_mapped_std, None) # all_gyr_cg = np.where(all_gyr_cg == None, 0, all_gyr_cg) - scg.forward_fill(all_gyr_cg, None) - scg.forward_fill(all_gyr_cg_std, None) - scg.forward_fill(all_sasa_aa_mapped, None) - scg.forward_fill(all_sasa_aa_mapped_std, None) + all_gyr_cg = forward_fill(all_gyr_cg, None) + all_gyr_cg_std = forward_fill(all_gyr_cg_std, None) + all_sasa_aa_mapped = forward_fill(all_sasa_aa_mapped, None) + all_sasa_aa_mapped_std = forward_fill(all_sasa_aa_mapped_std, None) # all_sasa_cg = np.where(all_sasa_cg == None, 0, all_sasa_cg) - # scg.forward_fill(all_sasa_cg, 0) - scg.forward_fill(all_sasa_cg, None) - scg.forward_fill(all_sasa_cg_std, None) + # all_sasa_cg = forward_fill(all_sasa_cg, 0) + all_sasa_cg = forward_fill(all_sasa_cg, None) + all_sasa_cg_std = forward_fill(all_sasa_cg_std, None) for i in range(len(all_gyr_aa_mapped)): all_gyr_aa_mapped[i] += all_gyr_aa_mapped_offset @@ -657,5 +611,56 @@ def main(): print() +def main(): + + print(swarmcg.shared.styling.header_package( + ' Module: Optimization run analysis\n')) + + formatter = 
lambda prog: RawTextHelpFormatter(prog, width=135, max_help_position=52) + args_parser = ArgumentParser( + description=ANALYSE_DESCR, + formatter_class=formatter, + add_help=False, + usage=SUPPRESS + ) + + args_header = swarmcg.shared.styling.sep_close + '\n| ARGUMENTS |\n' + swarmcg.shared.styling.sep_close + bullet = ' ' + + required_args = args_parser.add_argument_group(args_header + '\n\n' + bullet + 'INPUT/OUTPUT') + required_args.add_argument('-opti_dir', dest='opti_dirname', + help='Directory created by module \'scg_optimize\' that contains all files\ngenerated during the optimization procedure', + type=str, metavar='') + required_args.add_argument('-o', dest='plot_filename', + help='Filename for the output plot, produced in directory -opti_dir.\nExtension/format can be one of: eps, pdf, pgf, png, ps, raw, rgba,\nsvg, svgz', + type=str, default='opti_summary.png', + metavar=' (opti_summary.png)') + + optional_args = args_parser.add_argument_group(bullet + 'OTHERS') + optional_args.add_argument('-plot_scale', dest='plot_scale', help='Scale factor of the plot', + type=float, default=1.0, metavar=' (1.0)') + optional_args.add_argument('-h', '--help', help='Show this help message and exit', + action='help') + + # display help if script was called without arguments + if len(sys.argv) == 1: + args_parser.print_help() + sys.exit() + + # arguments handling, display command line if help or no arguments provided + ns = args_parser.parse_args() + input_cmdline = ' '.join(map(cmd_quote, sys.argv)) + print('Working directory:', os.getcwd()) + print('Command line:', input_cmdline) + print() + print(swarmcg.shared.styling.sep_close) + print( + '| SUMMARIZING OPTIMIZATION PROCEDURE |') + print(swarmcg.shared.styling.sep_close) + print() + + run(ns) +if __name__ == "__main__": + main() diff --git a/swarmcg/config.py b/swarmcg/config.py index 5ee6555..0329d5e 100644 --- a/swarmcg/config.py +++ b/swarmcg/config.py @@ -1,25 +1,20 @@ # general stuff -module_version = '1.1.2' 
github_url = 'http://github.com/GMPavanLab/SwarmCG' gmx_path = 'gmx' -# clustering, defaults -default_dist_thres_bonds = 1 # nm -default_dist_thres_angles = 180 # degrees -default_dist_thres_dihedrals = 360 # degrees -# default_dist_thres_bonds = 0.01 # for tests, force splitting groups with distribution clustering -# default_dist_thres_angles = 1 # for tests, force splitting groups with distribution clustering -# default_dist_thres_dihedrals = 1 # for tests, force splitting groups with distribution clustering - # BI and FST-PSO OPTI, defaults kB = 0.008314462 -sim_temperature = 300 # Kelvin -bi_nb_bins = 50 # nb of bins to use for Boltzmann Inversion, will be doubled for dihedrals distributions binning during BI -- this has huge impact on the results of the BI and this value shall STAY AT 50 ! actually I did not try to modify much but this feels like dangerous atm -bonds_max_range = 5 # nm -- used to define grid for EMD calculations so increasing this only slightly increases computation time, however small bw for bonds has real impact -bw_constraints = 0.002 # nm -bw_bonds = 0.01 # nm -bw_angles = 2.5 # degrees -bw_dihedrals = 2.5 # degrees +sim_temperature = 300 # Kelvin +bi_nb_bins = 50 # nb of bins to use for Boltzmann Inversion, will be doubled for dihedrals distributions binning during BI -- this has huge impact on the results of the BI and this value shall STAY AT 50 ! 
actually I did not try to modify much but this feels like dangerous atm +bonds_max_range = 15 # nm -- used to define grid for EMD calculations +# NOTE: increasing bonds_max_range increases computation time, but memory usage increases exponentially +# TODO: detect when a bond has longer values than bonds_max_range and suggest that the user raise the limit if they really +# need to, but I don't really see what kind of use case would require more than 5 nm (maybe elastic net in +# proteins though) +bw_constraints = 0.002 # nm +bw_bonds = 0.01 # nm +bw_angles = 2.5 # degrees +bw_dihedrals = 2.5 # degrees default_min_fct_bonds = 0 default_max_fct_bonds_bi = 17000 default_max_fct_bonds_opti = 18000 @@ -39,38 +34,43 @@ default_abs_range_fct_dihedrals_bi_func_with_mult = 3.5 default_abs_range_fct_dihedrals_opti_func_with_mult = 15 -bonds2angles_scoring_factor = 500 # multiplier applied to constraints/bonds EMD scores to retrieve angles/dihedrals mismatches that are comparable, for the opti scoring function -sim_crash_EMD_indep_score = 150 # when a simulation crashes or does not finish for any reason: EMD distance between 2 distributions, for 1 geom +bonds2angles_scoring_factor = 500 # multiplier applied to constraints/bonds EMD scores to retrieve angles/dihedrals mismatches that are comparable, for the opti scoring function +sim_crash_EMD_indep_score = 150 # when a simulation crashes or does not finish for any reason: EMD distance between 2 distributions, for 1 geom # bonds scaling, default -bonds_scaling = 1.0 # ratio -min_bonds_length = 0.00 # nm -bonds_scaling_str = '' # constraints and bonds ids + their required target AA-mapped distributions rescaled averages +bonds_scaling = 1.0 # ratio +min_bonds_length = 0.00 # nm +bonds_scaling_str = '' # constraints and bonds ids + their required target AA-mapped distributions rescaled averages # building of the initial guesses for optimization, defaults -bond_dist_guess_variation = 0.025 # nm -angle_value_guess_variation = 10 # 
degrees -dihedral_value_guess_variation = 10 # degrees -# val_guess_fact = 1.0 # factor to apply to initial geoms values to find low and high boundaries for random generation of particles' values -- now adjusted according to optimization cycles -# fct_guess_fact = 0.2 # factor to apply to initial force constant to find low and high boundaries for random generation of particles' force constants -- now adjusted according to optimization cycles -fct_guess_min_flat_diff_bonds = 200 # flat minimum force constant variation that fct_guess_fact shall yield, used to find low and high boundaries for random generation of particles' force constants -fct_guess_min_flat_diff_angles = 50 # flat minimum force constant variation that fct_guess_fact shall yield, used to find low and high boundaries for random generation of particles' force constants -fct_guess_min_flat_diff_dihedrals_without_mult = 0.50 # flat minimum force constant variation that fct_guess_fact shall yield, used to find low and high boundaries for random generation of particles' force constants -fct_guess_min_flat_diff_dihedrals_with_mult = 0.20 # flat minimum force constant variation that fct_guess_fact shall yield, used to find low and high boundaries for random generation of particles' force constants +bond_dist_guess_variation = 0.025 # nm +angle_value_guess_variation = 10 # degrees +dihedral_value_guess_variation = 10 # degrees +fct_guess_min_flat_diff_bonds = 200 # flat minimum force constant variation that fct_guess_fact shall yield, used to find low and high boundaries for random generation of particles' force constants +fct_guess_min_flat_diff_angles = 50 # flat minimum force constant variation that fct_guess_fact shall yield, used to find low and high boundaries for random generation of particles' force constants +fct_guess_min_flat_diff_dihedrals_without_mult = 0.50 # flat minimum force constant variation that fct_guess_fact shall yield, used to find low and high boundaries for random generation of 
particles' force constants +fct_guess_min_flat_diff_dihedrals_with_mult = 0.20 # flat minimum force constant variation that fct_guess_fact shall yield, used to find low and high boundaries for random generation of particles' force constants # gromacs functions that are properly treated at the moment # if we find a function that is not handled, program will exit with an appropriate error message -handled_constraints_functions = [1] -handled_bonds_functions = [1] -handled_angles_functions = [1, 2] -handled_dihedrals_functions = [1, 2, 4] -dihedral_func_with_mult = [1, 4] # these functions use 3 parameters, the last one being multiplicity (if it's omitted gromacs will use 1 by default, we reproduce this behavior) -# TODO: handle dihedral function 9 correctly so that different potentials can be stacked for the same beads -- this is the primary purpose of function 9 !! +# TODO: handle dihedral function 9 correctly so that different potentials can be stacked for the same beads +# this is the primary purpose of function 9 !! 
+handled_functions = { + 'constraint': [1], # tested and verified: 1 + 'bond': [1], # tested and verified: 1 + 'angle': [1, 2], # tested and verified: 1, 2 + 'dihedral': [1, 2, 4], # tested and verified: 1, 2, 4 -- ongoing: 9 (need to merge the 1+ dihedrals groups on plots) + 'virtual_sites2': [1], # tested and verified: 1 -- ongoing: 2 (need GMX 2020) + 'virtual_sites3': [1, 2, 3, 4], # tested and verified: 1, 2, 3, 4 + 'virtual_sites4': [2], # tested and verified: 2 -- irrelevant: 1 + 'virtual_sitesn': [1, 2, 3] # tested and verified: 1, 2, 3 +} +dihedral_func_with_mult = [1, 4, 9] # these functions use 3 parameters, the last one being multiplicity # plots display parameters -use_hists = False # hists are not implemented in a way that they will be displayed with left and right borders, as it is already the case for bonds -line_alpha = 0.6 # line alpha for the density plots -fill_alpha = 0.35 # fill alpha for the density plots +use_hists = False # hists are not implemented in a way that they will be displayed with left and right bold borders atm +line_alpha = 0.6 # line alpha for the density plots +fill_alpha = 0.30 # fill alpha for the density plots cg_color = '#1f77b4' atom_color = '#d62728' @@ -86,6 +86,7 @@ help_aa_tpr = 'Topology binary file of your reference AA simulation (TPR)' help_aa_traj = 'Trajectory file of the reference AA simulation (XTC, TRR)\nPBC are handled internally if trajectory contains box dimensions' help_cg_map = 'Mapping file of the atoms to CG beads (NDX-like file format)' +help_mapping_type = 'Center Of Mass (COM) or Center Of Geometry (COG), for\ninterpreting the mapping file' help_verbose = 'Display more details on each processing step' help_gmx_path = 'Your Gromacs alias/path' help_bonds_scaling = 'Scaling factor for ALL AA-mapped bonds/constraints lengths\nOnly one of arguments -bonds_scaling, -bonds_scaling_str\nand -min_bonds_length can be provided' @@ -105,7 +106,7 @@ # optimization output filenames input_sim_files_dirname = 
'.internal/input_CG_simulation_files' -iteration_sim_files_dirname = 'CG_sim_files' # basename to be appended to with _NN +iteration_sim_files_dirname = 'CG_sim_files' # basename to be appended to with _NN best_fitted_model_dirname = 'optimized_CG_model' distrib_plots_all_evals_dirname = 'all_evals_distributions' log_files_all_evals_dirname = 'all_evals_logs' @@ -115,15 +116,6 @@ ref_distrib_plots = 'reference_AA_distributions.png' best_distrib_plots = 'optimized_CG_model_distributions.png' -# stdout display formatting -sep = '----------------------------------------------------------------------' -sep_close = '+---------------------------------------------------------------------------------------------+' -# header_warning = '\n========================= /!\\ WARNING /!\\ ==========================\n' -header_warning = '\n-- ! WARNING ! --\n' -# header_error = '\n========================== /!\\ ERROR /!\\ ===========================\n' -header_error = '\n-- ! ERROR ! --\n' -header_gmx_error = sep+'\n GMX ERROR MSG\n'+sep+'\n\n' - diff --git a/swarmcg/evaluate_model.py b/swarmcg/evaluate_model.py index a70f388..95321ce 100644 --- a/swarmcg/evaluate_model.py +++ b/swarmcg/evaluate_model.py @@ -1,183 +1,232 @@ +# some numpy version have this ufunc warning at import + many packages call numpy and display annoying warnings import warnings -# some numpy version have this ufunc warning at import + many packages call numpy and display annoying warnings warnings.filterwarnings("ignore") -import os, re, sys -from shlex import quote as cmd_quote -import matplotlib -matplotlib.use('AGG') # use the Anti-Grain Geometry non-interactive backend suited for scripted PNG creation -import matplotlib.pyplot as plt +import os, sys from argparse import ArgumentParser, RawTextHelpFormatter, SUPPRESS -# import argcomplete # arguments completion currently disabled because it's one more library to install -from random import randint -import numpy as np -import scipy.stats -from pyemd import 
emd -import MDAnalysis as mda -from . import config -from . import swarmCG as scg -warnings.resetwarnings() - - -def main(): - - from numpy import VisibleDeprecationWarning - warnings.filterwarnings("ignore", category=VisibleDeprecationWarning) # filter MDAnalysis + numpy deprecation stuff that is annoying - - # TODO: make it possible to feed a delta for Rg in case the model has scaling ? - - # command for tests - # ./evaluate_model.py -aa_tpr ../DATA_MDL/Mono_B/atomistic/MOB_atomistic_solvated.tpr -aa_traj ../DATA_MDL/Mono_B/atomistic/MOB_atomistic_solvated.xtc -cg_map ../DATA_MDL/Mono_B/MOB_atom_mapping/atomistic/MOB_mapping.ndx -cg_itp MOB_HUMAN_SIM/Mono_B.itp -cg_tpr MOB_HUMAN_SIM/cg_solvated.tpr -cg_traj MOB_HUMAN_SIM/cg_solvated.xtc - - # ./evaluate_model.py -aa_tpr ../DATA_MDL/Mono_B/atomistic/MOB_atomistic_solvated.tpr -aa_traj ../DATA_MDL/Mono_B/atomistic/MOB_atomistic_solvated.xtc -cg_map ../DATA_MDL/Mono_B/MOB_atom_mapping/atomistic/MOB_mapping.ndx -cg_itp MOB_HUMAN_SIM/Mono_B.itp - - # ./evaluate_model.py -aa_tpr ../DATA_MDL/BTA/BTA_atom_mapping/atomistic/fake_for_test.tpr -aa_traj ../DATA_MDL/BTA/BTA_atom_mapping/atomistic/ATOM_TRAJ.xtc -cg_map ../DATA_MDL/BTA/BTA_atom_mapping/atomistic/index_bta.ndx -cg_itp BTA_HUMAN_SIM/bta_P3.itp -cg_tpr BTA_HUMAN_SIM/cg_solvated.tpr -cg_traj BTA_HUMAN_SIM/cg_solvated.xtc - - # ./evaluate_model.py -aa_tpr ../DATA_MDL/POPC/AA/topol.tpr -aa_traj ../DATA_MDL/POPC/AA/traj.trr -cg_map ../DATA_MDL/POPC/CG/mapping.ndx -cg_itp POPC_CG_SIM/martini_v2.0_POPC_02.itp -cg_tpr POPC_CG_SIM/cg_solvated.tpr -cg_traj POPC_CG_SIM/cg_solvated.xtc - - # ./evaluate_model.py -aa_tpr ../DATA_MDL/B3T/AA/topol.tpr -aa_traj ../DATA_MDL/B3T/AA/traj_atom.xtc -cg_map ../DATA_MDL/B3T/CG/MAPPING.ndx -cg_itp ../DATA_MDL/B3T/CG/MARTINI/B3T_CG.itp -cg_tpr B3T_HUMAN_SIM/produced.tpr -cg_traj B3T_HUMAN_SIM/produced.xtc - - - print(scg.header_package(' Module: Model bonded terms assessment\n')) - - args_parser = ArgumentParser(description='''\ -This 
module enables quick evaluation of the fit of bond, angle and dihedral distributions between -a CG model trajectory and a reference AA model trajectory of an identical molecule, in a single -comprehensive figure. The figure's rows display bond, angle and dihedral distributions for groups -present in your system according to the ITP file. +from shlex import quote as cmd_quote -Arguments allows to specify scaling of the AA bonds used as reference to optimize the CG model. -An image displaying all AA reference distributions will be created at the very beginning of the -optimization process. You can check it to make sure scaling is conform to your expectations. +import matplotlib -The CG model preliminary ITP file follows the standard ITP format, with one subtlety. The file -can include groups of bonds, angles and dihedrals that will be considered identical. Their -distributions will be averaged within groups. This is important to obtain reliable results for -symmetrical molecules. Groups can be formed using empty line(s) or comment(s), like this: +import swarmcg.shared.styling +from swarmcg import swarmCG as scg +from swarmcg import config +from swarmcg.shared import exceptions +from swarmcg.shared.styling import EVALUATE_DESCR - [ angles ] +warnings.resetwarnings() +matplotlib.use('AGG') # use the Anti-Grain Geometry non-interactive backend suited for scripted PNG creation - ; i j k funct angle force.c. - ; grp 1 - 5 6 10 1 150 40 - 9 8 11 1 150 40 - ; grp 2 - 1 6 10 2 120 0 ; NOTE: either comment(s) or empty line(s) - 4 8 11 2 120 0 ; separate groups of bonds/ang/dihe. -The AA trajectory is mapped on-the-fly using file from argument -cg_map, which uses gromacs NDX -file format. 
Periodic boundary conditions are handled internally if the input trajectories -contain box dimensions.''', formatter_class=lambda prog: RawTextHelpFormatter(prog, width=135, max_help_position=52), add_help=False, usage=SUPPRESS) +def run(ns): - all_args_header = config.sep_close+'\n| REQUIRED/OPTIONAL ARGUMENTS |\n'+config.sep_close - # bullet = '❭' - # bullet = '★' - # bullet = '|' - bullet = ' ' + print() + print(swarmcg.shared.styling.sep_close) + print('| PRE-PROCESSING |') + print(swarmcg.shared.styling.sep_close) + print() - required_args = args_parser.add_argument_group(all_args_header+'\n\n'+bullet+'MODELS FILES') - required_args.add_argument('-aa_tpr', dest='aa_tpr_filename', help=config.help_aa_tpr, type=str, default=config.metavar_aa_tpr, metavar=' '+scg.par_wrap(config.metavar_aa_tpr)) - required_args.add_argument('-aa_traj', dest='aa_traj_filename', help=config.help_aa_traj, type=str, default=config.metavar_aa_traj, metavar=' '+scg.par_wrap(config.metavar_aa_traj)) - required_args.add_argument('-cg_map', dest='cg_map_filename', help=config.help_cg_map, type=str, default=config.metavar_cg_map, metavar=' '+scg.par_wrap(config.metavar_cg_map)) - required_args.add_argument('-cg_itp', dest='cg_itp_filename', help='ITP file of the CG model to evaluate', type=str, default=config.metavar_cg_itp, metavar=' '+scg.par_wrap(config.metavar_cg_itp)) - required_args.add_argument('-cg_tpr', dest='cg_tpr_filename', help='TPR file of your CG simulation (omit for solo AA inspection)', type=str, default=config.metavar_cg_tpr, metavar=' '+scg.par_wrap(config.metavar_cg_tpr)) - required_args.add_argument('-cg_traj', dest='cg_traj_filename', help='XTC file of your CG trajectory (omit for solo AA inspection)', type=str, default=config.metavar_cg_traj, metavar=' '+scg.par_wrap(config.metavar_cg_traj)) - # required_args.add_argument('-figmolname', dest='figmolname', help='TODO REMOVE', type=str, required=True) # TODO: remove, this was just for figures + from numpy import 
VisibleDeprecationWarning + warnings.filterwarnings("ignore", category=VisibleDeprecationWarning) # filter MDAnalysis + numpy deprecation stuff that is annoying - optional_args = args_parser.add_argument_group(bullet+'CG MODEL SCALING') - # optional_args.add_argument('-nb_threads', dest='nb_threads', help='number of threads to use', type=int, default=1, metavar='1') # TODO: does NOT work properly -- modif MDAnalysis code with OpenMP num_threads(n) in the pragma - optional_args.add_argument('-bonds_scaling', dest='bonds_scaling', help=config.help_bonds_scaling, type=float, default=config.bonds_scaling, metavar=' '+scg.par_wrap(config.bonds_scaling)) - optional_args.add_argument('-bonds_scaling_str', dest='bonds_scaling_str', help=config.help_bonds_scaling_str, type=str, default=config.bonds_scaling_str, metavar='') - optional_args.add_argument('-min_bonds_length', dest='min_bonds_length', help=config.help_min_bonds_length, type=float, default=config.min_bonds_length, metavar=' '+scg.par_wrap(config.min_bonds_length)) - optional_args.add_argument('-b2a_score_fact', dest='bonds2angles_scoring_factor', help=config.help_bonds2angles_scoring_factor, type=float, default=config.bonds2angles_scoring_factor, metavar=' '+scg.par_wrap(config.bonds2angles_scoring_factor)) - # ONLY FOR PAPER FIGURES - # optional_args.add_argument('-datamol', dest='datamol', help='Save bonded score and Rg values for each frame across simulation', type=str, default='MOL_EXEC_MODE') - - graphical_args = args_parser.add_argument_group(bullet+'FIGURE DISPLAY') - graphical_args.add_argument('-mismatch_ordering', dest='mismatch_order', help='Enables ordering of bonds/angles/dihedrals by mismatch score\nbetween pairwise AA-mapped/CG distributions (can help diagnosis)', default=False, action='store_true') - graphical_args.add_argument('-bw_constraints', dest='bw_constraints', help=config.help_bw_constraints, type=float, default=config.bw_constraints, metavar=' '+scg.par_wrap(config.bw_constraints)) - 
graphical_args.add_argument('-bw_bonds', dest='bw_bonds', help=config.help_bw_bonds, type=float, default=config.bw_bonds, metavar=' '+scg.par_wrap(config.bw_bonds)) - graphical_args.add_argument('-bw_angles', dest='bw_angles', help=config.help_bw_angles, type=float, default=config.bw_angles, metavar=' '+scg.par_wrap(config.bw_angles)) - graphical_args.add_argument('-bw_dihedrals', dest='bw_dihedrals', help=config.help_bw_dihedrals, type=float, default=config.bw_dihedrals, metavar=' '+scg.par_wrap(config.bw_dihedrals)) - graphical_args.add_argument('-disable_x_scaling', dest='row_x_scaling', help='Disable auto-scaling of X axis across each row of the plot', default=True, action='store_false') - graphical_args.add_argument('-disable_y_scaling', dest='row_y_scaling', help='Disable auto-scaling of Y axis across each row of the plot', default=True, action='store_false') - graphical_args.add_argument('-bonds_max_range', dest='bonded_max_range', help=config.help_bonds_max_range, type=float, default=config.bonds_max_range, metavar=' '+scg.par_wrap(config.bonds_max_range)) - graphical_args.add_argument('-ncols', dest='ncols_max', help='Max. 
nb of columns displayed in figure', type=int, default=0, metavar='') # TODO: make this a line return in plot instead of ignoring groups - - optional_args2 = args_parser.add_argument_group(bullet+'OTHERS') - optional_args2.add_argument('-o', dest='plot_filename', help='Filename for the output plot (extension/format can be one of:\neps, pdf, pgf, png, ps, raw, rgba, svg, svgz)', type=str, default='distributions.png', metavar='distributions.png') - optional_args2.add_argument('-h', '--help', action='help', help='Show this help message and exit') - optional_args2.add_argument('-v', '--verbose', dest='verbose', help=config.help_verbose, action='store_true', default=False) - - # display help if script was called without arguments - if len(sys.argv) == 1: - args_parser.print_help() - sys.exit() + # TODO: make it possible to feed a delta/offset for Rg in case the model has bonds scaling ? - # arguments handling, display command line if help or no arguments provided - # argcomplete.autocomplete(parser) - ns = args_parser.parse_args() - input_cmdline = ' '.join(map(cmd_quote, sys.argv)) - print('Working directory:', os.getcwd()) - print('Command line:', input_cmdline) + # get basenames for simulation files + ns.cg_itp_basename = os.path.basename(ns.cg_itp_filename) - ns.molname_in = None # TODO: arguments that exist only in the scope of optimization (useless for manual model evaluation) -- but this could be modified to be allowed to evaluate models in mixed membranes, averaging distribs for given molecule name only + # NOTE: some arguments exist only in the scope of optimization (optimize_model.py) or only in the scope of model + # evaluation (evaluate_mode.py), so they need to be defined here + ns.molname_in = None ns.gyr_aa_mapped, ns.gyr_aa_mapped_std = None, None - # ns.sasa_aa_mapped, ns.sasa_aa_mapped_std = None, None - ns.aa_rg_offset = 0 + ns.sasa_aa_mapped, ns.sasa_aa_mapped_std = None, None + ns.aa_rg_offset = 0 # TODO: allow an argument more in evaluate_model, 
like in optimiwe_model, for adding an offset to Rg scg.set_MDA_backend(ns) - # TODO: add missing checks -- if some are missing - # TODO: factorize all checks and put them in global lib if not os.path.isfile(ns.aa_tpr_filename): - sys.exit(config.header_error+'Cannot find coordinate file of the atomistic simulation\n(GRO, PDB, or other trajectory formats supported by MDAnalysis)') + msg = ( + f"Cannot find topology file of the atomistic simulation at location: {ns.aa_tpr_filename}\n" + f"(TPR or other portable topology formats supported by MDAnalysis)" + ) + raise exceptions.MissingCoordinateFile(msg) if not os.path.isfile(ns.aa_traj_filename): - sys.exit(config.header_error+'Cannot find trajectory file of the atomistic simulation\n(XTC, TRR, or other trajectory formats supported by MDAnalysis)') + msg = ( + f"Cannot find trajectory file of the atomistic simulation at location: {ns.aa_traj_filename}\n" + f"(XTC, TRR, or other trajectory formats supported by MDAnalysis)" + ) + raise exceptions.MissingTrajectoryFile(msg) + if not os.path.isfile(ns.cg_map_filename): - sys.exit(config.header_error+'Cannot find CG beads mapping file (NDX-like file format)') + msg = ( + f"Cannot find CG beads mapping file at location: {ns.cg_map_filename}\n" + f"(NDX-like file format)" + ) + raise exceptions.MissingIndexFile(msg) + if not os.path.isfile(ns.cg_itp_filename): - sys.exit(config.header_error+'Cannot find ITP file of the CG model') + msg = f"Cannot find ITP file of the CG model at location: {ns.cg_itp_filename}" + raise exceptions.MissingItpFile(msg) # check bonds scaling arguments conflicts if (ns.bonds_scaling != config.bonds_scaling and ns.min_bonds_length != config.min_bonds_length) or (ns.bonds_scaling != config.bonds_scaling and ns.bonds_scaling_str != config.bonds_scaling_str) or (ns.min_bonds_length != config.min_bonds_length and ns.bonds_scaling_str != config.bonds_scaling_str): - sys.exit(config.header_error+'Only one of arguments -bonds_scaling, -bonds_scaling_str 
and -min_bonds_length can be provided\nPlease check your parameters') - # if ns.bonds_scaling < 1: - # sys.exit(config.header_error+'Bonds scaling factor is inferior to 1, please check your parameters') - - print() - print(config.sep_close) - print('| PRE-PROCESSING |') - print(config.sep_close) - print() + msg = ( + "Only one of arguments -bonds_scaling, -bonds_scaling_str and -min_bonds_length " + "can be provided. Please check your parameters" + ) + raise exceptions.InputArgumentError(msg) + + # check the mapping type + ns.mapping_type = ns.mapping_type.upper() + if ns.mapping_type != 'COM' and ns.mapping_type != 'COG': + msg = "Mapping type provided via argument '-mapping' must be either COM or COG (Center of Mass or Center of Geometry)." + raise exceptions.InputArgumentError(msg) # display parameters for function compare_models if not os.path.isfile(ns.cg_tpr_filename) or not os.path.isfile(ns.cg_traj_filename): - # switch to atomistic mapping inspection exclusively (= do NOT use real CG distributions) + # switch to atomistic mapping inspection exclusively (= do NOT plot the CG distributions) print('Could not find file(s) for either CG topology or trajectory') print(' Going for inspection of AA-mapped distributions exclusively') print() ns.atom_only = True else: ns.atom_only = False - # elif ns.cg_tpr_filename is not None and ns.cg_traj_filename is not None: - # ns.atom_only = False - # elif ns.cg_tpr_filename is not None or ns.cg_traj_filename is not None: - # if not os.path.isfile(str(ns.cg_tpr_filename)): - # sys.exit(config.header_error+'Cannot find portable run file of the coarse-grained simulation\n(TPR, or other portable formats supported by MDAnalysis)\nIf you want to look at distributions of your atomistic simulation\nexclusively, you have to omit both arguments -cg_tpr and -cg_traj') - # if not os.path.isfile(str(ns.cg_traj_filename)): - # sys.exit(config.header_error+'Cannot find trajectory file of the coarse-grained simulation (XTC, TRR, or other 
trajectory formats supported by MDAnalysis)\nIf you want to look at distributions of your atomistic simulation\nexclusively, you have to omit both arguments -aa_tpr and -cg_traj') try: if not ns.plot_filename.split('.')[-1] in ['eps', 'pdf', 'pgf', 'png', 'ps', 'raw', 'rgba', 'svg', 'svgz']: ns.plot_filename = ns.plot_filename+'.png' - except IndexError: + except IndexError as e: ns.plot_filename = ns.plot_filename+'.png' - scg.create_bins_and_dist_matrices(ns) + scg.create_bins_and_dist_matrices(ns) # bins for EMD calculations + scg.read_ndx_atoms2beads(ns) # read mapping, get atoms accurences in beads + scg.get_atoms_weights_in_beads(ns) # get weights of atoms within beads + + scg.read_cg_itp_file(ns) # load the ITP object and find out geoms grouping + scg.process_scaling_str(ns) # process the bonds scaling specified by user + + print() + scg.read_aa_traj(ns) # create universe and read traj + scg.load_aa_data(ns) # read atoms attributes + scg.make_aa_traj_whole_for_selected_mols(ns) + + # for each CG bead, create atom groups for trajectory geoms calculation using mass and atom weights across beads + scg.get_beads_MDA_atomgroups(ns) + + print('\nMapping the trajectory from AA to CG representation') + scg.initialize_cg_traj(ns) + scg.map_aa2cg_traj(ns) + print() + scg.compare_models(ns, manual_mode=True, calc_sasa=False) +def main(): + + print(swarmcg.shared.styling.header_package( + ' Module: Model bonded terms assessment\n')) + + formatter = lambda prog: RawTextHelpFormatter(prog, width=135, max_help_position=52) + args_parser = ArgumentParser( + description=EVALUATE_DESCR, + formatter_class=formatter, + add_help=False, + usage=SUPPRESS + ) + + all_args_header = swarmcg.shared.styling.sep_close + '\n| REQUIRED/OPTIONAL ARGUMENTS |\n' + swarmcg.shared.styling.sep_close + bullet = ' ' + + required_args = args_parser.add_argument_group( + all_args_header + '\n\n' + bullet + 'MODELS FILES') + required_args.add_argument('-aa_tpr', dest='aa_tpr_filename', 
help=config.help_aa_tpr, type=str, + default=config.metavar_aa_tpr, + metavar=' ' + scg.par_wrap(config.metavar_aa_tpr)) + required_args.add_argument('-aa_traj', dest='aa_traj_filename', help=config.help_aa_traj, + type=str, default=config.metavar_aa_traj, + metavar=' ' + scg.par_wrap(config.metavar_aa_traj)) + required_args.add_argument('-cg_map', dest='cg_map_filename', help=config.help_cg_map, type=str, + default=config.metavar_cg_map, + metavar=' ' + scg.par_wrap(config.metavar_cg_map)) + required_args.add_argument('-mapping', dest='mapping_type', help=config.help_mapping_type, type=str, + default='COM', metavar=' (COM)') + required_args.add_argument('-cg_itp', dest='cg_itp_filename', + help='ITP file of the CG model to evaluate', type=str, + default=config.metavar_cg_itp, + metavar=' ' + scg.par_wrap(config.metavar_cg_itp)) + required_args.add_argument('-cg_tpr', dest='cg_tpr_filename', + help='TPR file of your CG simulation (omit for solo AA inspection)', + type=str, default=config.metavar_cg_tpr, + metavar=' ' + scg.par_wrap(config.metavar_cg_tpr)) + required_args.add_argument('-cg_traj', dest='cg_traj_filename', + help='XTC file of your CG trajectory (omit for solo AA inspection)', + type=str, default=config.metavar_cg_traj, + metavar=' ' + scg.par_wrap(config.metavar_cg_traj)) + + optional_args = args_parser.add_argument_group(bullet + 'CG MODEL SCALING') + # optional_args.add_argument('-nb_threads', dest='nb_threads', help='number of threads to use', type=int, default=1, metavar='1') # TODO: does NOT work properly -- modif MDAnalysis code with OpenMP num_threads(n) in the pragma + optional_args.add_argument('-bonds_scaling', dest='bonds_scaling', + help=config.help_bonds_scaling, type=float, + default=config.bonds_scaling, + metavar=' ' + scg.par_wrap(config.bonds_scaling)) + optional_args.add_argument('-bonds_scaling_str', dest='bonds_scaling_str', + help=config.help_bonds_scaling_str, type=str, + default=config.bonds_scaling_str, metavar='') + 
optional_args.add_argument('-min_bonds_length', dest='min_bonds_length', + help=config.help_min_bonds_length, type=float, + default=config.min_bonds_length, + metavar=' ' + scg.par_wrap(config.min_bonds_length)) + optional_args.add_argument('-b2a_score_fact', dest='bonds2angles_scoring_factor', + help=config.help_bonds2angles_scoring_factor, type=float, + default=config.bonds2angles_scoring_factor, + metavar=' ' + scg.par_wrap(config.bonds2angles_scoring_factor)) + + graphical_args = args_parser.add_argument_group(bullet + 'FIGURE DISPLAY') + graphical_args.add_argument('-mismatch_ordering', dest='mismatch_order', + help='Enables ordering of bonds/angles/dihedrals by mismatch score\nbetween pairwise AA-mapped/CG distributions (can help diagnosis)', + default=False, action='store_true') + graphical_args.add_argument('-bw_constraints', dest='bw_constraints', + help=config.help_bw_constraints, type=float, + default=config.bw_constraints, + metavar=' ' + scg.par_wrap(config.bw_constraints)) + graphical_args.add_argument('-bw_bonds', dest='bw_bonds', help=config.help_bw_bonds, type=float, + default=config.bw_bonds, + metavar=' ' + scg.par_wrap(config.bw_bonds)) + graphical_args.add_argument('-bw_angles', dest='bw_angles', help=config.help_bw_angles, + type=float, default=config.bw_angles, + metavar=' ' + scg.par_wrap(config.bw_angles)) + graphical_args.add_argument('-bw_dihedrals', dest='bw_dihedrals', help=config.help_bw_dihedrals, + type=float, default=config.bw_dihedrals, + metavar=' ' + scg.par_wrap(config.bw_dihedrals)) + graphical_args.add_argument('-disable_x_scaling', dest='row_x_scaling', + help='Disable auto-scaling of X axis across each row of the plot', + default=True, action='store_false') + graphical_args.add_argument('-disable_y_scaling', dest='row_y_scaling', + help='Disable auto-scaling of Y axis across each row of the plot', + default=True, action='store_false') + graphical_args.add_argument('-bonds_max_range', dest='bonded_max_range', + 
help=config.help_bonds_max_range, type=float, + default=config.bonds_max_range, + metavar=' ' + scg.par_wrap(config.bonds_max_range)) + graphical_args.add_argument('-ncols', dest='ncols_max', + help='Max. nb of columns displayed in figure', type=int, default=0, + metavar='') # TODO: make this a line return in plot instead of ignoring groups + + optional_args2 = args_parser.add_argument_group(bullet + 'OTHERS') + optional_args2.add_argument('-o', dest='plot_filename', + help='Filename for the output plot (extension/format can be one of:\neps, pdf, pgf, png, ps, raw, rgba, svg, svgz)', + type=str, default='distributions.png', metavar=' (distributions.png)') + optional_args2.add_argument('-h', '--help', action='help', + help='Show this help message and exit') + optional_args2.add_argument('-v', '--verbose', dest='verbose', help=config.help_verbose, + action='store_true', default=False) + # arguments handling, display command line if help or no arguments provided + ns = args_parser.parse_args() + input_cmdline = ' '.join(map(cmd_quote, sys.argv)) + print('Working directory:', os.getcwd()) + print('Command line:', input_cmdline) + run(ns) +if __name__ == "__main__": + main() diff --git a/swarmcg/optimize_model.py b/swarmcg/optimize_model.py index 8605a43..92a0989 100644 --- a/swarmcg/optimize_model.py +++ b/swarmcg/optimize_model.py @@ -1,665 +1,744 @@ -import warnings - # some numpy version have this ufunc warning at import + many packages call numpy and display annoying warnings +import warnings warnings.filterwarnings("ignore") -import os, sys, re, shutil, subprocess, time, copy, contextlib +import os, sys, shutil, subprocess, time, copy, contextlib from argparse import ArgumentParser, RawTextHelpFormatter, SUPPRESS from shlex import quote as cmd_quote +from datetime import datetime + from fstpso import FuzzyPSO import numpy as np -from datetime import datetime -from scipy.optimize import curve_fit -import MDAnalysis as mda -from . import config -from . 
import swarmCG as scg + +import swarmcg.shared.styling +from swarmcg import config +from swarmcg.shared import exceptions +from swarmcg import swarmCG as scg +from swarmcg.shared.styling import OPTIMISE_DESCR + warnings.resetwarnings() -def main(): +def run(ns): - from numpy import VisibleDeprecationWarning - warnings.filterwarnings("ignore", category=VisibleDeprecationWarning) # filter MDAnalysis + numpy deprecation stuff that is annoying + from numpy import VisibleDeprecationWarning + warnings.filterwarnings("ignore", category=VisibleDeprecationWarning) # filter MDAnalysis + numpy deprecation stuff that is annoying - # TODO: allow to feed a JSON file or DICT-like string for which bonds group to rescale for AA - # TODO: allow to feed a JSON file for cycles of optimization ?? this is more optional but useful for big stuff possibly - # TODO: if using SASA through GMX SASA, ensure vdwradii.dat contains the MARTINI radii - # TODO: give a warning when users specify a bond scaling without specifying an Rg offset !!! - - # TODO: AT OPTI CYCLE 2, FIND ANGLES THAT ARE TOO STEEP (CG) AND WHEN GENERATING THE NEW GUESSES, PUT 10-30-50-70% OF THE CURRENT BEST FORCE CONSTANT IN SEVERAL PARTICLES !!!!!!!!! - - # NOTE: gmx trjconv and sasa may produce bugs when using TPR produced with gromacs v5, only current solution seems to be implementing the SASA calculation using MDTraj - - - ##################################### - # ARGUMENTS HANDLING / HELP DISPLAY # - ##################################### - - print(scg.header_package(' Module: CG model optimization\n')) - - args_parser = ArgumentParser(description='''\ -This module automatically optimizes the bonded parameters of a CG model to best match the bonds, -angles and dihedrals distributions of a reference AA model. Different sets of bonded parameters -are explored via swarm optimization (FST-PSO) and iterative CG simulations. 
Bonded parameters are -evaluated for the matching they produce between AA and CG distributions via a scoring function -relying on the Earth Movers' Distance (EMD/Wasserstein). The process is designed to execute in -4-24h on a standard desktop machine, according to hardware, molecule size and simulations setup. - -This module has 2 optimization modes: - - (1) TUNE BOTH BONDS LENGTHS, ANGLES/DIHEDRALS VALUES AND THEIR FORCE CONSTANTS. First uses - Boltzmann Inversion to estimate bonds lengths, angles/dihedrals values and their force - constants, then runs optimization to best fit the reference AA-mapped distributions. - - (2) TUNE ONLY FORCE CONSTANTS FOR ANGLES/DIHEDRALS VALUES AND ALL PARAMETERS FOR BONDS. - Equilibrium values of angles/dihedrals provided in the preliminary CG ITP model are - conserved while optimization best fits reference AA-mapped distributions. - -Independently of parameters, the expected input is: - - (1) Atomistic trajectory of the molecule (gromacs binary TPR + trajectory files XTC TRR) - (2) Mapping file, atoms to CG beads (gromacs NDX format) - (3) CG model ITP file to be optimized (group identical bonds/angles/dihedrals, see below) - (4) CG simulation files (initial configuration GRO + system TOP + MDP files) - -You can prepare a directory using default input filenames, then provide only argument -in_dir. -If -in_dir is provided, all filenames provided as arguments will also be searched for within -this directory. Demonstration data are available at '''+config.github_url+'''. - -Arguments allows to specify scaling of the AA bonds used as reference to optimize the CG model. -An image displaying all AA reference distributions will be created at the very beginning of the -optimization process. You can check it to make sure scaling is conform to your expectations. - -The CG model preliminary ITP file follows the standard ITP format, with one subtlety. 
The file -can include groups of bonds, angles and dihedrals that will be considered identical. Their -distributions will be averaged within groups. This is important to obtain reliable results for -symmetrical molecules. Groups can be formed using empty line(s) or comment(s), like this: - - [ angles ] - - ; i j k funct angle force.c. - ; grp 1 - 5 6 10 1 150 40 ; NOTE 1: force constants can be set to 0 - 9 8 11 1 150 40 ; in the prelim. model to optimize - ; grp 2 - 1 6 10 2 120 0 ; NOTE 2: either comment(s) or empty line(s) - 4 8 11 2 120 0 ; separate groups of bonds/ang/dihe. - -The AA trajectory is mapped on-the-fly using file from argument -cg_map, which uses gromacs NDX -file format. Periodic boundary conditions are handled internally if the input AA trajectory -contains box dimensions.''', formatter_class=lambda prog: RawTextHelpFormatter(prog, width=135, max_help_position=52), add_help=False, usage=SUPPRESS) - - # TODO: handle trajectories for which no box informations are provided - # TODO: explain what is modified in the MDP - # TODO: explain module analyze_opti_moves.py can be used to monitor optimization at any point of the process - # TODO: end the help message by a new frame with examples from the demo data - - req_args_header = config.sep_close+'\n| REQUIRED ARGUMENTS |\n'+config.sep_close - opt_args_header = config.sep_close+'\n| OPTIONAL ARGUMENTS |\n'+config.sep_close - # bullet = '❭' - # bullet = '★' - # bullet = '|' - bullet = ' ' - - optional_args0 = args_parser.add_argument_group(req_args_header+'\n\n'+bullet+'EXECUTION MODE') - optional_args0.add_argument('-exec_mode', dest='exec_mode', help='MODE 1: Tune both bonds lengths, angles/dihedrals values\n and their force constants\nMODE 2: Like MODE 1 but angles/dihedrals values in the prelim.\n CG model ITP are conserved during optimization\nMODE 3: Like MODE 1 but only dihedrals values in the prelim.\n CG model ITP are conserved during optimization', type=int, default=1, metavar=' (1)') - - 
required_args = args_parser.add_argument_group(bullet+'REFERENCE AA MODEL') - required_args.add_argument('-aa_tpr', dest='aa_tpr_filename', help=config.help_aa_tpr, type=str, default=config.metavar_aa_tpr, metavar=' '+scg.par_wrap(config.metavar_aa_tpr)) - required_args.add_argument('-aa_traj', dest='aa_traj_filename', help=config.help_aa_traj, type=str, default=config.metavar_aa_traj, metavar=' '+scg.par_wrap(config.metavar_aa_traj)) - required_args.add_argument('-cg_map', dest='cg_map_filename', help=config.help_cg_map, type=str, default=config.metavar_cg_map, metavar=' '+scg.par_wrap(config.metavar_cg_map)) - - sim_filenames_args = args_parser.add_argument_group(bullet+'CG MODEL OPTIMIZATION') - sim_filenames_args.add_argument('-cg_itp', dest='cg_itp_filename', help='ITP file of the CG model to optimize', type=str, default=config.metavar_cg_itp, metavar=' '+scg.par_wrap(config.metavar_cg_itp)) - sim_filenames_args.add_argument('-cg_gro', dest='gro_input_filename', help='Starting GRO file used for iterative simulation\nWill be minimized and relaxed before each MD run', type=str, default='start_conf.gro', metavar=' (start_conf.gro)') - sim_filenames_args.add_argument('-cg_top', dest='top_input_filename', help='TOP file used for iterative simulation', type=str, default='system.top', metavar=' (system.top)') - sim_filenames_args.add_argument('-cg_mdp_mini', dest='mdp_minimization_filename', help='MDP file used for minimization runs', type=str, default='mini.mdp', metavar=' (mini.mdp)') - sim_filenames_args.add_argument('-cg_mdp_equi', dest='mdp_equi_filename', help='MDP file used for equilibration runs', type=str, default='equi.mdp', metavar=' (equi.mdp)') - sim_filenames_args.add_argument('-cg_mdp_md', dest='mdp_md_filename', help='MDP file used for the MD runs analyzed for optimization', type=str, default='md.mdp', metavar=' (md.mdp)') - - optional_args4 = args_parser.add_argument_group(opt_args_header+'\n\n'+bullet+'FILES HANDLING') - 
optional_args4.add_argument('-in_dir', dest='input_folder', help='Additional prefix path used to find argument-provided files\nIf ambiguous, files found without prefix are preferred', type=str, default='.', metavar='') - optional_args4.add_argument('-out_dir', dest='output_folder', help='Directory where to store all outputs of this program\nDefault -out_dir is named after timestamp', type=str, default='', metavar='') - - optional_args1 = args_parser.add_argument_group(bullet+'GROMACS SETTINGS') - optional_args1.add_argument('-gmx', dest='gmx_path', help=config.help_gmx_path, type=str, default=config.gmx_path, metavar=' '+scg.par_wrap(config.gmx_path)) - optional_args1.add_argument('-nt', dest='nb_threads', help='Number of threads to use, forwarded to gmx mdrun -nt', type=int, default=0, metavar=' (0)') - optional_args1.add_argument('-gpu_id', dest='gpu_id', help='String (use quotes) space-separated list of GPU device IDs', type=str, default='', metavar='') - optional_args1.add_argument('-gmx_args_str', dest='gmx_args_str', help='String (use quotes) of arguments to forward to gmx mdrun\nIf provided, arguments -nt and -gpu_id are ignored', type=str, default='', metavar='') - optional_args1.add_argument('-mini_maxwarn', dest='mini_maxwarn', help='Max. 
number of warnings to ignore, forwarded to gmx\ngrompp -maxwarn at each minimization step', type=int, default=1, metavar=' (1)') - optional_args1.add_argument('-sim_kill_delay', dest='sim_kill_delay', help='Time (s) after which to kill a simulation that has not been\nwriting into its log file, in case a simulation gets stuck', type=int, default=60, metavar=' (60)') - - optional_args2 = args_parser.add_argument_group(bullet+'CG MODEL SCALING') - optional_args2.add_argument('-aa_rg_offset', dest='aa_rg_offset', help='Radius of gyration offset (nm) to be applied to AA data\naccording to your potential bonds rescaling (for display only)', type=float, default=0.00, metavar=' '+scg.par_wrap('0.00')) - optional_args2.add_argument('-bonds_scaling', dest='bonds_scaling', help=config.help_bonds_scaling, type=float, default=config.bonds_scaling, metavar=' '+scg.par_wrap(config.bonds_scaling)) - optional_args2.add_argument('-bonds_scaling_str', dest='bonds_scaling_str', help=config.help_bonds_scaling_str, type=str, default=config.bonds_scaling_str, metavar='') - optional_args2.add_argument('-min_bonds_length', dest='min_bonds_length', help=config.help_min_bonds_length, type=float, default=config.min_bonds_length, metavar=' '+scg.par_wrap(config.min_bonds_length)) - - optional_args5 = args_parser.add_argument_group(bullet+'CG MODEL SCORING') - optional_args5.add_argument('-cg_time_short', dest='sim_duration_short', help='Simulation time (ns) of the MD runs analyzed for optimization\nIn opti. cycles 1 and 2, this will modify MDP file for the MD runs', type=float, default=10, metavar=' (10)') - optional_args5.add_argument('-cg_time_long', dest='sim_duration_long', help='Simulation time (ns) of the MD runs analyzed for optimization\nIn opti. 
cycle 3, this will modify MDP file for the MD runs', type=float, default=25, metavar=' (25)') - optional_args5.add_argument('-b2a_score_fact', dest='bonds2angles_scoring_factor', help=config.help_bonds2angles_scoring_factor, type=float, default=config.bonds2angles_scoring_factor, metavar=' '+scg.par_wrap(config.bonds2angles_scoring_factor)) - optional_args5.add_argument('-bw_constraints', dest='bw_constraints', help=config.help_bw_constraints, type=float, default=config.bw_constraints, metavar=' '+scg.par_wrap(config.bw_constraints)) - optional_args5.add_argument('-bw_bonds', dest='bw_bonds', help=config.help_bw_bonds, type=float, default=config.bw_bonds, metavar=' '+scg.par_wrap(config.bw_bonds)) - optional_args5.add_argument('-bw_angles', dest='bw_angles', help=config.help_bw_angles, type=float, default=config.bw_angles, metavar=' '+scg.par_wrap(config.bw_angles)) - optional_args5.add_argument('-bw_dihedrals', dest='bw_dihedrals', help=config.help_bw_dihedrals, type=float, default=config.bw_dihedrals, metavar=' '+scg.par_wrap(config.bw_dihedrals)) - optional_args5.add_argument('-bonds_max_range', dest='bonded_max_range', help=config.help_bonds_max_range, type=float, default=config.bonds_max_range, metavar=' '+scg.par_wrap(config.bonds_max_range)) - - optional_args6 = args_parser.add_argument_group(bullet+'CG MODEL FORCE CONSTANTS') - optional_args6.add_argument('-max_fct_bonds_f1', dest='default_max_fct_bonds_opti', help=config.help_max_fct_bonds, type=float, default=config.default_max_fct_bonds_opti, metavar=' '+scg.par_wrap(config.default_max_fct_bonds_opti)) - optional_args6.add_argument('-max_fct_angles_f1', dest='default_max_fct_angles_opti_f1', help=config.help_max_fct_angles_f1, type=float, default=config.default_max_fct_angles_opti_f1, metavar=' '+scg.par_wrap(config.default_max_fct_angles_opti_f1)) - optional_args6.add_argument('-max_fct_angles_f2', dest='default_max_fct_angles_opti_f2', help=config.help_max_fct_angles_f2, type=float, 
default=config.default_max_fct_angles_opti_f2, metavar=' '+scg.par_wrap(config.default_max_fct_angles_opti_f2)) - optional_args6.add_argument('-max_fct_dihedrals_f149', dest='default_abs_range_fct_dihedrals_opti_func_with_mult', help=config.help_max_fct_dihedrals_with_mult, type=float, default=config.default_abs_range_fct_dihedrals_opti_func_with_mult, metavar=''+scg.par_wrap(config.default_abs_range_fct_dihedrals_opti_func_with_mult)) - optional_args6.add_argument('-max_fct_dihedrals_f2', dest='default_max_fct_dihedrals_opti_func_without_mult', help=config.help_max_fct_dihedrals_without_mult, type=float, default=config.default_max_fct_dihedrals_opti_func_without_mult, metavar=''+scg.par_wrap(config.default_max_fct_dihedrals_opti_func_without_mult)) - - optional_args3 = args_parser.add_argument_group(bullet+'OTHERS') - optional_args3.add_argument('-temp', dest='temp', help='Temperature used to perform Boltzmann inversion (K)', type=float, default=config.sim_temperature, metavar=' '+scg.par_wrap(config.sim_temperature)) - optional_args3.add_argument('-keep_all_sims', dest='keep_all_sims', help='Store all gmx files for all simulations, may use disk space', action='store_true', default=False) - optional_args3.add_argument('-h', '--help', help='Show this help message and exit', action='help') - optional_args3.add_argument('-v', '--verbose', dest='verbose', help=config.help_verbose, action='store_true', default=False) - - # display help if script was called without arguments - if len(sys.argv) == 1: - args_parser.print_help() - sys.exit() - - # arguments handling, display command line if help or no arguments provided - # argcomplete.autocomplete(parser) - ns = args_parser.parse_args() - input_cmdline = ' '.join(map(cmd_quote, sys.argv)) - ns.exec_folder = time.strftime("MODEL_OPTI__STARTED_%d-%m-%Y_%Hh%Mm%Ss") # default folder name for all files of this optimization run, in case none is provided - if ns.output_folder != '': - ns.exec_folder = ns.output_folder - 
print('Working directory:', os.getcwd()) - print('Command line:', input_cmdline) - print('Results directory:', ns.exec_folder) - - # namespace variables not directly linked to arguments for plotting or for global package interpretation - ns.mismatch_order = False - ns.row_x_scaling = True - ns.row_y_scaling = True - ns.ncols_max = 0 # 0 to display all - # ns.atom_only = False - ns.molname_in = None # if None the first found using TPR atom ordering will be used - ns.process_alive_time_sleep = 10 # nb of seconds between process alive check cycles - ns.process_alive_nb_cycles_dead = int(ns.sim_kill_delay / ns.process_alive_time_sleep) # nb of cycles without .log file bytes size changes to determine that the MD run is stuck - ns.bonds_rescaling_performed = False # for user information display - - # get basenames for simulation files - ns.cg_itp_basename = os.path.basename(ns.cg_itp_filename) - ns.gro_input_basename = os.path.basename(ns.gro_input_filename) - ns.top_input_basename = os.path.basename(ns.top_input_filename) - ns.mdp_minimization_basename = os.path.basename(ns.mdp_minimization_filename) - ns.mdp_equi_basename = os.path.basename(ns.mdp_equi_filename) - ns.mdp_md_basename = os.path.basename(ns.mdp_md_filename) - - - #################### - # ARGUMENTS CHECKS # - #################### - - print() - print() - print(config.sep_close) - print('| PRE-PROCESSING AND CONTROLS |') - print(config.sep_close) - # print() - - # TODO: check that at least 10-20% of the simulations of the 1st swarm iteration finished properly, otherwise lower all energies or tell the user he is not writting into the log file regularly enough - # TODO: test this program with ITP files that contain all the different dihedral functions, angles functions, constraints etc - # TODO: find some fuzzy logic to determine number of swarm iterations + take some large margin to ensure it will optimize correctly - - # avoid overwriting an output directory of a previous optimization run - if 
os.path.isfile(ns.exec_folder) or os.path.isdir(ns.exec_folder): - sys.exit(config.header_error+'Provided output folder already exists, please delete existing folder manually or provide another folder name.') - - # check if we can find files at user-provided location(s) - arg_entries = vars(ns) # dict view of the arguments namespace - user_provided_filenames = ['aa_tpr_filename', 'aa_traj_filename', 'cg_map_filename', 'cg_itp_filename', 'gro_input_filename', 'top_input_filename', 'mdp_minimization_filename', 'mdp_equi_filename', 'mdp_md_filename'] - args_names = ['aa_tpr', 'aa_traj', 'cg_map', 'cg_itp', 'cg_sim_gro', 'cg_sim_top', 'cg_sim_mdp_mini', 'cg_sim_mdp_equi', 'cg_sim_mdp_md'] - - for i in range(len(user_provided_filenames)): - arg_entry = user_provided_filenames[i] - if not os.path.isfile(arg_entries[arg_entry]): - data_folder_path = ns.input_folder+'/'+arg_entries[arg_entry] - if ns.input_folder != '.' and os.path.isfile(data_folder_path): - arg_entries[arg_entry] = data_folder_path - else: - if ns.input_folder == '': - data_folder_path = arg_entries[arg_entry] - sys.exit(config.header_error+'Cannot find file for argument -'+args_names[i]+' (expected at location: '+data_folder_path+')') - - # check that gromacs alias is correct - with open(os.devnull, 'w') as devnull: - try: - subprocess.call(ns.gmx_path, stdout=devnull, stderr=devnull) - except OSError: - sys.exit(config.header_error+'Cannot find GROMACS using alias \''+ns.gmx_path+'\', please provide the right GROMACS alias or path') - - # check that ITP filename for the model to optimize is indeed included in the TOP file of the simulation directory - # then find all TOP includes for copying files for simulations at each iteration - top_includes_filenames = [] - with open(ns.top_input_filename, 'r') as fp: - all_top_lines = fp.read() - if ns.cg_itp_basename not in all_top_lines: - sys.exit(config.header_error+'The CG ITP model filename you provided is not included in your TOP file') - - top_lines = 
all_top_lines.split('\n') - top_lines = [top_line.strip().split(';')[0] for top_line in top_lines] # split for comments - for top_line in top_lines: - if top_line.startswith('#include'): - top_include = top_line.split()[1].replace('"', '').replace("'", '') # remove potential single and double quotes - top_includes_filenames.append(top_include) - # TODO: VERIFY THE PRESENCE OF ALLLLLLLLL TOP FILES, NOT ONLY THE CG MODEL'S - - # check gmx arguments conflicts - if ns.gmx_args_str != '' and (ns.nb_threads != 0 or ns.gpu_id != ''): - print(config.header_warning+'Argument -gmx_args_str is provided together with one of arguments: -nb_threads, -gpu_id\nOnly argument -gmx_args_str will be used during this execution') - - # check bonds scaling arguments conflicts - if (ns.bonds_scaling != config.bonds_scaling and ns.min_bonds_length != config.min_bonds_length) or (ns.bonds_scaling != config.bonds_scaling and ns.bonds_scaling_str != config.bonds_scaling_str) or (ns.min_bonds_length != config.min_bonds_length and ns.bonds_scaling_str != config.bonds_scaling_str): - sys.exit(config.header_error+'Only one of arguments -bonds_scaling, -bonds_scaling_str and -min_bonds_length can be provided\nPlease check your parameters') - # if ns.bonds_scaling < 1: - # sys.exit(config.header_error+'Bonds scaling factor is inferior to 1, please check your parameters') - - - ################## - # INITIALIZATION # - ################## - - scg.set_MDA_backend(ns) - ns.mda_backend = 'serial' # clusters execution - - # directory to write all files for current execution of optimizations routines - os.mkdir(ns.exec_folder) - os.mkdir(ns.exec_folder+'/.internal') - os.mkdir(ns.exec_folder+'/'+config.distrib_plots_all_evals_dirname) - os.mkdir(ns.exec_folder+'/'+config.log_files_all_evals_dirname) - if ns.keep_all_sims: - os.mkdir(ns.exec_folder+'/'+config.sim_files_all_evals_dirname) - - # prepare a directory to be copied at each iteration of the optimization, to run the new simulation - 
os.mkdir(ns.exec_folder+'/'+config.input_sim_files_dirname) - user_provided_sim_files = ['cg_itp_filename', 'gro_input_filename', 'top_input_filename', 'mdp_minimization_filename', 'mdp_equi_filename', 'mdp_md_filename'] - - for sim_file in user_provided_sim_files: - shutil.copy(arg_entries[sim_file], ns.exec_folder+'/'+config.input_sim_files_dirname) - - # get all TOP file includes copied into input simulation directory - top_include_dirbase = os.path.dirname(arg_entries['top_input_filename']) - for top_include in top_includes_filenames: - # shutil.copy(top_include_dirbase+'/'+top_include, ns.exec_folder+'/'+config.input_sim_files_dirname) # PROBLEM LUCA - # shutil.copy(ns.input_folder+'/'+top_include_dirbase+'/'+top_include, ns.exec_folder+'/'+config.input_sim_files_dirname) # PROBLEM WITH PIP - - # print(ns.input_folder, top_include_dirbase, top_include) - shutil.copy(ns.input_folder+'/'+top_include, ns.exec_folder+'/'+config.input_sim_files_dirname) - - # modify the TOP file to adapt includes paths - with open(ns.exec_folder+'/'+config.input_sim_files_dirname+'/'+ns.top_input_basename, 'r') as fp: - all_top_lines = fp.read().split('\n') - with open(ns.exec_folder+'/'+config.input_sim_files_dirname+'/'+ns.top_input_basename, 'w+') as fp: - nb_includes = 0 - for i in range(len(all_top_lines)): - if all_top_lines[i].startswith('#include'): - all_top_lines[i] = '#include "'+os.path.basename(top_includes_filenames[nb_includes])+'"' - nb_includes += 1 - fp.writelines('\n'.join(all_top_lines)) - - ns.nb_eval = 0 # global count of evaluation steps - ns.start_opti_ts = datetime.now().timestamp() - ns.total_eval_time, ns.total_gmx_time, ns.total_model_eval_time = 0, 0, 0 - - scg.create_bins_and_dist_matrices(ns) # bins for EMD calculations - scg.read_ndx_atoms2beads(ns) # read mapping, get atoms accurences in beads - scg.get_atoms_weights_in_beads(ns) # get weights of atoms within beads - - print() - - # read starting CG ITP file - with open(ns.cg_itp_filename, 'r') as 
fp: - itp_lines = fp.read().split('\n') - itp_lines = [itp_line.strip() for itp_line in itp_lines] - scg.read_cg_itp_file(ns, itp_lines) # loads ITP object that contains our reference atomistic data -- won't ever be modified during execution - - # touch results files to be appended to later - with open(ns.exec_folder+'/'+config.opti_perf_recap_file, 'w') as fp: - # TODO: print that file has been generated with Swarm-CG etc -- do this for basically all files - # TODO: add some info on the opti cycles ?? - fp.write('# nb constraints: '+str(ns.nb_constraints)+'\n') - fp.write('# nb bonds: '+str(ns.nb_bonds)+'\n') - fp.write('# nb angles: '+str(ns.nb_angles)+'\n') - fp.write('# nb dihedrals: '+str(ns.nb_dihedrals)+'\n') - fp.write('#\n') - fp.write('# opti_cycle nb_eval fit_score_all fit_score_cstrs_bonds fit_score_angles fit_score_dihedrals eval_score Rg_AA_mapped Rg_CG parameters_set eval_time current_total_time\n') - with open(ns.exec_folder+'/'+config.opti_pairwise_distances_file, 'w'): - pass - - # process specific bonds scaling string, if provided - ns.bonds_scaling_specific = None - if ns.bonds_scaling_str != config.bonds_scaling_str: - sp_str = ns.bonds_scaling_str.split() - if len(sp_str) % 2 != 0: - sys.exit(config.header_error+'Cannot interpret argument -bonds_scaling_str as provided: \''+ns.bonds_scaling_str+'\'\nPlease check your parameters, or help for an example') - ns.bonds_scaling_specific = dict() - i = 0 - try: - while i < len(sp_str): - geom_id = sp_str[i][1:] - if sp_str[i][0].upper() == 'C': - if int(geom_id) > ns.nb_constraints: - sys.exit(config.header_error+'Cannot interpret argument -bonds_scaling_str as provided: \''+ns.bonds_scaling_str+'\'\nA constraint group id exceeds the number of constraints groups defined in the input CG ITP file\nPlease check your parameters, or help for an example') - if not 'C'+geom_id in ns.bonds_scaling_specific: - if float(sp_str[i+1]) < 0: - sys.exit(config.header_error+'Cannot interpret argument 
-bonds_scaling_str as provided: \''+ns.bonds_scaling_str+'\'\nYou cannot provide negative values for average distribution length\nPlease check your parameters, or help for an example') - ns.bonds_scaling_specific['C'+geom_id] = float(sp_str[i+1]) - else: - sys.exit(config.header_error+'Cannot interpret argument -bonds_scaling_str as provided: \''+ns.bonds_scaling_str+'\'\nA constraint group id is provided multiple times (id: '+str(geom_id)+')\nPlease check your parameters, or help for an example') - elif sp_str[i][0].upper() == 'B': - if int(geom_id) > ns.nb_bonds: - sys.exit(config.header_error+'Cannot interpret argument -bonds_scaling_str as provided: \''+ns.bonds_scaling_str+'\'\nA bond group id exceeds the number of bonds groups defined in the input CG ITP file\nPlease check your parameters, or help for an example') - if not 'B'+geom_id in ns.bonds_scaling_specific: - if float(sp_str[i+1]) < 0: - sys.exit(config.header_error+'Cannot interpret argument -bonds_scaling_str as provided: \''+ns.bonds_scaling_str+'\'\nYou cannot provide negative values for average distribution length\nPlease check your parameters, or help for an example') - ns.bonds_scaling_specific['B'+geom_id] = float(sp_str[i+1]) - else: - sys.exit(config.header_error+'Cannot interpret argument -bonds_scaling_str as provided: \''+ns.bonds_scaling_str+'\'\nA bond group id is provided multiple times (id: '+str(geom_id)+')\nPlease check your parameters, or help for an example') - i += 2 - except ValueError: - sys.exit(config.header_error+'Cannot interpret argument -bonds_scaling_str as provided: \''+ns.bonds_scaling_str+'\'\nPlease check your parameters, or help for an example') - - # read atom mapped trajectory + find domains boundaries for values ranges (NOT the force constants, for which it is config/user defined already) - print() - scg.read_aa_traj(ns) - scg.load_aa_data(ns) - scg.make_aa_traj_whole_for_selected_mols(ns) - - print('\nCalculating bonds, angles and dihedrals distributions for 
reference AA-mapped model') - - # for each CG bead, create atom groups for trajectory geoms calculation using mass and atom weights across beads - scg.get_beads_MDA_atomgroups(ns) - - ns.gyr_aa_mapped, ns.gyr_aa_mapped_std = None, None # will be computed one single time with model evaluation script - ns.sasa_aa_mapped, ns.sasa_aa_mapped_std = None, None # will be computed one single time with model evaluation script - - ns.domains_val = {'constraint': [], 'bond': [], 'angle': [], 'dihedral': []} - ns.data_BI = {'bond': [], 'angle': [], 'dihedral': []} # store hists for BI, std and possibly some other stats - - # create all ref atom histograms to be used for pairwise distributions comparisons + find average geoms values as first guesses (without BI at this point) - # get ref atom hists + find very first distances guesses for constraints groups - for grp_constraint in range(ns.nb_constraints): - - constraint_avg, constraint_hist, constraint_values = scg.get_AA_bonds_distrib(ns, beads_ids=ns.cg_itp['constraint'][grp_constraint]['beads'], grp_type='constraint group', grp_nb=grp_constraint) - # if ns.exec_mode == 1: - ns.cg_itp['constraint'][grp_constraint]['value'] = constraint_avg - ns.cg_itp['constraint'][grp_constraint]['avg'] = constraint_avg - ns.cg_itp['constraint'][grp_constraint]['hist'] = constraint_hist - - ns.domains_val['constraint'].append([round(np.min(constraint_values), 3), round(np.max(constraint_values), 3)]) - - # get ref atom hists + find very first distances and force constants guesses for bonds groups - for grp_bond in range(ns.nb_bonds): - - bond_avg, bond_hist, bond_values = scg.get_AA_bonds_distrib(ns, beads_ids=ns.cg_itp['bond'][grp_bond]['beads'], grp_type='bond group', grp_nb=grp_bond) - # if ns.exec_mode == 1: - ns.cg_itp['bond'][grp_bond]['value'] = bond_avg - ns.cg_itp['bond'][grp_bond]['avg'] = bond_avg - ns.cg_itp['bond'][grp_bond]['hist'] = bond_hist - - xmin, xmax = min(np.inf, ns.bins_bonds[np.min(np.nonzero(bond_hist))]), 
max(-np.inf, ns.bins_bonds[np.max(np.nonzero(bond_hist))+1]) - xmin, xmax = xmin-ns.bw_bonds, xmax+ns.bw_bonds - ns.data_BI['bond'].append([np.histogram(bond_values, range=(xmin, xmax), bins=config.bi_nb_bins)[0], np.std(bond_values), np.mean(bond_values), (xmin, xmax)]) - - ns.domains_val['bond'].append([round(np.min(bond_values), 3), round(np.max(bond_values), 3)]) # boundaries of force constats during optimization - - # get ref atom hists + find very first values and force constants guesses for angles groups - for grp_angle in range(ns.nb_angles): - - angle_avg, angle_hist, angle_values_deg, angle_values_rad = scg.get_AA_angles_distrib(ns, beads_ids=ns.cg_itp['angle'][grp_angle]['beads']) - if ns.exec_mode == 1 or ns.exec_mode == 3: - ns.cg_itp['angle'][grp_angle]['value'] = angle_avg - ns.cg_itp['angle'][grp_angle]['avg'] = angle_avg - ns.cg_itp['angle'][grp_angle]['hist'] = angle_hist - - xmin, xmax = min(np.inf, ns.bins_angles[np.min(np.nonzero(angle_hist))]), max(-np.inf, ns.bins_angles[np.max(np.nonzero(angle_hist))+1]) - xmin, xmax = xmin+ns.bw_angles/2, xmax-ns.bw_angles/2 - ns.data_BI['angle'].append([np.histogram(angle_values_rad, range=(np.deg2rad(xmin), np.deg2rad(xmax)), bins=config.bi_nb_bins)[0], np.std(angle_values_rad), (xmin, xmax)]) - - ns.domains_val['angle'].append([round(np.min(angle_values_deg), 2), round(np.max(angle_values_deg), 2)]) # boundaries of force constats during optimization - - # get ref atom hists + find very first values and force constants guesses for dihedrals groups - for grp_dihedral in range(ns.nb_dihedrals): - - dihedral_avg, dihedral_hist, dihedral_values_deg, dihedral_values_rad = scg.get_AA_dihedrals_distrib(ns, beads_ids=ns.cg_itp['dihedral'][grp_dihedral]['beads']) - if ns.exec_mode == 1: # the angle value for dihedral will be calculated from the BI fit, because for dihedrals it makes no sense to use the average - ns.cg_itp['dihedral'][grp_dihedral]['value'] = dihedral_avg - 
ns.cg_itp['dihedral'][grp_dihedral]['avg'] = dihedral_avg - ns.cg_itp['dihedral'][grp_dihedral]['hist'] = dihedral_hist - - xmin, xmax = -180, 180 - ns.data_BI['dihedral'].append([np.histogram(dihedral_values_rad, range=(np.deg2rad(xmin), np.deg2rad(xmax)), bins=2*config.bi_nb_bins)[0], np.std(dihedral_values_rad), np.mean(dihedral_values_rad), (xmin, xmax)]) - - ns.domains_val['dihedral'].append([round(np.min(dihedral_values_deg), 2), round(np.max(dihedral_values_deg), 2)]) # boundaries of force constats during optimization - - if not ns.bonds_rescaling_performed: - print(' No bonds rescaling performed') - - # output png with all the reference distributions, so the user can check - ns.atom_only = True - ns.plot_filename = ns.exec_folder+'/'+config.ref_distrib_plots - with open(os.devnull, 'w') as devnull: - with contextlib.redirect_stdout(devnull): - scg.compare_models(ns, manual_mode=False) - print() - print('Plotted reference AA-mapped distributions (used as target during optimization) at location:\n ', ns.exec_folder+'/'+config.ref_distrib_plots) - ns.atom_only = False - - - ################################## - # ITERATIVE OPTIMIZATION PROCESS # - ################################## - - # parameters for each type of simulation during optimization cycles - # sim duration (ns), max nb of SWARM iterations, max nb SWARM iterations without finding new global best, percentage applied for generating variations around initial guesses/values fed humanly - # sim_type 0 is used for initialization exclusively + detecting too high force constants to lower them, no real optimization is expected from these runs - - # Settings: TEST / utlra-fast settings only for debugging -- DIHEDRALS APPLIED IN THE END EXCLUSIVELY - # sim_types = {0: {'sim_duration': 0.3, 'max_swarm_iter': 1, 'max_swarm_iter_without_new_global_best': 1, 'val_guess_fact': 1.0, 'fct_guess_fact': 0.3}, - # 1: {'sim_duration': 0.3, 'max_swarm_iter': 2, 'max_swarm_iter_without_new_global_best': 2, 
'val_guess_fact': 1.0, 'fct_guess_fact': 0.3}, - # 2: {'sim_duration': 0.3, 'max_swarm_iter': 2, 'max_swarm_iter_without_new_global_best': 2, 'val_guess_fact': 1.0, 'fct_guess_fact': 0.2}, - # 3: {'sim_duration': 0.3, 'max_swarm_iter': 2, 'max_swarm_iter_without_new_global_best': 2, 'val_guess_fact': 0.4, 'fct_guess_fact': 0.1}} - # opti_cycles = [['constraint', 'bond', 'angle'], ['constraint', 'bond'], ['angle'], ['constraint', 'bond', 'angle'], ['dihedral'], ['constraint', 'bond', 'angle', 'dihedral']] # optimization cycles to perform with given geom objects - # sim_cycles = [0, 1, 1, 2, 2, 3] # simulations types - - # Settings: TEST / utlra-fast settings only for debugging -- DIHEDRALS APPLIED IN THE END EXCLUSIVELY - # sim_types = {0: {'sim_duration': 0.3, 'max_swarm_iter': 1, 'max_swarm_iter_without_new_global_best': 1, 'val_guess_fact': 1.0, 'fct_guess_fact': 0.3}, - # 1: {'sim_duration': 0.3, 'max_swarm_iter': 2, 'max_swarm_iter_without_new_global_best': 2, 'val_guess_fact': 1.0, 'fct_guess_fact': 0.3}, - # 2: {'sim_duration': 0.3, 'max_swarm_iter': 2, 'max_swarm_iter_without_new_global_best': 2, 'val_guess_fact': 1.0, 'fct_guess_fact': 0.2}, - # 3: {'sim_duration': 0.3, 'max_swarm_iter': 2, 'max_swarm_iter_without_new_global_best': 2, 'val_guess_fact': 0.4, 'fct_guess_fact': 0.1}} - # opti_cycles = [['constraint', 'bond', 'angle'], ['constraint', 'bond', 'angle'], ['dihedral'], ['constraint', 'bond', 'angle', 'dihedral']] # optimization cycles to perform with given geom objects - # sim_cycles = [0, 2, 2, 3] # simulations types - - # Settings: ROBUST / Suited for big molecules - # sim_types = {0: {'sim_duration': 5, 'max_swarm_iter': 10, 'max_swarm_iter_without_new_global_best': 5, 'val_guess_fact': 1, 'fct_guess_fact': 0.30}, - # 1: {'sim_duration': 8, 'max_swarm_iter': 10, 'max_swarm_iter_without_new_global_best': 5, 'val_guess_fact': 0.25, 'fct_guess_fact': 0.25}, - # 2: {'sim_duration': 10, 'max_swarm_iter': 10, 'max_swarm_iter_without_new_global_best': 
5, 'val_guess_fact': 0.25, 'fct_guess_fact': 0.25}, - # 3: {'sim_duration': 15, 'max_swarm_iter': 20, 'max_swarm_iter_without_new_global_best': 5, 'val_guess_fact': 0.25, 'fct_guess_fact': 0.25}} - # opti_cycles = [['constraint', 'bond', 'angle'], ['constraint', 'bond'], ['angle'], ['constraint', 'bond', 'angle'], ['dihedral'], ['constraint', 'bond', 'angle', 'dihedral']] # optimization cycles to perform with given geom objects - # sim_cycles = [0, 1, 1, 3, 2, 3] # simulations types - - # Strategy 1 - # Settings: FASTER / Suited for small molecules or rapid optimization - # sim_types = {0: {'sim_duration': 10, 'max_swarm_iter': 10, 'max_swarm_iter_without_new_global_best': 5, 'val_guess_fact': 1, 'fct_guess_fact': 0.40}, - # 1: {'sim_duration': 10, 'max_swarm_iter': 10, 'max_swarm_iter_without_new_global_best': 5, 'val_guess_fact': 0.25, 'fct_guess_fact': 0.30}, - # 2: {'sim_duration': 15, 'max_swarm_iter': 15, 'max_swarm_iter_without_new_global_best': 5, 'val_guess_fact': 0.25, 'fct_guess_fact': 0.25}} - # opti_cycles = [['constraint', 'bond', 'angle'], ['dihedral'], ['constraint', 'bond', 'angle', 'dihedral']] # optimization cycles to perform with given geom objects - # sim_cycles = [0, 1, 2] # simulations types - - # THIS IS THE CURRENT CHOICE - # Startegy 4 - # Settings: OPTIMAL / Should be fine with any type of molecule, big or small, as long as the BI keeps yielding close enough results, which should be the case - # sim_types = {0: {'sim_duration': 10, 'max_swarm_iter': int(5+np.sqrt(ns.nb_constraints+ns.nb_bonds+ns.nb_angles)), 'max_swarm_iter_without_new_global_best': 5, 'val_guess_fact': 1, 'fct_guess_fact': 0.35}, - # 1: {'sim_duration': 10, 'max_swarm_iter': int(5+np.sqrt(ns.nb_angles+ns.nb_dihedrals)), 'max_swarm_iter_without_new_global_best': 5, 'val_guess_fact': 0.25, 'fct_guess_fact': 0.30}, - # 2: {'sim_duration': 10, 'max_swarm_iter': int(5+np.sqrt(ns.nb_constraints+ns.nb_bonds+ns.nb_angles+ns.nb_dihedrals)), 
'max_swarm_iter_without_new_global_best': 5, 'val_guess_fact': 0.15, 'fct_guess_fact': 0.20}} - # opti_cycles = [['constraint', 'bond', 'angle'], ['angle', 'dihedral'], ['constraint', 'bond', 'angle', 'dihedral']] # optimization cycles to perform with given geom objects - # sim_cycles = [0, 1, 2] # simulations types - - # Startegy 5 -- Coupled to fewer particles - # Settings: OPTIMAL / Should be fine with any type of molecule, big or small, as long as the BI keeps yielding close enough results, which should be the case - sim_types = {0: {'sim_duration': ns.sim_duration_short, 'max_swarm_iter': int(round(6+np.sqrt(ns.nb_constraints+ns.nb_bonds+ns.nb_angles))), 'max_swarm_iter_without_new_global_best': 6, 'val_guess_fact': 1, 'fct_guess_fact': 0.40}, - 1: {'sim_duration': ns.sim_duration_short, 'max_swarm_iter': int(round(6+np.sqrt(ns.nb_angles+ns.nb_dihedrals))), 'max_swarm_iter_without_new_global_best': 6, 'val_guess_fact': 0.25, 'fct_guess_fact': 0.30}, - 2: {'sim_duration': ns.sim_duration_long, 'max_swarm_iter': int(round(6+np.sqrt(ns.nb_constraints+ns.nb_bonds+ns.nb_angles+ns.nb_dihedrals))), 'max_swarm_iter_without_new_global_best': 6, 'val_guess_fact': 0.25, 'fct_guess_fact': 0.20}} - opti_cycles = [['constraint', 'bond', 'angle'], ['angle', 'dihedral'], ['constraint', 'bond', 'angle', 'dihedral']] # optimization cycles to perform with given geom objects - sim_cycles = [0, 1, 2] # simulations types - - # for tests - # sim_types = {0: {'sim_duration': ns.sim_duration_short, 'max_swarm_iter': 2, 'max_swarm_iter_without_new_global_best': 6, 'val_guess_fact': 1, 'fct_guess_fact': 0.40}, - # 1: {'sim_duration': ns.sim_duration_short, 'max_swarm_iter': 2, 'max_swarm_iter_without_new_global_best': 6, 'val_guess_fact': 0.25, 'fct_guess_fact': 0.30}, - # 2: {'sim_duration': ns.sim_duration_long, 'max_swarm_iter': 2, 'max_swarm_iter_without_new_global_best': 6, 'val_guess_fact': 0.25, 'fct_guess_fact': 0.20}} - # opti_cycles = [['constraint', 'bond', 'angle'], 
['angle', 'dihedral'], ['constraint', 'bond', 'angle', 'dihedral']] # optimization cycles to perform with given geom objects - # sim_cycles = [0, 1, 2] # simulations types - - # NOTE: currently, due to an issue in FST-PSO, number of swarm iterations performed is +2 when compared to the numbers we feed - - ns.opti_itp = copy.deepcopy(ns.cg_itp) # the ITP object that will be optimized stepwise, at the end of each optimization cycle (geom type wise) - ns.eval_nb_geoms = {'constraint': 0, 'bond': 0, 'angle': 0, 'dihedral': 0} # geoms to optimize at each step - - # remove dihedrals from cycles if CG ITP file does NOT contain dihedrals - if ns.nb_dihedrals == 0: - opti_cycles_cp, sim_cycles_cp = [], [] - nb_poped = 0 - for i in range(len(opti_cycles)): - opti_cycles_cp.extend([[]]) - for j in range(len(opti_cycles[i])): - if opti_cycles[i][j] != 'dihedral': - opti_cycles_cp[i-nb_poped].append(opti_cycles[i][j]) - if len(opti_cycles_cp[i-nb_poped]) == 0: - opti_cycles_cp.pop() - nb_poped += 1 - else: - sim_cycles_cp.extend([sim_cycles[i]]) - opti_cycles, sim_cycles = opti_cycles_cp, sim_cycles_cp - # print(opti_cycles) - - # state variables for the cycles of optimization - ns.performed_init_BI = {'bond': False, 'angle': False, 'dihedral': False} - ns.opti_geoms_all = set(geom for opti_cycle_geoms in opti_cycles for geom in opti_cycle_geoms) - ns.best_fitness = [np.inf, None] # fitness_score, eval_step_best_score - - # storage for best independent set of parameters by geom, for initialization of a (few ?) 
special particle after 1st opti cycle - ns.all_best_emd_dist_geoms = {'constraints': {}, 'bonds': {}, 'angles': {}, 'dihedrals': {}} - ns.all_best_params_dist_geoms = {'constraints': {}, 'bonds': {}, 'angles': {}, 'dihedrals': {}} - for i in range(ns.nb_constraints): - ns.all_best_emd_dist_geoms['constraints'][i] = config.sim_crash_EMD_indep_score - ns.all_best_params_dist_geoms['constraints'][i] = {} - for i in range(ns.nb_bonds): - ns.all_best_emd_dist_geoms['bonds'][i] = config.sim_crash_EMD_indep_score - ns.all_best_params_dist_geoms['bonds'][i] = {} - for i in range(ns.nb_angles): - ns.all_best_emd_dist_geoms['angles'][i] = config.sim_crash_EMD_indep_score - ns.all_best_params_dist_geoms['angles'][i] = {} - for i in range(ns.nb_dihedrals): - ns.all_best_emd_dist_geoms['dihedrals'][i] = config.sim_crash_EMD_indep_score - ns.all_best_params_dist_geoms['dihedrals'][i] = {} - - - ############################# - # START OPTIMIZATION CYCLES # - ############################# - - for i in range(len(opti_cycles)): - - ns.opti_cycle = {'nb_cycle': i+1, 'geoms': opti_cycles[i], 'nb_geoms': {'constraint': 0, 'bond': 0, 'angle': 0, 'dihedral': 0}} - ns.out_itp = copy.deepcopy(ns.opti_itp) # input ITP copy, on which we might perform BI, and that is the object we will modify at each evaluation step to store the values from FST-PSO - - # model selection based on fitness + Rg during last optimization cycle - # ns.all_rg_last_cycle, ns.all_fitness_last_cycle = np.array([]), np.array([]) - # ns.best_fitness_Rg_combined = 0 # id of the best model based on bonded fitness + Rg selection - - ns.prod_sim_time = sim_types[sim_cycles[i]]['sim_duration'] - ns.val_guess_fact = sim_types[sim_cycles[i]]['val_guess_fact'] - ns.fct_guess_fact = sim_types[sim_cycles[i]]['fct_guess_fact'] - ns.max_swarm_iter = sim_types[sim_cycles[i]]['max_swarm_iter'] - ns.max_swarm_iter_without_new_global_best = sim_types[sim_cycles[i]]['max_swarm_iter_without_new_global_best'] - - # adapt number of geoms 
according to the optimization cycle - geoms_display = [] - if 'constraint' in ns.opti_cycle['geoms'] or 'bond' in ns.opti_cycle['geoms']: - geoms_display.append('constraints/bonds') - if 'constraint' in ns.opti_cycle['geoms']: - ns.opti_cycle['nb_geoms']['constraint'] = ns.nb_constraints - if 'bond' in ns.opti_cycle['geoms']: - ns.opti_cycle['nb_geoms']['bond'] = ns.nb_bonds - if 'angle' in ns.opti_cycle['geoms']: - ns.opti_cycle['nb_geoms']['angle'] = ns.nb_angles - geoms_display.append('angles') - if 'dihedral' in ns.opti_cycle['geoms']: - ns.opti_cycle['nb_geoms']['dihedral'] = ns.nb_dihedrals - geoms_display.append('dihedrals') - geoms_display = ' & '.join(geoms_display) + # TODO: allow to feed a JSON file or DICT-like string for which bonds group to rescale for AA + # TODO: allow to feed a JSON file for cycles of optimization ?? this is more optional but useful for big stuff possibly + # TODO: if using SASA through GMX SASA, ensure vdwradii.dat contains the MARTINI radii + # TODO: give a warning when users specify a bond scaling without specifying an Rg offset !!! + + # TODO: AT OPTI CYCLE 2, FIND ANGLES THAT ARE TOO STEEP (CG) AND WHEN GENERATING THE NEW GUESSES, PUT 10-30-50-70% OF THE CURRENT BEST FORCE CONSTANT IN SEVERAL PARTICLES !!!!!!!!! 
+ + # NOTE: gmx trjconv and sasa may produce bugs when using TPR produced with gromacs v5, only current solution seems to be implementing the SASA calculation using MDTraj + + ##################################### + # ARGUMENTS HANDLING / HELP DISPLAY # + ##################################### + + # namespace variables not directly linked to arguments for plotting or for global package interpretation + ns.mismatch_order = False + ns.row_x_scaling = True + ns.row_y_scaling = True + ns.ncols_max = 0 # 0 to display all + # ns.atom_only = False + ns.molname_in = None # if None the first found using TPR atom ordering will be used + ns.process_alive_time_sleep = 10 # nb of seconds between process alive check cycles + ns.process_alive_nb_cycles_dead = int(ns.sim_kill_delay / ns.process_alive_time_sleep) # nb of cycles without .log file bytes size changes to determine that the MD run is stuck + ns.bonds_rescaling_performed = False # for user information display + + # get basenames for simulation files + ns.cg_itp_basename = os.path.basename(ns.cg_itp_filename) + ns.gro_input_basename = os.path.basename(ns.gro_input_filename) + ns.top_input_basename = os.path.basename(ns.top_input_filename) + ns.mdp_minimization_basename = os.path.basename(ns.mdp_minimization_filename) + ns.mdp_equi_basename = os.path.basename(ns.mdp_equi_filename) + ns.mdp_md_basename = os.path.basename(ns.mdp_md_filename) + + + #################### + # ARGUMENTS CHECKS # + #################### print() + print(swarmcg.shared.styling.sep_close) + print('| PRE-PROCESSING AND CONTROLS |') + print(swarmcg.shared.styling.sep_close) print() - print(config.sep_close) - print('| STARTING OPTIMIZATION CYCLE', ns.opti_cycle['nb_cycle'], ' |') - print('| Optimizing', geoms_display, ' '*(95-16-len(geoms_display)), '|') - print(config.sep_close) - - # actual BI to get the initial guesses of force constants, for all selected geoms at this given optimization step - # BI is performed: - # -- exec_mode 1: all values and 
force constants - # -- exec_mode 2: values are not touched for angles and dihedrals, but all force constants are estimated - # -- exec_mode 3: values are not touched for dihedrals, but all force constants are estimated - scg.perform_BI(ns) # performed on object ns.out_itp - - # build vector for search space boundaries + create variations around the BI initial guesses - search_space_boundaries = scg.get_search_space_boundaries(ns) - # ns.worst_fit_score = round(len(search_space_boundaries) * config.sim_crash_EMD_indep_score, 3) - ns.worst_fit_score = round(\ - np.sqrt((ns.nb_constraints+ns.nb_bonds) * config.sim_crash_EMD_indep_score) + \ - np.sqrt(ns.nb_angles * config.sim_crash_EMD_indep_score) + \ - np.sqrt(ns.nb_dihedrals * config.sim_crash_EMD_indep_score) \ - , 3) - # nb_particles = int(10 + 2*np.sqrt(len(search_space_boundaries))) # formula used by FST-PSO to choose nb of particles, which defines the number of initial guesses we can use - nb_particles = int(round(2 + np.sqrt(len(search_space_boundaries)))) # adapted to have less particles and fitted to our problems, which has good initial guesses and error driven initialization - # nb_particles = 2 # for tests - initial_guess_list = scg.get_initial_guess_list(ns, nb_particles) - - # actual optimization + + # TODO: check that at least 10-20% of the simulations of the 1st swarm iteration finished properly, otherwise lower all energies or tell the user he is not writting into the log file regularly enough + # TODO: test this program with ITP files that contain all the different dihedral functions, angles functions, constraints etc + # TODO: find some fuzzy logic to determine number of swarm iterations + take some large margin to ensure it will optimize correctly + + # avoid overwriting an output directory of a previous optimization run + if os.path.isfile(ns.exec_folder) or os.path.isdir(ns.exec_folder): + msg = ( + "Provided output folder already exists, please delete existing folder " + "manually or provide 
another folder name." + ) + raise exceptions.AvoidOverwritingFolder(msg) + + # check the mapping type + ns.mapping_type = ns.mapping_type.upper() + if ns.mapping_type != 'COM' and ns.mapping_type != 'COG': + msg = "Mapping type provided via argument '-mapping' must be either COM or COG (Center of Mass or Center of Geometry)." + raise exceptions.InputArgumentError(msg) + + # check if we can find files at user-provided location(s) + # here the order of the args in the 2 lists below is important, be very careful if changing this or adding args + arg_entries = vars(ns) # dict view of the arguments namespace + user_provided_filenames = ['aa_tpr_filename', 'aa_traj_filename', 'cg_map_filename', 'cg_itp_filename', 'gro_input_filename', 'top_input_filename', 'mdp_minimization_filename', 'mdp_equi_filename', 'mdp_md_filename'] + args_names = ['aa_tpr', 'aa_traj', 'cg_map', 'cg_itp', 'cg_gro', 'cg_top', 'cg_mdp_mini', 'cg_mdp_equi', 'cg_mdp_md'] + + for i in range(len(user_provided_filenames)): + filename_out_directory = arg_entries[user_provided_filenames[i]] + + if not os.path.isfile(filename_out_directory): + + # if an input folder is specified (because '.' 
is the default input_folder) + if ns.input_folder != '.': + filename_in_directory = ns.input_folder+'/'+arg_entries[user_provided_filenames[i]] + if not os.path.isfile(filename_in_directory): + msg = ( + 'Cannot find file for argument -{} ' + '(expected at location: {})'.format(args_names[i], filename_in_directory) + ) + raise FileNotFoundError(msg) + else: + arg_entries[user_provided_filenames[i]] = filename_in_directory + + else: + msg = ( + 'Cannot find file for argument -{} ' + '(expected at location: {})'.format(args_names[i], filename_out_directory) + ) + raise FileNotFoundError(msg) + + # check that gromacs alias is correct with open(os.devnull, 'w') as devnull: - with contextlib.redirect_stdout(devnull): - FP = FuzzyPSO() - FP.set_search_space(search_space_boundaries) - FP.set_swarm_size(nb_particles) - FP.set_fitness(fitness=scg.eval_function, arguments=ns, skip_test=True) - result = FP.solve_with_fstpso(max_iter=ns.max_swarm_iter, initial_guess_list=initial_guess_list, max_iter_without_new_global_best=ns.max_swarm_iter_without_new_global_best) - - # update ITP object with the best solution using geoms considered at this given optimization step - scg.update_cg_itp_obj(ns, parameters_set=result[0].X, update_type=2) - - # clean temporary copied directory with user's input files - shutil.rmtree(ns.exec_folder+'/'+config.input_sim_files_dirname) - - # print some stats - total_time_sec = datetime.now().timestamp() - ns.start_opti_ts - total_time = round(total_time_sec / (60 * 60), 2) - fitness_eval_time = round(ns.total_eval_time / (60 * 60), 2) - init_time = round((total_time_sec - ns.total_eval_time) / (60 * 60), 2) - ns.total_gmx_time = round(ns.total_gmx_time / (60 * 60), 2) - ns.total_model_eval_time = round(ns.total_model_eval_time / (60 * 60), 2) - print() - print(config.sep_close) - print('| FINISHED PROPERLY |') - print(config.sep_close) - print() - print('Total nb of evaluation steps:', ns.nb_eval) - print('Best model obtained at evaluation step 
number:', ns.best_fitness[1]) - print() - print('Total execution time :', total_time, 'h') - print('Initialization time :', init_time, 'h ('+str(round(init_time/total_time*100, 2))+' %)') - print('Simulations time :', ns.total_gmx_time, 'h ('+str(round(ns.total_gmx_time/total_time*100, 2))+' %)') - print('Models scoring time :', ns.total_model_eval_time, 'h ('+str(round(ns.total_model_eval_time/total_time*100, 2))+' %)') - print() + try: + subprocess.call(ns.gmx_path, stdout=devnull, stderr=devnull) + except OSError: + msg = ( + f"Cannot find GROMACS using alias {ns.gmx_path}, please provide " + f"the right GROMACS alias or path" + ) + raise exceptions.ExecutableNotFound(msg) + + # check that ITP filename for the model to optimize is indeed included in the TOP file of the simulation directory + # then find all TOP includes for copying files for simulations at each iteration + top_includes_filenames = [] + with open(arg_entries[user_provided_filenames[5]], 'r') as fp: + all_top_lines = fp.read() + if ns.cg_itp_basename not in all_top_lines: + msg = "The CG ITP model filename you provided is not included in your TOP file." + raise exceptions.MDSimulationInputError(msg) + + top_lines = all_top_lines.split('\n') + top_lines = [top_line.strip().split(';')[0] for top_line in top_lines] # the split removes comments + for top_line in top_lines: + if top_line.startswith('#include'): + top_include = top_line.split()[1].replace('"', '').replace("'", '') # remove potential single and double quotes around filenames + arg_dirname = os.path.dirname(arg_entries[user_provided_filenames[5]]) + if arg_dirname == '': + arg_dirname = '.' 
+ top_includes_filenames.append(arg_dirname + '/' + top_include) + + # check gmx arguments conflicts + if ns.gmx_args_str != '' and (ns.nb_threads != 0 or ns.gpu_id != ''): + print( + swarmcg.shared.styling.header_warning + 'Argument -gmx_args_str is provided together with one of arguments: -nb_threads, -gpu_id\nOnly argument -gmx_args_str will be used during this execution') + + # check bonds scaling arguments conflicts + if (ns.bonds_scaling != config.bonds_scaling and ns.min_bonds_length != config.min_bonds_length) or (ns.bonds_scaling != config.bonds_scaling and ns.bonds_scaling_str != config.bonds_scaling_str) or (ns.min_bonds_length != config.min_bonds_length and ns.bonds_scaling_str != config.bonds_scaling_str): + msg = ( + "Only one of arguments -bonds_scaling, -bonds_scaling_str and " + "-min_bonds_length can be provided. Please check your parameters" + ) + raise exceptions.InputArgumentError(msg) + + ################## + # INITIALIZATION # + ################## + + # scg.set_MDA_backend(ns) # cannot use this properly currently because we cannot limit number of threads + ns.mda_backend = 'serial' # currently force single thread for safe clusters execution + + # directory to write all files for current execution of optimizations routines + os.mkdir(ns.exec_folder) + os.mkdir(ns.exec_folder+'/.internal') + os.mkdir(ns.exec_folder+'/'+config.distrib_plots_all_evals_dirname) + os.mkdir(ns.exec_folder+'/'+config.log_files_all_evals_dirname) + if ns.keep_all_sims: + os.mkdir(ns.exec_folder+'/'+config.sim_files_all_evals_dirname) + + # prepare a directory to be copied at each iteration of the optimization, to run the new simulation + os.mkdir(ns.exec_folder+'/'+config.input_sim_files_dirname) + + # get all TOP file includes copied into input simulation directory + for top_include in top_includes_filenames: + shutil.copy(top_include, ns.exec_folder + '/' + config.input_sim_files_dirname) + + # copy all other simulation files + user_provided_sim_files = 
['cg_itp_filename', 'gro_input_filename', 'top_input_filename', 'mdp_minimization_filename', 'mdp_equi_filename', 'mdp_md_filename'] + for sim_file in user_provided_sim_files: + shutil.copy(arg_entries[sim_file], ns.exec_folder+'/'+config.input_sim_files_dirname) + + # modify the TOP file to adapt includes paths + with open(ns.exec_folder+'/'+config.input_sim_files_dirname+'/'+ns.top_input_basename, 'r') as fp: + all_top_lines = fp.read().split('\n') + with open(ns.exec_folder+'/'+config.input_sim_files_dirname+'/'+ns.top_input_basename, 'w+') as fp: + nb_includes = 0 + for i in range(len(all_top_lines)): + if all_top_lines[i].startswith('#include'): + all_top_lines[i] = '#include "'+os.path.basename(top_includes_filenames[nb_includes])+'"' + nb_includes += 1 + fp.writelines('\n'.join(all_top_lines)) + + ns.nb_eval = 0 # global count of evaluation steps + ns.start_opti_ts = datetime.now().timestamp() + ns.total_eval_time, ns.total_gmx_time, ns.total_model_eval_time = 0, 0, 0 + + scg.create_bins_and_dist_matrices(ns) # bins for EMD calculations + scg.read_ndx_atoms2beads(ns) # read mapping, get atoms occurrences in beads + scg.get_atoms_weights_in_beads(ns) # get weights of atoms within beads + + scg.read_cg_itp_file(ns) # load the ITP object and find out geoms grouping + scg.process_scaling_str(ns) # process the bonds scaling specified by user + + print() + scg.read_aa_traj(ns) # create universe and read traj + scg.load_aa_data(ns) # read atoms attributes + scg.make_aa_traj_whole_for_selected_mols(ns) + + # for each CG bead, create atom groups for trajectory geoms calculation using mass and atom weights across beads + scg.get_beads_MDA_atomgroups(ns) + + # get CG beads weights from ITP includes present in the TOP file + # but do NOT erase the masses found in the ITP of the CG MODEL provided via arg -cg_itp + for top_include in top_includes_filenames: + + with open(top_include, 'r') as fp: + try: + itp_lines = fp.read().split('\n') + itp_lines = 
[itp_line.split(';')[0].strip() for itp_line in itp_lines] + except UnicodeDecodeError: + msg = "Cannot read CG ITP, it seems you provided a binary file." + raise exceptions.MissformattedFile(msg) + + for itp_line in itp_lines: + if itp_line != '': + + try: # look for beads and VS masses: try to find the format, this is exigent enough to be unique + sp_itp_line = itp_line.split() + b_type, b_mass, b_sitetype = sp_itp_line[0], float(sp_itp_line[1]), sp_itp_line[3] + if b_sitetype in ['A', 'V', 'D']: # atom, virtual site, dummy (old virtual site) + for bead_id in range(len(ns.cg_itp['atoms'])): + if ns.cg_itp['atoms'][bead_id]['bead_type'] == b_type: + if ns.cg_itp['atoms'][bead_id]['mass'] == None: + ns.cg_itp['atoms'][bead_id]['mass'] = b_mass + except (IndexError, ValueError): + pass + + print('\nMapping the trajectory from AA to CG representation') + scg.initialize_cg_traj(ns) + scg.map_aa2cg_traj(ns) + print() + + # touch results files to be appended to later + with open(ns.exec_folder+'/'+config.opti_perf_recap_file, 'w') as fp: + # TODO: print that file has been generated with Swarm-CG etc -- do this for basically all files + fp.write(f'# nb constraints: {ns.nb_constraints}\n') + fp.write(f'# nb bonds: {ns.nb_bonds}\n') + fp.write(f'# nb angles: {ns.nb_angles}\n') + fp.write(f'# nb dihedrals: {ns.nb_dihedrals}\n') + fp.write('#\n') + fp.write('# opti_cycle nb_eval fit_score_all fit_score_cstrs_bonds fit_score_angles fit_score_dihedrals eval_score Rg_AA_mapped Rg_CG parameters_set eval_time current_total_time\n') + with open(ns.exec_folder+'/'+config.opti_pairwise_distances_file, 'w'): + pass + + # set these to None to then check the variables have been filled (is not None), so we will do these calculations + # one single time in function compare_models that is called at each iteration during optimization + ns.gyr_aa_mapped, ns.gyr_aa_mapped_std = None, None + ns.sasa_aa_mapped, ns.sasa_aa_mapped_std = None, None + + print('Calculating bonds, angles and 
dihedrals distributions in the reference AA-mapped model') + ns.domains_val = {'constraint': [], 'bond': [], 'angle': [], 'dihedral': []} + ns.data_BI = {'bond': [], 'angle': [], 'dihedral': []} # store hists for BI, std and possibly some other stats + # create all ref atom histograms to be used for pairwise distributions comparisons + find average geoms values as first guesses (without BI at this point) + # get ref atom hists + find very first distances guesses for constraints groups + for grp_constraint in range(ns.nb_constraints): + constraint_avg, constraint_hist, constraint_values = scg.get_AA_bonds_distrib(ns, beads_ids=ns.cg_itp['constraint'][grp_constraint]['beads'], grp_type='constraint group', grp_nb=grp_constraint) + # if ns.exec_mode == 1: + ns.cg_itp['constraint'][grp_constraint]['value'] = constraint_avg + ns.cg_itp['constraint'][grp_constraint]['avg'] = constraint_avg + ns.cg_itp['constraint'][grp_constraint]['hist'] = constraint_hist + ns.domains_val['constraint'].append([round(np.min(constraint_values), 3), round(np.max(constraint_values), 3)]) + + # get ref atom hists + find very first distances and force constants guesses for bonds groups + for grp_bond in range(ns.nb_bonds): + + bond_avg, bond_hist, bond_values = scg.get_AA_bonds_distrib(ns, beads_ids=ns.cg_itp['bond'][grp_bond]['beads'], grp_type='bond group', grp_nb=grp_bond) + # if ns.exec_mode == 1: + ns.cg_itp['bond'][grp_bond]['value'] = bond_avg + ns.cg_itp['bond'][grp_bond]['avg'] = bond_avg + ns.cg_itp['bond'][grp_bond]['hist'] = bond_hist + + xmin, xmax = min(np.inf, ns.bins_bonds[np.min(np.nonzero(bond_hist))]), max(-np.inf, ns.bins_bonds[np.max(np.nonzero(bond_hist))+1]) + xmin, xmax = xmin-ns.bw_bonds, xmax+ns.bw_bonds + ns.data_BI['bond'].append([np.histogram(bond_values, range=(xmin, xmax), bins=config.bi_nb_bins)[0], np.std(bond_values), np.mean(bond_values), (xmin, xmax)]) + + ns.domains_val['bond'].append([round(np.min(bond_values), 3), round(np.max(bond_values), 3)]) # 
boundaries of force constants during optimization + + # get ref atom hists + find very first values and force constants guesses for angles groups + for grp_angle in range(ns.nb_angles): + + angle_avg, angle_hist, angle_values_deg, angle_values_rad = scg.get_AA_angles_distrib(ns, beads_ids=ns.cg_itp['angle'][grp_angle]['beads']) + if ns.exec_mode == 1 or ns.exec_mode == 3: + ns.cg_itp['angle'][grp_angle]['value'] = angle_avg + ns.cg_itp['angle'][grp_angle]['avg'] = angle_avg + ns.cg_itp['angle'][grp_angle]['hist'] = angle_hist + + xmin, xmax = min(np.inf, ns.bins_angles[np.min(np.nonzero(angle_hist))]), max(-np.inf, ns.bins_angles[np.max(np.nonzero(angle_hist))+1]) + xmin, xmax = xmin+ns.bw_angles/2, xmax-ns.bw_angles/2 + ns.data_BI['angle'].append([np.histogram(angle_values_rad, range=(np.deg2rad(xmin), np.deg2rad(xmax)), bins=config.bi_nb_bins)[0], np.std(angle_values_rad), (xmin, xmax)]) + + ns.domains_val['angle'].append([round(np.min(angle_values_deg), 2), round(np.max(angle_values_deg), 2)]) # boundaries of force constants during optimization + + # get ref atom hists + find very first values and force constants guesses for dihedrals groups + for grp_dihedral in range(ns.nb_dihedrals): + + dihedral_avg, dihedral_hist, dihedral_values_deg, dihedral_values_rad = scg.get_AA_dihedrals_distrib(ns, beads_ids=ns.cg_itp['dihedral'][grp_dihedral]['beads']) + if ns.exec_mode == 1: # the angle value for dihedral will be calculated from the BI fit, because for dihedrals it makes no sense to use the average + ns.cg_itp['dihedral'][grp_dihedral]['value'] = dihedral_avg + ns.cg_itp['dihedral'][grp_dihedral]['avg'] = dihedral_avg + ns.cg_itp['dihedral'][grp_dihedral]['hist'] = dihedral_hist + + xmin, xmax = -180, 180 + ns.data_BI['dihedral'].append([np.histogram(dihedral_values_rad, range=(np.deg2rad(xmin), np.deg2rad(xmax)), bins=2 *config.bi_nb_bins)[0], np.std(dihedral_values_rad), np.mean(dihedral_values_rad), (xmin, xmax)]) + + 
ns.domains_val['dihedral'].append([round(np.min(dihedral_values_deg), 2), round(np.max(dihedral_values_deg), 2)]) # boundaries of force constants during optimization + + if not ns.bonds_rescaling_performed: + print(' No bonds rescaling performed') + + # output png with all the reference distributions, so the user can check + ns.atom_only = True + ns.plot_filename = ns.exec_folder+'/'+config.ref_distrib_plots + with open(os.devnull, 'w') as devnull: + with contextlib.redirect_stdout(devnull): + scg.compare_models(ns, manual_mode=False) + print() + print('Plotted reference AA-mapped distributions (used as target during optimization) at location:\n ', ns.exec_folder+'/'+config.ref_distrib_plots) + ns.atom_only = False + + + ################################## + # ITERATIVE OPTIMIZATION PROCESS # + ################################## + + # parameters for each type of simulation during optimization cycles + # sim duration (ns), max nb of SWARM iterations, max nb SWARM iterations without finding new global best, percentage applied for generating variations around initial guesses/values fed humanly + # sim_type 0 is used for initialization exclusively + detecting too high force constants to lower them, no real optimization is expected from these runs + + # Settings: TEST / ultra-fast settings only for debugging -- DIHEDRALS APPLIED IN THE END EXCLUSIVELY + # sim_types = {0: {'sim_duration': 0.3, 'max_swarm_iter': 1, 'max_swarm_iter_without_new_global_best': 1, 'val_guess_fact': 1.0, 'fct_guess_fact': 0.3}, + # 1: {'sim_duration': 0.3, 'max_swarm_iter': 2, 'max_swarm_iter_without_new_global_best': 2, 'val_guess_fact': 1.0, 'fct_guess_fact': 0.3}, + # 2: {'sim_duration': 0.3, 'max_swarm_iter': 2, 'max_swarm_iter_without_new_global_best': 2, 'val_guess_fact': 1.0, 'fct_guess_fact': 0.2}, + # 3: {'sim_duration': 0.3, 'max_swarm_iter': 2, 'max_swarm_iter_without_new_global_best': 2, 'val_guess_fact': 0.4, 'fct_guess_fact': 0.1}} + # opti_cycles = [['constraint', 'bond', 'angle'], 
['constraint', 'bond'], ['angle'], ['constraint', 'bond', 'angle'], ['dihedral'], ['constraint', 'bond', 'angle', 'dihedral']] # optimization cycles to perform with given geom objects + # sim_cycles = [0, 1, 1, 2, 2, 3] # simulations types + + # Settings: TEST / utlra-fast settings only for debugging -- DIHEDRALS APPLIED IN THE END EXCLUSIVELY + # sim_types = {0: {'sim_duration': 0.3, 'max_swarm_iter': 1, 'max_swarm_iter_without_new_global_best': 1, 'val_guess_fact': 1.0, 'fct_guess_fact': 0.3}, + # 1: {'sim_duration': 0.3, 'max_swarm_iter': 2, 'max_swarm_iter_without_new_global_best': 2, 'val_guess_fact': 1.0, 'fct_guess_fact': 0.3}, + # 2: {'sim_duration': 0.3, 'max_swarm_iter': 2, 'max_swarm_iter_without_new_global_best': 2, 'val_guess_fact': 1.0, 'fct_guess_fact': 0.2}, + # 3: {'sim_duration': 0.3, 'max_swarm_iter': 2, 'max_swarm_iter_without_new_global_best': 2, 'val_guess_fact': 0.4, 'fct_guess_fact': 0.1}} + # opti_cycles = [['constraint', 'bond', 'angle'], ['constraint', 'bond', 'angle'], ['dihedral'], ['constraint', 'bond', 'angle', 'dihedral']] # optimization cycles to perform with given geom objects + # sim_cycles = [0, 2, 2, 3] # simulations types + + # Settings: ROBUST / Suited for big molecules + # sim_types = {0: {'sim_duration': 5, 'max_swarm_iter': 10, 'max_swarm_iter_without_new_global_best': 5, 'val_guess_fact': 1, 'fct_guess_fact': 0.30}, + # 1: {'sim_duration': 8, 'max_swarm_iter': 10, 'max_swarm_iter_without_new_global_best': 5, 'val_guess_fact': 0.25, 'fct_guess_fact': 0.25}, + # 2: {'sim_duration': 10, 'max_swarm_iter': 10, 'max_swarm_iter_without_new_global_best': 5, 'val_guess_fact': 0.25, 'fct_guess_fact': 0.25}, + # 3: {'sim_duration': 15, 'max_swarm_iter': 20, 'max_swarm_iter_without_new_global_best': 5, 'val_guess_fact': 0.25, 'fct_guess_fact': 0.25}} + # opti_cycles = [['constraint', 'bond', 'angle'], ['constraint', 'bond'], ['angle'], ['constraint', 'bond', 'angle'], ['dihedral'], ['constraint', 'bond', 'angle', 'dihedral']] # 
optimization cycles to perform with given geom objects + # sim_cycles = [0, 1, 1, 3, 2, 3] # simulations types + + # Strategy 1 + # Settings: FASTER / Suited for small molecules or rapid optimization + # sim_types = {0: {'sim_duration': 10, 'max_swarm_iter': 10, 'max_swarm_iter_without_new_global_best': 5, 'val_guess_fact': 1, 'fct_guess_fact': 0.40}, + # 1: {'sim_duration': 10, 'max_swarm_iter': 10, 'max_swarm_iter_without_new_global_best': 5, 'val_guess_fact': 0.25, 'fct_guess_fact': 0.30}, + # 2: {'sim_duration': 15, 'max_swarm_iter': 15, 'max_swarm_iter_without_new_global_best': 5, 'val_guess_fact': 0.25, 'fct_guess_fact': 0.25}} + # opti_cycles = [['constraint', 'bond', 'angle'], ['dihedral'], ['constraint', 'bond', 'angle', 'dihedral']] # optimization cycles to perform with given geom objects + # sim_cycles = [0, 1, 2] # simulations types + + # previous best choice for versatile usage + # Startegy 4 + # Settings: OPTIMAL / Should be fine with any type of molecule, big or small, as long as the BI keeps yielding close enough results, which should be the case + # sim_types = {0: {'sim_duration': 10, 'max_swarm_iter': int(5+np.sqrt(ns.nb_constraints+ns.nb_bonds+ns.nb_angles)), 'max_swarm_iter_without_new_global_best': 5, 'val_guess_fact': 1, 'fct_guess_fact': 0.35}, + # 1: {'sim_duration': 10, 'max_swarm_iter': int(5+np.sqrt(ns.nb_angles+ns.nb_dihedrals)), 'max_swarm_iter_without_new_global_best': 5, 'val_guess_fact': 0.25, 'fct_guess_fact': 0.30}, + # 2: {'sim_duration': 10, 'max_swarm_iter': int(5+np.sqrt(ns.nb_constraints+ns.nb_bonds+ns.nb_angles+ns.nb_dihedrals)), 'max_swarm_iter_without_new_global_best': 5, 'val_guess_fact': 0.15, 'fct_guess_fact': 0.20}} + # opti_cycles = [['constraint', 'bond', 'angle'], ['angle', 'dihedral'], ['constraint', 'bond', 'angle', 'dihedral']] # optimization cycles to perform with given geom objects + # sim_cycles = [0, 1, 2] # simulations types + + # Startegy 5 -- Coupled to fewer particles + # Settings: OPTIMAL / Should be 
fine with any type of molecule, big or small, as long as the BI keeps yielding close enough results, which should be the case + sim_types = {0: {'sim_duration': ns.sim_duration_short, 'max_swarm_iter': int(round(6+np.sqrt(ns.nb_constraints+ns.nb_bonds+ns.nb_angles))), 'max_swarm_iter_without_new_global_best': 6, 'val_guess_fact': 1, 'fct_guess_fact': 0.40}, + 1: {'sim_duration': ns.sim_duration_short, 'max_swarm_iter': int(round(6+np.sqrt(ns.nb_angles+ns.nb_dihedrals))), 'max_swarm_iter_without_new_global_best': 6, 'val_guess_fact': 0.25, 'fct_guess_fact': 0.30}, + 2: {'sim_duration': ns.sim_duration_long, 'max_swarm_iter': int(round(6+np.sqrt(ns.nb_constraints+ns.nb_bonds+ns.nb_angles+ns.nb_dihedrals))), 'max_swarm_iter_without_new_global_best': 6, 'val_guess_fact': 0.25, 'fct_guess_fact': 0.20}} + # opti_cycles = [['constraint', 'bond', 'angle'], ['angle', 'dihedral'], ['constraint', 'bond', 'angle', 'dihedral']] # optimization cycles to perform with given geom objects + opti_cycles = [['constraint', 'bond', 'angle'], ['angle', 'dihedral'], ['constraint', 'bond', 'angle', 'dihedral']] + sim_cycles = [0, 1, 2] # simulations types + + # for tests + # sim_types = {0: {'sim_duration': ns.sim_duration_short, 'max_swarm_iter': 2, 'max_swarm_iter_without_new_global_best': 6, 'val_guess_fact': 1, 'fct_guess_fact': 0.40}, + # 1: {'sim_duration': ns.sim_duration_short, 'max_swarm_iter': 2, 'max_swarm_iter_without_new_global_best': 6, 'val_guess_fact': 0.25, 'fct_guess_fact': 0.30}, + # 2: {'sim_duration': ns.sim_duration_long, 'max_swarm_iter': 2, 'max_swarm_iter_without_new_global_best': 6, 'val_guess_fact': 0.25, 'fct_guess_fact': 0.20}} + # opti_cycles = [['constraint', 'bond', 'angle', 'dihedral'], ['angle', 'dihedral'], ['constraint', 'bond', 'angle', 'dihedral']] # optimization cycles to perform with given geom objects + # sim_cycles = [0, 1, 2] # simulations types + + # NOTE: currently, due to an issue in FST-PSO, number of swarm iterations performed is +2 when 
compared to the numbers we feed + + ns.opti_itp = copy.deepcopy(ns.cg_itp) # the ITP object that will be optimized stepwise, at the end of each optimization cycle (geom type wise) + ns.eval_nb_geoms = {'constraint': 0, 'bond': 0, 'angle': 0, 'dihedral': 0} # geoms to optimize at each step + + # remove dihedrals from cycles if CG ITP file does NOT contain dihedrals + if ns.nb_dihedrals == 0: + opti_cycles_cp, sim_cycles_cp = [], [] + nb_poped = 0 + for i in range(len(opti_cycles)): + opti_cycles_cp.extend([[]]) + for j in range(len(opti_cycles[i])): + if opti_cycles[i][j] != 'dihedral': + opti_cycles_cp[i-nb_poped].append(opti_cycles[i][j]) + if len(opti_cycles_cp[i-nb_poped]) == 0: + opti_cycles_cp.pop() + nb_poped += 1 + else: + sim_cycles_cp.extend([sim_cycles[i]]) + opti_cycles, sim_cycles = opti_cycles_cp, sim_cycles_cp + + # state variables for the cycles of optimization + ns.performed_init_BI = {'bond': False, 'angle': False, 'dihedral': False} + ns.opti_geoms_all = set(geom for opti_cycle_geoms in opti_cycles for geom in opti_cycle_geoms) + ns.best_fitness = [np.inf, None] # fitness_score, eval_step_best_score + + # storage for best independent set of parameters by geom, for initialization of a (few ?) 
special particle after 1st opti cycle + ns.all_best_emd_dist_geoms = {'constraints': {}, 'bonds': {}, 'angles': {}, 'dihedrals': {}} + ns.all_best_params_dist_geoms = {'constraints': {}, 'bonds': {}, 'angles': {}, 'dihedrals': {}} + for i in range(ns.nb_constraints): + ns.all_best_emd_dist_geoms['constraints'][i] = config.sim_crash_EMD_indep_score + ns.all_best_params_dist_geoms['constraints'][i] = {} + for i in range(ns.nb_bonds): + ns.all_best_emd_dist_geoms['bonds'][i] = config.sim_crash_EMD_indep_score + ns.all_best_params_dist_geoms['bonds'][i] = {} + for i in range(ns.nb_angles): + ns.all_best_emd_dist_geoms['angles'][i] = config.sim_crash_EMD_indep_score + ns.all_best_params_dist_geoms['angles'][i] = {} + for i in range(ns.nb_dihedrals): + ns.all_best_emd_dist_geoms['dihedrals'][i] = config.sim_crash_EMD_indep_score + ns.all_best_params_dist_geoms['dihedrals'][i] = {} + + ############################# + # START OPTIMIZATION CYCLES # + ############################# + + for i in range(len(opti_cycles)): + + ns.opti_cycle = {'nb_cycle': i+1, 'geoms': opti_cycles[i], 'nb_geoms': {'constraint': 0, 'bond': 0, 'angle': 0, 'dihedral': 0}} + ns.out_itp = copy.deepcopy(ns.opti_itp) # input ITP copy, on which we might perform BI, and that is the object we will modify at each evaluation step to store the values from FST-PSO + + # model selection based on fitness + Rg during last optimization cycle + # ns.all_rg_last_cycle, ns.all_fitness_last_cycle = np.array([]), np.array([]) + # ns.best_fitness_Rg_combined = 0 # id of the best model based on bonded fitness + Rg selection + + ns.prod_sim_time = sim_types[sim_cycles[i]]['sim_duration'] + ns.val_guess_fact = sim_types[sim_cycles[i]]['val_guess_fact'] + ns.fct_guess_fact = sim_types[sim_cycles[i]]['fct_guess_fact'] + ns.max_swarm_iter = sim_types[sim_cycles[i]]['max_swarm_iter'] + ns.max_swarm_iter_without_new_global_best = sim_types[sim_cycles[i]]['max_swarm_iter_without_new_global_best'] + + # adapt number of geoms 
according to the optimization cycle + geoms_display = [] + if 'constraint' in ns.opti_cycle['geoms'] or 'bond' in ns.opti_cycle['geoms']: + geoms_display.append('constraints/bonds') + if 'constraint' in ns.opti_cycle['geoms']: + ns.opti_cycle['nb_geoms']['constraint'] = ns.nb_constraints + if 'bond' in ns.opti_cycle['geoms']: + ns.opti_cycle['nb_geoms']['bond'] = ns.nb_bonds + if 'angle' in ns.opti_cycle['geoms']: + ns.opti_cycle['nb_geoms']['angle'] = ns.nb_angles + geoms_display.append('angles') + if 'dihedral' in ns.opti_cycle['geoms']: + ns.opti_cycle['nb_geoms']['dihedral'] = ns.nb_dihedrals + geoms_display.append('dihedrals') + geoms_display = ' & '.join(geoms_display) + + print() + print(swarmcg.shared.styling.sep_close) + print('| STARTING OPTIMIZATION CYCLE', ns.opti_cycle['nb_cycle'], ' |') + print('| Optimizing', geoms_display, ' '*(95-16-len(geoms_display)), '|') + print(swarmcg.shared.styling.sep_close) + + # actual BI to get the initial guesses of force constants, for all selected geoms at this given optimization step + # BI is performed: + # -- exec_mode 1: all equilibrium values and force constants + # -- exec_mode 2: equilibrium values are not touched for angles and dihedrals, but all force constants are estimated + # -- exec_mode 3: equilibrium values are not touched for dihedrals, but all force constants are estimated + scg.perform_BI(ns) # performed on object ns.out_itp + + # build vector for search space boundaries + create variations around the BI initial guesses + search_space_boundaries = scg.get_search_space_boundaries(ns) + # ns.worst_fit_score = round(len(search_space_boundaries) * config.sim_crash_EMD_indep_score, 3) + ns.worst_fit_score = round(\ + np.sqrt((ns.nb_constraints+ns.nb_bonds) * config.sim_crash_EMD_indep_score) + \ + np.sqrt(ns.nb_angles * config.sim_crash_EMD_indep_score) + \ + np.sqrt(ns.nb_dihedrals * config.sim_crash_EMD_indep_score) \ + , 3) + # nb_particles = int(10 + 2*np.sqrt(len(search_space_boundaries))) # formula 
used by FST-PSO to choose nb of particles, which defines the number of initial guesses we can use + nb_particles = int(round(2 + np.sqrt(len(search_space_boundaries)))) # adapted to have less particles and fitted to our problems, which has good initial guesses and error driven initialization + # nb_particles = 2 # for tests + initial_guess_list = scg.get_initial_guess_list(ns, nb_particles) + + # actual optimization + with open(os.devnull, 'w') as devnull: + with contextlib.redirect_stdout(devnull): + FP = FuzzyPSO() + FP.set_search_space(search_space_boundaries) + FP.set_swarm_size(nb_particles) + FP.set_fitness(fitness=scg.eval_function, arguments=ns, skip_test=True) + result = FP.solve_with_fstpso(max_iter=ns.max_swarm_iter, initial_guess_list=initial_guess_list, max_iter_without_new_global_best=ns.max_swarm_iter_without_new_global_best) + + # update ITP object with the best solution using geoms considered at this given optimization step + scg.update_cg_itp_obj(ns, parameters_set=result[0].X, update_type=2) + + # clean temporary copied directory with user's input files + shutil.rmtree(ns.exec_folder+'/'+config.input_sim_files_dirname) + + # print some stats + total_time_sec = datetime.now().timestamp() - ns.start_opti_ts + total_time = round(total_time_sec / (60 * 60), 2) + fitness_eval_time = round(ns.total_eval_time / (60 * 60), 2) + init_time = round((total_time_sec - ns.total_eval_time) / (60 * 60), 2) + ns.total_gmx_time = round(ns.total_gmx_time / (60 * 60), 2) + ns.total_model_eval_time = round(ns.total_model_eval_time / (60 * 60), 2) + print() + print(swarmcg.shared.styling.sep_close) + print('| FINISHED PROPERLY |') + print(swarmcg.shared.styling.sep_close) + print() + print('Total nb of evaluation steps:', ns.nb_eval) + print('Best model obtained at evaluation step number:', ns.best_fitness[1]) + print() + print(f'Total execution time : {total_time} h') + print(f'Initialization time : {init_time} h ({round(init_time/total_time*100, 2)} %)') + 
print(f'Simulations time : {ns.total_gmx_time} h ({round(ns.total_gmx_time/total_time*100, 2)} %)') + print(f'Models scoring time : {ns.total_model_eval_time} h ({round(ns.total_model_eval_time/total_time*100, 2)} %)') + print() + + +def main(): + print( + swarmcg.shared.styling.header_package(' Module: CG model optimization\n')) + + formatter = lambda prog: RawTextHelpFormatter(prog, width=135, max_help_position=52) + args_parser = ArgumentParser( + description=OPTIMISE_DESCR, + formatter_class=formatter, + add_help=False, + usage=SUPPRESS + ) + + # TODO: handle trajectories for which no box informations are provided + # TODO: explain what is modified in the MDP + # TODO: explain module analyze_opti_moves.py can be used to monitor optimization at any point of the process + # TODO: end the help message by a new frame with examples from the demo data + + req_args_header = swarmcg.shared.styling.sep_close + '\n| REQUIRED ARGUMENTS |\n' + swarmcg.shared.styling.sep_close + opt_args_header = swarmcg.shared.styling.sep_close + '\n| OPTIONAL ARGUMENTS |\n' + swarmcg.shared.styling.sep_close + bullet = ' ' + + optional_args0 = args_parser.add_argument_group( + req_args_header + '\n\n' + bullet + 'EXECUTION MODE') + optional_args0.add_argument('-exec_mode', dest='exec_mode', + help='MODE 1: Tune both bonds lengths, angles/dihedrals values\n and their force constants\nMODE 2: Like MODE 1 but angles/dihedrals values in the prelim.\n CG model ITP are conserved during optimization\nMODE 3: Like MODE 1 but only dihedrals values in the prelim.\n CG model ITP are conserved during optimization', + type=int, default=1, metavar=' (1)') + + required_args = args_parser.add_argument_group(bullet + 'REFERENCE AA MODEL') + required_args.add_argument('-aa_tpr', dest='aa_tpr_filename', help=config.help_aa_tpr, type=str, + default=config.metavar_aa_tpr, + metavar=' ' + scg.par_wrap(config.metavar_aa_tpr)) + required_args.add_argument('-aa_traj', dest='aa_traj_filename', 
help=config.help_aa_traj, + type=str, default=config.metavar_aa_traj, + metavar=' ' + scg.par_wrap(config.metavar_aa_traj)) + required_args.add_argument('-cg_map', dest='cg_map_filename', help=config.help_cg_map, type=str, + default=config.metavar_cg_map, + metavar=' ' + scg.par_wrap(config.metavar_cg_map)) + required_args.add_argument('-mapping', dest='mapping_type', help=config.help_mapping_type, type=str, + default='COM', metavar=' (COM)') + + sim_filenames_args = args_parser.add_argument_group(bullet + 'CG MODEL OPTIMIZATION') + sim_filenames_args.add_argument('-cg_itp', dest='cg_itp_filename', + help='ITP file of the CG model to optimize', type=str, + default=config.metavar_cg_itp, + metavar=' ' + scg.par_wrap(config.metavar_cg_itp)) + sim_filenames_args.add_argument('-cg_gro', dest='gro_input_filename', + help='Starting GRO file used for iterative simulation\nWill be minimized and relaxed before each MD run', + type=str, default='start_conf.gro', + metavar=' (start_conf.gro)') + sim_filenames_args.add_argument('-cg_top', dest='top_input_filename', + help='TOP file used for iterative simulation', type=str, + default='system.top', metavar=' (system.top)') + sim_filenames_args.add_argument('-cg_mdp_mini', dest='mdp_minimization_filename', + help='MDP file used for minimization runs', type=str, + default='mini.mdp', metavar=' (mini.mdp)') + sim_filenames_args.add_argument('-cg_mdp_equi', dest='mdp_equi_filename', + help='MDP file used for equilibration runs', type=str, + default='equi.mdp', metavar=' (equi.mdp)') + sim_filenames_args.add_argument('-cg_mdp_md', dest='mdp_md_filename', + help='MDP file used for the MD runs analyzed for optimization', + type=str, default='md.mdp', metavar=' (md.mdp)') + + optional_args4 = args_parser.add_argument_group( + opt_args_header + '\n\n' + bullet + 'FILES HANDLING') + optional_args4.add_argument('-in_dir', dest='input_folder', + help='Additional prefix path used to find argument-provided files\nIf ambiguous, files found 
without prefix are preferred', + type=str, default='.', metavar='') + optional_args4.add_argument('-out_dir', dest='output_folder', + help='Directory where to store all outputs of this program\nDefault -out_dir is named after timestamp', + type=str, default='', metavar='') + + optional_args1 = args_parser.add_argument_group(bullet + 'GROMACS SETTINGS') + optional_args1.add_argument('-gmx', dest='gmx_path', help=config.help_gmx_path, type=str, + default=config.gmx_path, + metavar=' ' + scg.par_wrap(config.gmx_path)) + optional_args1.add_argument('-nt', dest='nb_threads', + help="Nb of threads to use, forwarded to 'gmx mdrun -nt'", type=int, + default=0, metavar=' (0)') + optional_args1.add_argument('-mpi', dest='mpi_tasks', + help="Nb of mpi programs (X), triggers 'mpirun -np X gmx'", type=int, + default=0, metavar='') + optional_args1.add_argument('-gpu_id', dest='gpu_id', + help='String (use quotes) space-separated list of GPU device IDs', + type=str, default='', metavar='') + optional_args1.add_argument('-gmx_args_str', dest='gmx_args_str', + help='String (use quotes) of arguments to forward to gmx mdrun\nIf provided, arguments -nt and -gpu_id are ignored', + type=str, default='', metavar='') + optional_args1.add_argument('-mini_maxwarn', dest='mini_maxwarn', + help='Max. 
number of warnings to ignore, forwarded to gmx\ngrompp -maxwarn at each minimization step', + type=int, default=1, metavar=' (1)') + optional_args1.add_argument('-sim_kill_delay', dest='sim_kill_delay', + help='Time (s) after which to kill a simulation that has not been\nwriting into its log file, in case a simulation gets stuck', + type=int, default=60, metavar=' (60)') + + optional_args2 = args_parser.add_argument_group(bullet + 'CG MODEL SCALING') + optional_args2.add_argument('-aa_rg_offset', dest='aa_rg_offset', + help='Radius of gyration offset (nm) to be applied to AA data\naccording to your potential bonds rescaling (for display only)', + type=float, default=0.00, metavar=' ' + scg.par_wrap('0.00')) + optional_args2.add_argument('-bonds_scaling', dest='bonds_scaling', + help=config.help_bonds_scaling, type=float, + default=config.bonds_scaling, + metavar=' ' + scg.par_wrap(config.bonds_scaling)) + optional_args2.add_argument('-bonds_scaling_str', dest='bonds_scaling_str', + help=config.help_bonds_scaling_str, type=str, + default=config.bonds_scaling_str, metavar='') + optional_args2.add_argument('-min_bonds_length', dest='min_bonds_length', + help=config.help_min_bonds_length, type=float, + default=config.min_bonds_length, + metavar=' ' + scg.par_wrap(config.min_bonds_length)) + + optional_args5 = args_parser.add_argument_group(bullet + 'CG MODEL SCORING') + optional_args5.add_argument('-cg_time_short', dest='sim_duration_short', + help='Simulation time (ns) of the MD runs analyzed for optimization\nIn opti. cycles 1 and 2, this will modify MDP file for the MD runs', + type=float, default=10, metavar=' (10)') + optional_args5.add_argument('-cg_time_long', dest='sim_duration_long', + help='Simulation time (ns) of the MD runs analyzed for optimization\nIn opti. 
cycle 3, this will modify MDP file for the MD runs', + type=float, default=25, metavar=' (25)') + optional_args5.add_argument('-b2a_score_fact', dest='bonds2angles_scoring_factor', + help=config.help_bonds2angles_scoring_factor, type=float, + default=config.bonds2angles_scoring_factor, + metavar=' ' + scg.par_wrap(config.bonds2angles_scoring_factor)) + optional_args5.add_argument('-bw_constraints', dest='bw_constraints', + help=config.help_bw_constraints, type=float, + default=config.bw_constraints, + metavar=' ' + scg.par_wrap(config.bw_constraints)) + optional_args5.add_argument('-bw_bonds', dest='bw_bonds', help=config.help_bw_bonds, type=float, + default=config.bw_bonds, + metavar=' ' + scg.par_wrap(config.bw_bonds)) + optional_args5.add_argument('-bw_angles', dest='bw_angles', help=config.help_bw_angles, + type=float, default=config.bw_angles, + metavar=' ' + scg.par_wrap(config.bw_angles)) + optional_args5.add_argument('-bw_dihedrals', dest='bw_dihedrals', help=config.help_bw_dihedrals, + type=float, default=config.bw_dihedrals, + metavar=' ' + scg.par_wrap(config.bw_dihedrals)) + optional_args5.add_argument('-bonds_max_range', dest='bonded_max_range', + help=config.help_bonds_max_range, type=float, + default=config.bonds_max_range, + metavar=' ' + scg.par_wrap(config.bonds_max_range)) + + optional_args6 = args_parser.add_argument_group(bullet + 'CG MODEL FORCE CONSTANTS') + optional_args6.add_argument('-max_fct_bonds_f1', dest='default_max_fct_bonds_opti', + help=config.help_max_fct_bonds, type=float, + default=config.default_max_fct_bonds_opti, + metavar=' ' + scg.par_wrap(config.default_max_fct_bonds_opti)) + optional_args6.add_argument('-max_fct_angles_f1', dest='default_max_fct_angles_opti_f1', + help=config.help_max_fct_angles_f1, type=float, + default=config.default_max_fct_angles_opti_f1, + metavar=' ' + scg.par_wrap(config.default_max_fct_angles_opti_f1)) + optional_args6.add_argument('-max_fct_angles_f2', dest='default_max_fct_angles_opti_f2', + 
help=config.help_max_fct_angles_f2, type=float, + default=config.default_max_fct_angles_opti_f2, + metavar=' ' + scg.par_wrap(config.default_max_fct_angles_opti_f2)) + optional_args6.add_argument('-max_fct_dihedrals_f149', + dest='default_abs_range_fct_dihedrals_opti_func_with_mult', + help=config.help_max_fct_dihedrals_with_mult, type=float, + default=config.default_abs_range_fct_dihedrals_opti_func_with_mult, + metavar='' + scg.par_wrap( + config.default_abs_range_fct_dihedrals_opti_func_with_mult)) + optional_args6.add_argument('-max_fct_dihedrals_f2', + dest='default_max_fct_dihedrals_opti_func_without_mult', + help=config.help_max_fct_dihedrals_without_mult, type=float, + default=config.default_max_fct_dihedrals_opti_func_without_mult, + metavar='' + scg.par_wrap( + config.default_max_fct_dihedrals_opti_func_without_mult)) + + optional_args3 = args_parser.add_argument_group(bullet + 'OTHERS') + optional_args3.add_argument('-temp', dest='temp', + help='Temperature used to perform Boltzmann inversion (K)', + type=float, default=config.sim_temperature, + metavar=' ' + scg.par_wrap(config.sim_temperature)) + optional_args3.add_argument('-keep_all_sims', dest='keep_all_sims', + help='Store all gmx files for all simulations, may use disk space', + action='store_true', default=False) + optional_args3.add_argument('-h', '--help', help='Show this help message and exit', action='help') + optional_args3.add_argument('-v', '--verbose', dest='verbose', help=config.help_verbose, + action='store_true', default=False) + + # display help if script was called without arguments + if len(sys.argv) == 1: + args_parser.print_help() + sys.exit() + + # arguments handling, display command line if help or no arguments provided + # argcomplete.autocomplete(parser) + ns = args_parser.parse_args() + input_cmdline = ' '.join(map(cmd_quote, sys.argv)) + ns.exec_folder = time.strftime( + "MODEL_OPTI__STARTED_%d-%m-%Y_%Hh%Mm%Ss") # default folder name for all files of this optimization run, 
in case none is provided + if ns.output_folder != '': + ns.exec_folder = ns.output_folder + + print('Working directory:', os.getcwd()) + print('Command line:', input_cmdline) + print('Results directory:', ns.exec_folder) + + run(ns) + +if __name__ == "__main__": + main() diff --git a/swarmcg/shared/__init__.py b/swarmcg/shared/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/swarmcg/shared/exceptions.py b/swarmcg/shared/exceptions.py new file mode 100644 index 0000000..8fa4346 --- /dev/null +++ b/swarmcg/shared/exceptions.py @@ -0,0 +1,90 @@ +from .styling import sep + +header_error = "\n\n-- ! ERROR ! --\n" +header_warning = "\n-- ! WARNING ! --\n" +header_gmx_error = "\n\n GMX ERROR MSG\n" + sep + "\n\n" + +# TODO: disable showing exceptions traceback for distributing to users ? maybe there is a way we can keep it active for our dev purpose though ? + +class BaseError(Exception): + """ + Base exception class. + """ + def __init__(self, msg): + self.message = header_error + msg + super().__init__(self.message) + + +class ExecError(Exception): + """ + Base exception class. + """ + def __init__(self, msg): + self.message = header_gmx_error + msg + super().__init__(self.message) + + +class InvalidArgument(BaseError): + + base_msg = ( + "Cannot interpret argument -{name} as provided: {value}. " + "{additional_info}" + "Please check your parameters, or look for help in an example." 
+ ) + + def __init__(self, argument, value, additional_info=""): + self.argument = argument + self.value = value + self.additional_info = additional_info + self.message = self.base_msg.format( + name=argument, value=value, additional_info=additional_info + ) + super().__init__(self.message) + + +class IncompleteOptimisationFile(BaseError): + pass + + +class OptimisationResultsError(BaseError): + pass + + +class MissingCoordinateFile(BaseError): + pass + + +class MissingTrajectoryFile(BaseError): + pass + + +class MissingItpFile(BaseError): + pass + + +class MissingIndexFile(BaseError): + pass + + +class InputArgumentError(BaseError): + pass + + +class ExecutableNotFound(BaseError): + pass + + +class AvoidOverwritingFolder(BaseError): + pass + + +class MDSimulationInputError(BaseError): + pass + + +class MissformattedFile(BaseError): + pass + + +class ComputationError(ExecError): + pass \ No newline at end of file diff --git a/swarmcg/shared/io.py b/swarmcg/shared/io.py new file mode 100644 index 0000000..e69de29 diff --git a/swarmcg/shared/styling.py b/swarmcg/shared/styling.py new file mode 100644 index 0000000..9bf0cdb --- /dev/null +++ b/swarmcg/shared/styling.py @@ -0,0 +1,142 @@ +import swarmcg +from .. import config + +sep = '----------------------------------------------------------------------' +sep_close = '+---------------------------------------------------------------------------------------------+' +header_warning = '\n-- ! WARNING ! --\n' +header_error = '\n-- ! ERROR ! --\n' +header_gmx_error = sep + '\n GMX ERROR MSG\n' + sep + '\n\n' + +# String 'S m a r t . 
C G' Ivrit style Fitted/Full +def header_package(module_line): + return f""" + + + ███████╗██╗ ██╗ █████╗ ██████╗ ███╗ ███╗ ██████╗ ██████╗ + ██╔════╝██║ ██║██╔══██╗██╔══██╗████╗ ████║ ██╔════╝██╔════╝ + ███████╗██║ █╗ ██║███████║██████╔╝██╔████╔██║█████╗██║ ██║ ███╗ + ╚════██║██║███╗██║██╔══██║██╔══██╗██║╚██╔╝██║╚════╝██║ ██║ ██║ + ███████║╚███╔███╔╝██║ ██║██║ ██║██║ ╚═╝ ██║ ╚██████╗╚██████╔╝ + ╚══════╝ ╚══╝╚══╝ ╚═╝ ╚═╝╚═╝ ╚═╝╚═╝ ╚═╝ ╚═════╝ ╚═════╝ v {swarmcg.__version__ } + {module_line} +{sep_close} +| Swarm-CG is distributed under the terms of the MIT License. | +| | +| Feedback, questions and bug reports are welcome at: | +| {config.github_url}/issues | +| | +| If you found Swarm-CG useful in your research, please cite: | +| Swarm-CG: Automatic parametrization of bonded terms in MARTINI-based | +| CG models of simple to complex molecules via FST-PSO, 2020 ChemRxiv, | +| Empereur-mot C., Pesce L., Doni G., Bochicchio D., Capelli R., Perego C., Pavan G.M. | +| | +| Swarm-CG relies on FST-PSO: | +| Fuzzy Self-Tuning PSO: A settings-free algorithm for global optimization, | +| 2018 Swarm Evo Comp, Nobile M.S., Cazzaniga P., Besozzi D., Colombo R., Mauri G., Pasia G. | +{sep_close} +""" + +ANALYSE_DESCR = """ +This module produces a visual summary (big plot) of an optimization procedure started with +module 'scg_optimize' to refine the bonded terms of a coarse-grained (CG) molecular model. +It works whether the optimization is ongoing or finished. The plot will be produced in the +directory provided via argument -opti_dir. + +Top row displays bonded terms score (global and breakdown) together with radius of gyration +(Rg) and solvent accessible surface area (SASA) estimations. 
We call these estimations because +they are calculated on short simulations used during optimization (time depends on parameters +used for optimization), therefore one should always run a long simulation at the end of the +optimization process, from which one can calculate the real Rg and SASA values for your model. + +Other rows display bond, angle and dihedral parameters tested together with their independent +score (distance from the AA distributions using EMD/Wasserstein). This allows to diagnose +issues, notably related to the topology defined in the ITP file, for example if the score +cannot go down for a specific group of bonds, angles or dihedrals. The optimization procedure +is in principle robust, as demonstrated in the paper, however problems can arise from the CG +representation used (e.g. if topology is too restrictive or incorrectly defined) and non-bonded +parameters (e.g. strong intra-molecular attractions that would not allow the molecule to adopt +extended conformations). +""" + +EVALUATE_DESCR = """ +This module enables quick evaluation of the fit of bond, angle and dihedral distributions between +a CG model trajectory and a reference AA model trajectory of an identical molecule, in a single +comprehensive figure. The figure's rows display bond, angle and dihedral distributions for groups +present in your system according to the ITP file. + +Arguments allows to specify scaling of the AA bonds used as reference to optimize the CG model. +An image displaying all AA reference distributions will be created at the very beginning of the +optimization process. You can check it to make sure scaling is conform to your expectations. + +The CG model preliminary ITP file follows the standard ITP format, with one subtlety. The file +can include groups of bonds, angles and dihedrals that will be considered identical. Their +distributions will be averaged within groups. This is important to obtain reliable results for +symmetrical molecules.
Groups can be formed using empty line(s) or comment(s), like this: + + [ angles ] + + ; i j k funct angle force.c. + ; grp 1 + 5 6 10 1 150 40 + 9 8 11 1 150 40 + ; grp 2 + 1 6 10 2 120 0 ; NOTE: either comment(s) or empty line(s) + 4 8 11 2 120 0 ; separate groups of bonds/ang/dihe. + +The AA trajectory is mapped on-the-fly using file from argument -cg_map, which uses gromacs NDX +file format. Periodic boundary conditions are handled internally if the input trajectories +contain box dimensions. +""" + +OPTIMISE_DESCR = """ +This module automatically optimizes the bonded parameters of a CG model to best match the bonds, +angles and dihedrals distributions of a reference AA model. Different sets of bonded parameters +are explored via swarm optimization (FST-PSO) and iterative CG simulations. Bonded parameters are +evaluated for the matching they produce between AA and CG distributions via a scoring function +relying on the Earth Movers' Distance (EMD/Wasserstein). The process is designed to execute in +4-24h on a standard desktop machine, according to hardware, molecule size and simulations setup. + +This module has 2 optimization modes: + +(1) TUNE BOTH BONDS LENGTHS, ANGLES/DIHEDRALS VALUES AND THEIR FORCE CONSTANTS. First uses + Boltzmann Inversion to estimate bonds lengths, angles/dihedrals values and their force + constants, then runs optimization to best fit the reference AA-mapped distributions. + +(2) TUNE ONLY FORCE CONSTANTS FOR ANGLES/DIHEDRALS VALUES AND ALL PARAMETERS FOR BONDS. + Equilibrium values of angles/dihedrals provided in the preliminary CG ITP model are + conserved while optimization best fits reference AA-mapped distributions. 
+ +Independently of parameters, the expected input is: + +(1) Atomistic trajectory of the molecule (gromacs binary TPR + trajectory files XTC TRR) +(2) Mapping file, atoms to CG beads (gromacs NDX format) +(3) CG model ITP file to be optimized (group identical bonds/angles/dihedrals, see below) +(4) CG simulation files (initial configuration GRO + system TOP + MDP files) + +You can prepare a directory using default input filenames, then provide only argument -in_dir. +If -in_dir is provided, all filenames provided as arguments will also be searched for within +this directory. Demonstration data are available at ''' + config.github_url + '''. + +Arguments allows to specify scaling of the AA bonds used as reference to optimize the CG model. +An image displaying all AA reference distributions will be created at the very beginning of the +optimization process. You can check it to make sure scaling is conform to your expectations. + +The CG model preliminary ITP file follows the standard ITP format, with one subtlety. The file +can include groups of bonds, angles and dihedrals that will be considered identical. Their +distributions will be averaged within groups. This is important to obtain reliable results for +symmetrical molecules. Groups can be formed using empty line(s) or comment(s), like this: + +[ angles ] + +; i j k funct angle force.c. +; grp 1 + 5 6 10 1 150 40 ; NOTE 1: force constants can be set to 0 + 9 8 11 1 150 40 ; in the prelim. model to optimize +; grp 2 + 1 6 10 2 120 0 ; NOTE 2: either comment(s) or empty line(s) + 4 8 11 2 120 0 ; separate groups of bonds/ang/dihe. + +The AA trajectory is mapped on-the-fly using file from argument -cg_map, which uses gromacs NDX +file format. 
Periodic boundary conditions are handled internally if the input AA trajectory +contains box dimensions. +""" \ No newline at end of file diff --git a/swarmcg/shared/utils.py b/swarmcg/shared/utils.py new file mode 100644 index 0000000..7cf004f --- /dev/null +++ b/swarmcg/shared/utils.py @@ -0,0 +1,59 @@ +import sys + +import numpy as np + +from . import exceptions + + +def forward_fill(arr, cond_value): + """ + Forward fill a list of values with the last valid one. + """ + valid_val = None + for i in range(len(arr)): + if arr[i] != cond_value: + valid_val = arr[i] + else: + j = i + while valid_val is None and j < len(arr): + j += 1 + try: + if arr[j] != cond_value: + valid_val = arr[j] + break + except IndexError as e: + msg = ( + "Unexpected read of the optimization results, " + "please check that your simulations have not all been crashing" + ) + raise exceptions.OptimisationResultsError(msg) + + if valid_val is not None: + arr[i] = valid_val + else: + msg = ( + "All simulations crashed, nothing to display. " + "Please check the setup and settings of your optimization run." + ) + raise exceptions.OptimisationResultsError(msg) + + return arr + + +def sma(interval, window_size): + """ + Implement simple moving average with convolution operator. + """ + window = np.ones(int(window_size)) / float(window_size) + return np.convolve(interval, window, 'same') + + +def ewma(hist, alpha, windowSize): + """ + Implement exponential moving average with convolution operator.
+ """ + wghts = (1 - alpha) ** np.arange(windowSize) + wghts = wghts / wghts.sum() + out = np.full(len(hist), np.nan) + out = np.convolve(hist, wghts, 'same') + return out diff --git a/swarmcg/simulations/__init__.py b/swarmcg/simulations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/swarmcg/simulations/potentials.py b/swarmcg/simulations/potentials.py new file mode 100644 index 0000000..48cb777 --- /dev/null +++ b/swarmcg/simulations/potentials.py @@ -0,0 +1,44 @@ +import numpy as np + + +def gmx_bonds_func_1(x, a, b, c): + """ + Gromacs potential function 1 for bonds. + """ + return a / 2 * (x - b) ** 2 + c + + +def gmx_angles_func_1(x, a, b, c): + """ + Gromacs potential function 1 for angles. + """ + return gmx_bonds_func_1(x, a, b, c) + + +def gmx_angles_func_2(x, a, b, c): + """ + Gromacs potential function 2 for angles. + """ + return a / 2 * (np.cos(x) - np.cos(b)) ** 2 + c + + +def gmx_dihedrals_func_1(mult): + """ + Gromacs potential function 1 for dihedrals -- generated on the fly with adjusted multiplicity + """ + def mult_adjusted(x, a, b, c): + return a * (1 + np.cos(mult * x - b)) + c + return mult_adjusted + + +def gmx_dihedrals_func_2(x, a, b, c): + """ + Gromacs potential function 2 for dihedrals -- the same as potential function 1 for angles + """ + return gmx_bonds_func_1(x, a, b, c) # it's actually the same + + +# TODO: for dihedral function 9, this is the merging of several potentials of +# TODO: gmx_dihedrals_func_1 -- here one of mult=1 together with another of mult=2 +# def f(x,a,b,c,d,e): +# return a * (1+np.cos(x-b)) + d * (1+np.cos(2*x-e)) + c \ No newline at end of file diff --git a/swarmcg/simulations/vs_functions.py b/swarmcg/simulations/vs_functions.py new file mode 100644 index 0000000..3c2a9de --- /dev/null +++ b/swarmcg/simulations/vs_functions.py @@ -0,0 +1,181 @@ +import numpy as np +import MDAnalysis as mda +from ..shared import exceptions + +# All these functions for virtual sites definitions are explained
+# in the GROMACS manual part 5.5.7 (page 379 in manual version 2020)
+# Check also the bonded potentials table best viewed here:
+# http://manual.gromacs.org/documentation/2020/reference-manual/topologies/topology-file-formats.html#tab-topfile2
+
+# TODO: test all these functions
+
+
+# Functions for virtual_sites2
+
+# vs_2 func 1 -> Linear combination using 2 reference points
+# weighted COG using a percentage in [0, 1]
+# weight a is applied to the second bead ID (j) and 1-a to the first one (i)
+def vs2_func_1(ns, traj, vs_def_beads_ids, vs_params):
+
+    i, j = vs_def_beads_ids
+    a = vs_params  # weight
+    weights = np.array([1-a, a])
+
+    for ts in ns.aa2cg_universe.trajectory:
+        traj[ts.frame] = ns.aa2cg_universe.atoms[[i, j]].center(weights)
+
+
+# vs_2 func 2 -> Linear combination using 2 reference points
+# on the vector from i to j, at given distance (nm)
+# NOTE: it seems this one exists only since GROMACS 2020
+# TODO: check this one with a GMX 2020 installation
+def vs2_func_2(ns, traj, vs_def_beads_ids, vs_params):
+
+    i, j = vs_def_beads_ids
+    a = vs_params  # nm
+    a = a * 10  # convert nm to angstrom for MDA
+
+    for ts in ns.aa2cg_universe.trajectory:
+        pos_i = ns.aa2cg_universe.atoms[i].position
+        pos_j = ns.aa2cg_universe.atoms[j].position
+        r_ij = pos_j - pos_i
+        traj[ts.frame] = pos_i + a * r_ij / mda.lib.mdamath.norm(r_ij)
+
+
+# Functions for virtual_sites3
+
+# vs_3 func 1 -> Linear combination using 3 reference points
+# in the plane, using sum of vectors from i to j and from i to k -- NOTE(review): the GROMACS manual gives x_v = x_i + a*r_ij + b*r_ik with unitless a, b; confirm the nm scaling, normalisation and /2 used below
+def vs3_func_1(ns, traj, vs_def_beads_ids, vs_params):
+
+    i, j, k = vs_def_beads_ids
+    a, b = vs_params  # nm, nm
+    a, b = a * 10, b * 10  # convert nm to angstrom for MDA
+
+    for ts in ns.aa2cg_universe.trajectory:
+        pos_i = ns.aa2cg_universe.atoms[i].position
+        pos_j = ns.aa2cg_universe.atoms[j].position
+        pos_k = ns.aa2cg_universe.atoms[k].position
+        r_ij = pos_j - pos_i
+        r_ik = pos_k - pos_i
+        traj[ts.frame] = pos_i + a * r_ij / mda.lib.mdamath.norm(r_ij) / 2 + b * r_ik / mda.lib.mdamath.norm(r_ik) / 2
+
+
+# vs_3 func 2 -> Linear combination using 3 reference points
+# in the plane, using WEIGHTS sum of vectors from i to j and from j to k + fixed distance
+# I used their formula (hopefully) so the form differs from the explanation on line above -- NOTE(review): the manual's 3fd direction is r_ij + a*r_jk; confirm that (1-a)*r_ij + a*r_jk below is intended
+def vs3_func_2(ns, traj, vs_def_beads_ids, vs_params):
+
+    i, j, k = vs_def_beads_ids
+    a, b = vs_params  # weight, nm
+    b = b * 10  # convert nm to angstrom for MDA
+
+    for ts in ns.aa2cg_universe.trajectory:
+        pos_i = ns.aa2cg_universe.atoms[i].position
+        pos_j = ns.aa2cg_universe.atoms[j].position
+        pos_k = ns.aa2cg_universe.atoms[k].position
+        r_ij = pos_j - pos_i
+        r_jk = pos_k - pos_j
+        comb_ijk = (1-a) * r_ij + a * r_jk
+        traj[ts.frame] = pos_i + b * (comb_ijk / mda.lib.mdamath.norm(comb_ijk))
+
+
+# vs_3 func 3 -> Linear combination using 3 reference points
+# angle in the plane defined, at given distance of the 3rd point
+def vs3_func_3(ns, traj, vs_def_beads_ids, vs_params):
+
+    i, j, k = vs_def_beads_ids
+    ang_deg, d = vs_params  # degrees, nm
+    ang_rad = np.deg2rad(ang_deg)  # convert degrees to radians
+    d = d * 10  # convert nm to angstrom for MDA
+
+    for ts in ns.aa2cg_universe.trajectory:
+        pos_i = ns.aa2cg_universe.atoms[i].position
+        pos_j = ns.aa2cg_universe.atoms[j].position
+        pos_k = ns.aa2cg_universe.atoms[k].position
+        r_ij = pos_j - pos_i
+        r_jk = pos_k - pos_j
+        comb_ijk = r_jk - (np.dot(r_ij, r_jk) / np.dot(r_ij, r_ij)) * r_ij
+        traj[ts.frame] = pos_i + d * np.cos(ang_rad) * (r_ij / mda.lib.mdamath.norm(r_ij)) + d * np.sin(ang_rad) * (comb_ijk / mda.lib.mdamath.norm(comb_ijk))
+
+
+# vs_3 func 4 -> Linear combination using 3 reference points
+# out of plane: x_v = x_i + a*r_ij + b*r_ik + c*(r_ij x r_ik), as given in the GROMACS manual (3out)
+def vs3_func_4(ns, traj, vs_def_beads_ids, vs_params):
+
+    i, j, k = vs_def_beads_ids
+    a, b, c = vs_params  # weight, weight, nm**(-1)
+    c = c / 10  # convert nm**(-1) to angstrom**(-1) for MDA
+
+    for ts in ns.aa2cg_universe.trajectory:
+        pos_i = ns.aa2cg_universe.atoms[i].position
+        pos_j = ns.aa2cg_universe.atoms[j].position
+        pos_k = ns.aa2cg_universe.atoms[k].position
+        r_ij = pos_j - pos_i
+        r_ik = pos_k - pos_i
+        traj[ts.frame] = pos_i + a * r_ij + b * r_ik + c * np.cross(r_ij, r_ik)  # c scales the (non-normalised) cross product, giving a length in angstrom
+
+
+# Functions for virtual_sites4
+
+# vs_4 func 2 -> Linear combination using 4 reference points
+# NOTE: only function 2 is defined for vs_4 in GROMACS, because it replaces function 1
+# which still exists for retro compatibility but its usage must be avoided
+def vs4_func_2(ns, traj, vs_def_beads_ids, vs_params):
+
+    i, j, k, l = vs_def_beads_ids
+    a, b, c = vs_params  # weight, weight, nm
+    c = c * 10  # convert nm to angstrom for MDA
+
+    for ts in ns.aa2cg_universe.trajectory:
+        pos_i = ns.aa2cg_universe.atoms[i].position
+        pos_j = ns.aa2cg_universe.atoms[j].position
+        pos_k = ns.aa2cg_universe.atoms[k].position
+        pos_l = ns.aa2cg_universe.atoms[l].position
+        r_ij = pos_j - pos_i
+        r_ik = pos_k - pos_i
+        r_il = pos_l - pos_i
+        r_ja = a * r_ik - r_ij
+        r_jb = b * r_il - r_ij
+        r_m = np.cross(r_ja, r_jb)
+        traj[ts.frame] = pos_i - c * (r_m / mda.lib.mdamath.norm(r_m))  # NOTE(review): the manual's 4fdn form is x_i + c * r_m/|r_m|; confirm the sign
+
+
+# Functions for virtual_sitesn
+
+# vs_n func 1 -> Center of Geometry
+def vsn_func_1(ns, traj, vs_def_beads_ids):
+
+    for ts in ns.aa2cg_universe.trajectory:
+        traj[ts.frame] = ns.aa2cg_universe.atoms[vs_def_beads_ids].center_of_geometry(pbc=None)
+
+
+# vs_n func 2 -> Center of Mass
+def vsn_func_2(ns, traj, vs_def_beads_ids, bead_id):
+
+    # inform user if this VS definition uses beads (or VS) with mass 0,
+    # because this is COM so 0 mass means a bead that was marked for defining the VS is in fact ignored
+    zero_mass_beads_ids = []
+    for bid in vs_def_beads_ids:
+        if bid in ns.cg_itp['virtual_sitesn']:
+            if ns.cg_itp['virtual_sitesn'][bid]['mass'] == 0:
+                zero_mass_beads_ids.append(bid)
+    if len(zero_mass_beads_ids) > 0:
+        print(' WARNING: Virtual site ID {} uses function 2 for COM, but its definition contains IDs {} which have no mass'.format(bead_id + 1, ' '.join(str(bid) for bid in zero_mass_beads_ids)))
+
+    for ts in ns.aa2cg_universe.trajectory:
+        traj[ts.frame] = ns.aa2cg_universe.atoms[vs_def_beads_ids].center_of_mass(pbc=None)
+
+
+# vs_n func 3 -> Center of Weights (each atom has a given weight, pairwise formatting: id1 w1 id2 w2 ..)
+def vsn_func_3(ns, traj, vs_def_beads_ids, vs_params):
+
+    masses_and_weights = np.array([ns.aa2cg_universe.atoms[vs_def_beads_ids[i]].mass * vs_params[i] for i in range(len(vs_def_beads_ids))])
+    for ts in ns.aa2cg_universe.trajectory:
+        traj[ts.frame] = ns.aa2cg_universe.atoms[vs_def_beads_ids].center(masses_and_weights)
+
+
+
+
+
+
diff --git a/swarmcg/swarmCG.py b/swarmcg/swarmCG.py
index c57764b..6f65d83 100644
--- a/swarmcg/swarmCG.py
+++ b/swarmcg/swarmCG.py
@@ -1,202 +1,53 @@
-from pyemd import emd
-import sys, re, random, os, shutil, subprocess, signal, time, contextlib, warnings, textwrap
+# some numpy version have this ufunc warning at import + many packages call numpy and display annoying warnings
+import warnings
+warnings.filterwarnings("ignore")
+import sys, re, random, os, shutil, subprocess, signal, time, contextlib
+import warnings, collections
+from datetime import datetime
 # matplotlib new version has some problems with incorrectly uninstalled files at version upgrade and display a lot of warnings
 # also some numpy version have this ufunc warning at import
-warnings.filterwarnings("ignore")
 import numpy as np
 import matplotlib
-matplotlib.use('AGG') # use the Anti-Grain Geometry non-interactive backend suited for scripted PNG creation
 import matplotlib.pyplot as plt
+import MDAnalysis as mda
+from pyemd import emd
 from scipy.spatial.distance import cdist
-from scipy.cluster.hierarchy import linkage, fcluster
-from scipy.spatial.distance import squareform
 from scipy.optimize import curve_fit
-from scipy.signal import lfiltic, lfilter
-from itertools import compress
-# import networkx as nx
-import MDAnalysis as mda
-import collections
-from datetime import datetime
-from . import config -warnings.resetwarnings() - -# TODO: When provided trajectory file does NOT contain PBC infos (box position and size for each frame, which are present in XTC format for example), we want to stil accept the provided trajectory format (if accepted by MDAnalysis) but we automatically disable the handling of PBC by the code +from swarmcg import config +from swarmcg.shared import utils, styling +from swarmcg.shared import exceptions +from swarmcg.simulations.potentials import (gmx_bonds_func_1, gmx_angles_func_1, gmx_angles_func_2, + gmx_dihedrals_func_1, gmx_dihedrals_func_2) +import swarmcg.simulations.vs_functions as vsf -# String 'S m a r t . C G' Ivrit style Fitted/Full -def header_package(module_line): - - return '''\ - - - ███████╗██╗ ██╗ █████╗ ██████╗ ███╗ ███╗ ██████╗ ██████╗ - ██╔════╝██║ ██║██╔══██╗██╔══██╗████╗ ████║ ██╔════╝██╔════╝ - ███████╗██║ █╗ ██║███████║██████╔╝██╔████╔██║█████╗██║ ██║ ███╗ - ╚════██║██║███╗██║██╔══██║██╔══██╗██║╚██╔╝██║╚════╝██║ ██║ ██║ - ███████║╚███╔███╔╝██║ ██║██║ ██║██║ ╚═╝ ██║ ╚██████╗╚██████╔╝ - ╚══════╝ ╚══╝╚══╝ ╚═╝ ╚═╝╚═╝ ╚═╝╚═╝ ╚═╝ ╚═════╝ ╚═════╝ v '''+config.module_version+''' - '''+module_line+''' -'''+config.sep_close+''' -| Swarm-CG is distributed under the terms of the MIT License. | -| | -| Feedback, questions and bug reports are welcome at: | -| '''+config.github_url+'''/issues | -| | -| If you found Swarm-CG useful in your research, please cite: | -| Swarm-CG: Automatic parametrization of bonded terms in CG models | -| of simple to complex molecules via FST-PSO | -| Empereur-mot C., Pesce L., Bochicchio D., Perego C., Pavan G.M. ChemRxiv 2020 | -| | -| Swarm-CG relies on FST-PSO: | -| Fuzzy Self-Tuning PSO: A settings-free algorithm for global optimization | -| Nobile M.S., Cazzaniga P., Besozzi D., Colombo R., Mauri G., Pasia G. 
Swarm Evo Comp 2018 | -'''+config.sep_close+'\n' - - -def forward_fill(arr, cond_value): - - # out = np.empty(len(arr)) - valid_val = None - for i in range(len(arr)): - if arr[i] != cond_value: - # out[i] = arr[i] - valid_val = arr[i] - else: - j = i - while valid_val == None and j < len(arr): - j += 1 - try: - if arr[j] != cond_value: - valid_val = arr[j] - break - except IndexError: - sys.exit(config.header_error+'Unexpected read of the optimization results, please check that your simulations have not all been crashing') - if valid_val != None: - # out[i] = valid_val - arr[i] = valid_val - else: - sys.exit('All simulations crashed, nothing to display\nPlease check the setup and settings of your optimization run') - # return out - return - - -# simple moving average -def sma(interval, window_size): - window = np.ones(int(window_size))/float(window_size) - return np.convolve(interval, window, 'same') - +matplotlib.use('AGG') # use the Anti-Grain Geometry non-interactive backend suited for scripted PNG creation +warnings.resetwarnings() -# exponential moving average -def ewma(a, alpha, windowSize): - wghts = (1-alpha)**np.arange(windowSize) - wghts /= wghts.sum() - out = np.full(len(a), np.nan) - # out[windowSize-1:] = np.convolve(a, wghts, 'valid') - out = np.convolve(a, wghts, 'same') - return out +# TODO: When provided trajectory file does NOT contain PBC infos (box position and size for each frame, which are present in XTC format for example), we want to stil accept the provided trajectory format (if accepted by MDAnalysis) but we automatically disable the handling of PBC by the code # cast object as string, enclose by parentheses and return a string -- for arguments display in help def par_wrap(string): - return '('+str(string)+')' + return f'({string})' # set MDAnalysis backend and number of threads def set_MDA_backend(ns): - # TODO: propagate number of threads to the functions calls of MDAnalysis, which means do a PR on MDAnalysis github - # ns.mda_backend = 
'serial' # atm force serial in case code is executed on clusters, because MDA will use all threads by default - - if mda.lib.distances.USED_OPENMP: # if MDAnalysis was compiled with OpenMP support + if mda.lib.distances.USED_OPENMP: # if MDAnalysis was compiled with OpenMP support ns.mda_backend = 'OpenMP' + # TODO: propagate number of threads to the functions calls of MDAnalysis, which means do a PR on MDAnalysis github for having functions arguments to do that because atm we cannot else: # print('MDAnalysis was compiled without OpenMP support, calculation of bonds/angles/dihedrals distributions will use a single thread') ns.mda_backend = 'serial' - return - # draw random float between given range and apply rounding to given digit def draw_float(low, high, dg_rnd): - - return round(random.uniform(low, high), dg_rnd) # low and high included - -# # read atomistic ITP -# def read_aa_itp_file(ns): - -# ns.all_atoms = dict() # atom centered connectivity + atom type + heavy atom boolean + bead(s) to which the atom belongs (can belong to multiple beads depending on mapping) -# all_atom_types = set() -# total_charge = 0 - -# with open(ns.aa_itp_filename, 'r') as fp: - -# itp_lines = fp.read().split('\n') -# itp_lines = [itp_line.strip().split(';')[0] for itp_line in itp_lines] - -# # error handling, check if the ITP file unexpectedly has several [bonds] or [atoms] sections -# nb_bonds_sections, nb_atoms_sections = 0, 0 -# for itp_line in itp_lines: -# if itp_line != '': -# if bool(re.search('\[.*bonds.*\]', itp_line)): -# nb_bonds_sections += 1 -# elif bool(re.search('\[.*atoms.*\]', itp_line)): -# nb_atoms_sections += 1 - -# if nb_bonds_sections != 1: -# sys.exit(config.header_error+'Incorrect number of [bonds] sections in atomistic ITP file ('+str(nb_bonds_sections)+' sections)') -# if nb_atoms_sections != 1: -# sys.exit(config.header_error+'Incorrect number of [atoms] sections in atomistic ITP file ('+str(nb_atoms_sections)+' sections)') - -# r_atoms, r_bonds = False, 
False - -# for itp_line in itp_lines: -# if itp_line != '': - -# if bool(re.search('\[.*atoms.*\]', itp_line)): -# r_atoms, r_bonds = True, False -# elif bool(re.search('\[.*bonds.*\]', itp_line)): -# r_atoms, r_bonds = False, True -# elif bool(re.search('\[.*\]', itp_line)): # ignore all other sections -# r_atoms, r_bonds = False, False -# else: -# sp_itp_line = itp_line.split() - -# if r_atoms: - -# atom_id, atom_type, atom_charge = int(sp_itp_line[0]), sp_itp_line[1][0].upper(), float(sp_itp_line[6]) -# # atom_id, atom_type, atom_charge, atom_mass = int(sp_itp_line[0]), sp_itp_line[1][0].upper(), float(sp_itp_line[6]), float(sp_itp_line[7]) -# atom_id -= 1 # retrieve indexing from 0 for atoms IDS for MDAnalysis -# total_charge += atom_charge - -# atom_heavy = True -# if atom_type[0].upper() == 'H': -# atom_heavy = False -# if not atom_id in ns.all_atoms: -# all_atom_types.add(atom_type) -# ns.all_atoms[atom_id] = {'conn': set(), 'atom_type': atom_type, 'heavy': atom_heavy, 'beads_ids': set(), 'beads_types': set(), 'residue_names': set()} -# # ns.all_atoms[atom_id] = {'conn': set(), 'atom_type': atom_type, 'heavy': atom_heavy, 'atom_mass': atom_mass, 'beads_ids': set(), 'beads_types': set(), 'residue_names': set()} -# else: -# ns.all_atoms[atom_id]['atom_type'] = atom_type -# ns.all_atoms[atom_id]['heavy'] = atom_heavy -# # ns.all_atoms[atom_id]['atom_mass'] = atom_mass - -# elif r_bonds: - -# atom_id_1, atom_id_2 = int(sp_itp_line[0])-1, int(sp_itp_line[1])-1 # retrieve indexing from 0 for atoms IDS for MDAnalysis -# if not atom_id_1 in ns.all_atoms: -# ns.all_atoms[atom_id_1] = {'conn': set(), 'atom_type': None, 'heavy': None, 'beads_ids': set(), 'beads_types': set(), 'residue_names': set()} -# # ns.all_atoms[atom_id_1] = {'conn': set(), 'atom_type': None, 'heavy': None, 'atom_mass': None, 'beads_ids': set(), 'beads_types': set(), 'residue_names': set()} -# ns.all_atoms[atom_id_1]['conn'].add(atom_id_2) -# if not atom_id_2 in ns.all_atoms: -# 
ns.all_atoms[atom_id_2] = {'conn': set(), 'atom_type': None, 'heavy': None, 'beads_ids': set(), 'beads_types': set(), 'residue_names': set()} -# # ns.all_atoms[atom_id_2] = {'conn': set(), 'atom_type': None, 'heavy': None, 'atom_mass': None, 'beads_ids': set(), 'beads_types': set(), 'residue_names': set()} -# ns.all_atoms[atom_id_2]['conn'].add(atom_id_1) - -# print('Net charge in the reference all atom model:', round(total_charge, 4)) - -# return + return round(random.uniform(low, high), dg_rnd) # low and high included # read one or more molecules from the AA TPR and trajectory @@ -207,15 +58,14 @@ def load_aa_data(ns): if ns.molname_in == None: - molname_atom_group = ns.aa_universe.atoms[0].fragment # select the AA connected graph for the first moltype found in TPR + molname_atom_group = ns.aa_universe.atoms[0].fragment # select the AA connected graph for the first moltype found in TPR ns.all_aa_mols.append(molname_atom_group) - # print(dir(molname_atom_group.atoms[0])) # for dev, display properties # atoms and their attributes for i in range(len(molname_atom_group)): atom_id = ns.aa_universe.atoms[i].id - atom_type = ns.aa_universe.atoms[i].type[0] # TODO: using only first letter but do better with masses for exemple to discriminate/verify 2 letters atom types + atom_type = ns.aa_universe.atoms[i].type[0] # this was to check for hydrogens but we don't need it atm atom_charge = ns.aa_universe.atoms[i].charge atom_heavy = True if atom_type[0].upper() == 'H': @@ -224,15 +74,8 @@ def load_aa_data(ns): ns.all_atoms[atom_id] = {'conn': set(), 'atom_type': atom_type, 'atom_charge': atom_charge, 'heavy': atom_heavy, 'beads_ids': set(), 'beads_types': set(), 'residue_names': set()} # print(ns.aa_universe.atoms[i].id, ns.aa_universe.atoms[i]) - # bonds - for i in range(len(molname_atom_group.bonds)): - - atom_id_1 = molname_atom_group.bonds[i][0].id - atom_id_2 = molname_atom_group.bonds[i][1].id - ns.all_atoms[atom_id_1]['conn'].add(atom_id_2) - 
ns.all_atoms[atom_id_2]['conn'].add(atom_id_1) - - # TODO: read multiple instances of give moltype -- for membranes analysis -- USE RESINDEX property or MOLNUM, check for more useful properties + # TODO: allow reading multiple instances of a molecule to build the reference distributions, + # for extended usage with NOT just one flexible molecule in solvent else: pass @@ -252,233 +95,428 @@ def load_aa_data(ns): # print(" ", sel.atoms[atom.id].fragment) # TODO: print this charge, if it is not null then we need to check for Q-type beads and for the 2 Q-types that have no defined charge value, raise a warning to tell the user he has to edit the file manually - net_charge = molname_atom_group.total_charge() + # net_charge = molname_atom_group.total_charge() # print('Net charge of the reference all atom model:', round(net_charge, 4)) - return - # check if functions present in CG ITP file can be used by this program, if not we throw an error -def verify_handled_functions(geom, func_obj, line_obj): +# authorized functions are defined in config.py (we switch them on in config.py once we have tested them) +def verify_handled_functions(geom, func, line_nb): try: - func = int(func_obj) + func = int(func) except (ValueError, IndexError): - sys.exit(config.header_error+'Error while reading CG ITP file at line '+str(line_obj)+', please check this file') - - if geom == 'constraint' and func not in config.handled_constraints_functions: - sys.exit(config.header_error+'Error while reading constraint function in CG ITP file at line '+str(line_obj)+'\nThis potential function is not implemented in Swarm-CG at the moment\nPlease use one of these constraint potential functions: '+", ".join(map(str, config.handled_constraints_functions))) - elif geom == 'bond' and func not in config.handled_bonds_functions: - sys.exit(config.header_error+'Error while reading bond function in CG ITP file at line '+str(line_obj)+'\nThis potential function is not implemented in Swarm-CG at the moment\nPlease 
use one of these bond potential functions: '+", ".join(map(str, config.handled_bonds_functions))) - elif geom == 'angle' and func not in config.handled_angles_functions: - sys.exit(config.header_error+'Error while reading angle function in CG ITP file at line '+str(line_obj)+'\nThis potential function is not implemented in Swarm-CG at the moment\nPlease use one of these angle potential functions: '+", ".join(map(str, config.handled_angles_functions))) - elif geom == 'dihedral' and func not in config.handled_dihedrals_functions: - sys.exit(config.header_error+'Error while reading dihedral function in CG ITP file at line '+str(line_obj)+'\nThis potential function is not implemented in Swarm-CG at the moment\nPlease use one of these dihedral potential functions: '+", ".join(map(str, config.handled_dihedrals_functions))) + msg = ( + f"Unexpected error while reading CG ITP file at line {line_nb}, please check this file." + ) + raise exceptions.MissformattedFile(msg) + + if func not in config.handled_functions[geom]: + functions_str = ", ".join(map(str, config.handled_functions[geom])) + if functions_str == '': + functions_str = 'None' + msg = ( + f"Error while reading {geom} function in CG ITP file at line {line_nb}.\n" + f"This potential function is not implemented in Swarm-CG at the moment.\n" + f"Please use one of these {geom} potential functions: {functions_str}.\n\n" + f"If you feel this is an important missing feature, please feel free to\n" + f"open an issue on github at {config.github_url}/issues." 
+ ) + raise exceptions.MissformattedFile(msg) + + return func + + +# TODO: the 3 next functions below (section_switch, vs_error_control, read_cg_itp_file) could be isolated in a sort of +# "topology reader" class, and next we would include the formats from other MD engines + +# sections switch for reading ITP sections +def section_switch(section_read, section_active): + + for section_current in section_read: + section_read[section_current] = False + if section_active is not None: + section_read[section_active] = True + + +# vs_type in [2, 3, 4, n], then they each have specific functions to define their positions +def vs_error_control(ns, bead_id, vs_type, func, line_nb, vs_def_beads_ids=None): + + if bead_id >= len(ns.cg_itp['atoms']): + msg = ( + f"A virtual site is defined for ID {bead_id + 1}, while this ID exceeds the number of atoms" + f" defined in the CG ITP file." + ) + raise exceptions.MissformattedFile(msg) + + if vs_def_beads_ids is not None: + for bid in vs_def_beads_ids: + if bid >= len(ns.cg_itp['atoms']): + msg = ( + f"The definition of virtual site ID {bead_id + 1} makes use of ID {bid + 1}, while this ID exceeds" + f" the number of atoms defined in the CG ITP file." + ) + raise exceptions.MissformattedFile(msg) + + if not ns.cg_itp['atoms'][bead_id]['bead_type'].startswith('v'): + msg = ( + f"CG bead number {bead_id + 1} is referenced to as a virtual site, but its bead type" + f" does NOT start with letter 'v'." 
+ ) + raise exceptions.MissformattedFile(msg) + + vs_type_str = f'virtual_sites{vs_type}' + func = verify_handled_functions(vs_type_str, func, line_nb) return func # read coarse-grain ITP -def read_cg_itp_file(ns, itp_lines): +def read_cg_itp_file(ns): print('Reading Coarse-Grained (CG) ITP file') - ns.cg_itp = {'moleculetype': {'molname': '', 'nrexcl': 0}, 'atoms': [], 'constraint': [], 'bond': [], 'angle': [], 'dihedral': [], 'exclusion': []} + ns.cg_itp = {'moleculetype': {'molname': '', 'nrexcl': 0}, 'atoms': [], 'constraint': [], 'bond': [], 'angle': [], 'dihedral': [], 'virtual_sites2': {}, 'virtual_sites3': {}, 'virtual_sites4': {}, 'virtual_sitesn': {}, 'exclusion': []} + ns.real_beads_ids, ns.vs_beads_ids = [], [] ns.nb_constraints, ns.nb_bonds, ns.nb_angles, ns.nb_dihedrals = -1, -1, -1, -1 + with open(ns.cg_itp_filename, 'r') as fp: + try: + itp_lines = fp.read().split('\n') + itp_lines = [itp_line.split(';')[0].strip() for itp_line in itp_lines] + except UnicodeDecodeError: + msg = "Cannot read CG ITP, it seems you provided a binary file." 
+ raise exceptions.MissformattedFile(msg) + + section_read = { + 'moleculetype': False, + 'atom': False, + 'constraint': False, + 'bond': False, + 'angle': False, + 'dihedral': False, + 'vs_2': False, + 'vs_3': False, + 'vs_4': False, + 'vs_n': False, + 'exclusion': False + } + for i in range(len(itp_lines)): itp_line = itp_lines[i] - if itp_line != '' and not itp_line.startswith(';'): + if itp_line != '': if bool(re.search('\[.*moleculetype.*\]', itp_line)): - r_moleculetype, r_atoms, r_constraints, r_bonds, r_angles, r_dihedrals, r_exclusion = True, False, False, False, False, False, False + section_switch(section_read, 'moleculetype') elif bool(re.search('\[.*atoms.*\]', itp_line)): - r_moleculetype, r_atoms, r_constraints, r_bonds, r_angles, r_dihedrals, r_exclusion = False, True, False, False, False, False, False + section_switch(section_read, 'atom') elif bool(re.search('\[.*constraint.*\]', itp_line)): - r_moleculetype, r_atoms, r_constraints, r_bonds, r_angles, r_dihedrals, r_exclusion = False, False, True, False, False, False, False + section_switch(section_read, 'constraint') elif bool(re.search('\[.*bond.*\]', itp_line)): - r_moleculetype, r_atoms, r_constraints, r_bonds, r_angles, r_dihedrals, r_exclusion = False, False, False, True, False, False, False + section_switch(section_read, 'bond') elif bool(re.search('\[.*angle.*\]', itp_line)): - r_moleculetype, r_atoms, r_constraints, r_bonds, r_angles, r_dihedrals, r_exclusion = False, False, False, False, True, False, False + section_switch(section_read, 'angle') elif bool(re.search('\[.*dihedral.*\]', itp_line)): - r_moleculetype, r_atoms, r_constraints, r_bonds, r_angles, r_dihedrals, r_exclusion = False, False, False, False, False, True, False + section_switch(section_read, 'dihedral') + elif bool(re.search('\[.*virtual_sites2.*\]', itp_line)): + section_switch(section_read, 'vs_2') + elif bool(re.search('\[.*virtual_sites3.*\]', itp_line)): + section_switch(section_read, 'vs_3') + elif 
bool(re.search('\[.*virtual_sites4.*\]', itp_line)): + section_switch(section_read, 'vs_4') + elif bool(re.search('\[.*virtual_sitesn.*\]', itp_line)): + section_switch(section_read, 'vs_n') elif bool(re.search('\[.*exclusion.*\]', itp_line)): - r_moleculetype, r_atoms, r_constraints, r_bonds, r_angles, r_dihedrals, r_exclusion = False, False, False, False, False, False, True - elif bool(re.search('\[.*\]', itp_line)): # all other sections - r_moleculetype, r_atoms, r_constraints, r_bonds, r_angles, r_dihedrals, r_exclusion = False, False, False, False, False, False, False + section_switch(section_read, 'exclusion') + elif bool(re.search('\[.*\]', itp_line)): # all other sections + section_switch(section_read, None) else: sp_itp_line = itp_line.split() - if r_moleculetype: + if section_read['moleculetype']: ns.cg_itp['moleculetype']['molname'], ns.cg_itp['moleculetype']['nrexcl'] = sp_itp_line[0], int(sp_itp_line[1]) - elif r_atoms: + elif section_read['atom']: - bead_id, bead_type, resnr, residue, atom, cgnr, charge = sp_itp_line[:7] - try: - mass_and_eol = ' '.join(sp_itp_line[7:]) - ns.cg_itp['atoms'].append({'bead_id': int(bead_id)-1, 'bead_type': bead_type, 'resnr': int(resnr), 'residue': residue, 'atom': atom, 'cgnr': int(cgnr), 'charge': float(charge), 'mass_and_eol': mass_and_eol}) # retrieve indexing from 0 for CG beads IDS for MDAnalysis - except IndexError: - ns.cg_itp['atoms'].append({'bead_id': int(bead_id)-1, 'bead_type': bead_type, 'resnr': int(resnr), 'residue': residue, 'atom': atom, 'cgnr': int(cgnr), 'charge': float(charge)}) # retrieve indexing from 0 for CG beads IDS for MDAnalysis + # TODO: test what happens if there are VS in the middle of real CG beads in the [ atoms ] section + # because most probably this won't be OK -- Not sure who does this though, but seems possible + + if len(sp_itp_line) == 7: + bead_id, bead_type, resnr, residue, atom, cgnr, charge = sp_itp_line[:7] + mass = None + + # In case the masses are ABSENT in the ITP file 
(probably the most normal case with + # MARTINI usage), then we will read the CG masses from the TPR file to avoid having + # to look into TOP and potentially multiple ITP files: + # + # - from evaluate_model.py this means a TPR has been provided already, if the user is not using + # the script for exclusive AA distributions inspection (in which case we don't need the masses + # at all because we will use mapped/splitted weights of the atoms into CG beads exclusively anyway) + # - from optimize_model.py all the ITP included in the TOP file are read to find + # appropriate masses + + elif len(sp_itp_line) == 8: + bead_id, bead_type, resnr, residue, atom, cgnr, charge, mass = sp_itp_line[:8] + mass = float(mass) + else: + msg = ( + "The atom description from the input itp file: \n\n {} \n\n" + "does not contain the correct number of fields. Please insert " + "the following information: \n\n bead_id, bead_type, resnr, " + "residue, atom, cgnr, charge, [mass] \n\n".format(itp_line) + ) + raise exceptions.MissformattedFile(msg) + + # discriminate between real beads and virtual sites + if bead_type.startswith('v'): + ns.vs_beads_ids.append(int(bead_id) - 1) + else: + ns.real_beads_ids.append(int(bead_id) - 1) - elif r_constraints: + # assignment of the variables value + ns.cg_itp['atoms'].append({'bead_id': int(bead_id) - 1, 'bead_type': bead_type, 'resnr': int(resnr), 'residue': residue,'atom': atom, 'cgnr': int(cgnr), 'charge': float(charge), 'mass': mass, 'vs_type': None}) + # here there is still MASS and VS_TYPE that are subject to later modification + + if not len(ns.cg_itp['atoms']) == int(bead_id): + msg = ( + f"Swarm-CG handles .itp files with atoms indexed consecutively starting from 1.\n" + f"The bead numbered {bead_id + 1} does not follow this formatting." 
+ ) + raise exceptions.MissformattedFile(msg) + + elif section_read['constraint']: # beginning of a new group if itp_lines[i-1] == '' or itp_lines[i-1].startswith(';') or bool(re.search('\[.*constraint.*\]', itp_lines[i-1])): ns.nb_constraints += 1 if itp_lines[i-1].startswith('; constraint type'): - geom_type = itp_lines[i-1].split()[3] # if the current CG ITP was generated with our package + geom_type = itp_lines[i-1].split()[3] # if the current CG ITP was generated with our package else: geom_type = str(len(ns.cg_itp['constraint'])+1) - ns.cg_itp['constraint'].append({'geom_type': geom_type, 'beads': [], 'funct': [], 'value': [], 'fct': [], 'plt_id': []}) # initialize storage for this new group - + ns.cg_itp['constraint'].append({'geom_type': geom_type, 'beads': [], 'func': [], 'value': [], 'fct': []}) # initialize storage for this new group + try: - ns.cg_itp['constraint'][ns.nb_constraints]['beads'].append([int(bead_id)-1 for bead_id in sp_itp_line[0:2]]) # retrieve indexing from 0 for CG beads IDS for MDAnalysis + ns.cg_itp['constraint'][ns.nb_constraints]['beads'].append([int(bead_id)-1 for bead_id in sp_itp_line[0:2]]) # retrieve indexing from 0 for CG beads IDS for MDAnalysis except ValueError: - sys.exit(config.header_error+'Incorrect reading of the CG ITP file within [constraints] section, please check this file') + msg = ( + "Incorrect reading of the CG ITP file within [constraints] section.\n" + "Please check this file." 
+ ) + raise exceptions.MissformattedFile(msg) + func = verify_handled_functions('constraint', sp_itp_line[2], i+1) - ns.cg_itp['constraint'][ns.nb_constraints]['funct'].append(func) + ns.cg_itp['constraint'][ns.nb_constraints]['func'].append(func) ns.cg_itp['constraint'][ns.nb_constraints]['value'].append(float(sp_itp_line[3])) - try: - ns.cg_itp['constraint'][ns.nb_constraints]['plt_id'].append(sp_itp_line[6]) - except IndexError: - ns.cg_itp['constraint'][ns.nb_constraints]['plt_id'].append('') - elif r_bonds: + elif section_read['bond']: # beginning of a new group if itp_lines[i-1] == '' or itp_lines[i-1].startswith(';') or bool(re.search('\[.*bond.*\]', itp_lines[i-1])): ns.nb_bonds += 1 if itp_lines[i-1].startswith('; bond type'): - geom_type = itp_lines[i-1].split()[3] # if the current CG ITP was generated with our package + geom_type = itp_lines[i-1].split()[3] # if the current CG ITP was generated with our package else: geom_type = str(len(ns.cg_itp['bond'])+1) - ns.cg_itp['bond'].append({'geom_type': geom_type, 'beads': [], 'funct': [], 'value': [], 'fct': [], 'plt_id': []}) # initialize storage for this new group - + ns.cg_itp['bond'].append({'geom_type': geom_type, 'beads': [], 'func': [], 'value': [], 'fct': []}) # initialize storage for this new group + try: - ns.cg_itp['bond'][ns.nb_bonds]['beads'].append([int(bead_id)-1 for bead_id in sp_itp_line[0:2]]) # retrieve indexing from 0 for CG beads IDS for MDAnalysis + ns.cg_itp['bond'][ns.nb_bonds]['beads'].append([int(bead_id)-1 for bead_id in sp_itp_line[0:2]]) # retrieve indexing from 0 for CG beads IDS for MDAnalysis except ValueError: - sys.exit(config.header_error+'Incorrect reading of the CG ITP file within [bonds] section, please check this file') + msg = ( + "Incorrect reading of the CG ITP file within [bonds] section.\n" + "Please check this file." 
+ ) + raise exceptions.MissformattedFile(msg) + func = verify_handled_functions('bond', sp_itp_line[2], i+1) - ns.cg_itp['bond'][ns.nb_bonds]['funct'].append(func) + ns.cg_itp['bond'][ns.nb_bonds]['func'].append(func) ns.cg_itp['bond'][ns.nb_bonds]['value'].append(float(sp_itp_line[3])) ns.cg_itp['bond'][ns.nb_bonds]['fct'].append(float(sp_itp_line[4])) - try: - ns.cg_itp['bond'][ns.nb_bonds]['plt_id'].append(sp_itp_line[7]) - except IndexError: - ns.cg_itp['bond'][ns.nb_bonds]['plt_id'].append('') - elif r_angles: + elif section_read['angle']: # beginning of a new group if itp_lines[i-1] == '' or itp_lines[i-1].startswith(';') or bool(re.search('\[.*angle.*\]', itp_lines[i-1])): ns.nb_angles += 1 if itp_lines[i-1].startswith('; angle type'): - geom_type = itp_lines[i-1].split()[3] # if the current CG ITP was generated with our package + geom_type = itp_lines[i-1].split()[3] # if the current CG ITP was generated with our package else: geom_type = str(len(ns.cg_itp['angle'])+1) - ns.cg_itp['angle'].append({'geom_type': geom_type, 'beads': [], 'funct': [], 'value': [], 'fct': [], 'plt_id': []}) # initialize storage for this new group - + ns.cg_itp['angle'].append({'geom_type': geom_type, 'beads': [], 'func': [], 'value': [], 'fct': []}) # initialize storage for this new group + try: ns.cg_itp['angle'][ns.nb_angles]['beads'].append([int(bead_id)-1 for bead_id in sp_itp_line[0:3]]) # retrieve indexing from 0 for CG beads IDS for MDAnalysis except ValueError: - sys.exit(config.header_error+'Incorrect reading of the CG ITP file within [angles] section, please check this file') + msg = ( + "Incorrect reading of the CG ITP file within [angles] section.\n" + "Please check this file." 
+ ) + raise exceptions.MissformattedFile(msg) + func = verify_handled_functions('angle', sp_itp_line[3], i+1) - ns.cg_itp['angle'][ns.nb_angles]['funct'].append(func) + ns.cg_itp['angle'][ns.nb_angles]['func'].append(func) ns.cg_itp['angle'][ns.nb_angles]['value'].append(float(sp_itp_line[4])) ns.cg_itp['angle'][ns.nb_angles]['fct'].append(float(sp_itp_line[5])) - try: - ns.cg_itp['angle'][ns.nb_angles]['plt_id'].append(sp_itp_line[8]) - except IndexError: - ns.cg_itp['angle'][ns.nb_angles]['plt_id'].append('') - elif r_dihedrals: + elif section_read['dihedral']: # beginning of a new group if itp_lines[i-1] == '' or itp_lines[i-1].startswith(';') or bool(re.search('\[.*dihedral.*\]', itp_lines[i-1])): ns.nb_dihedrals += 1 if itp_lines[i-1].startswith('; dihedral type'): - geom_type = itp_lines[i-1].split()[3] # if the current CG ITP was generated with our package + geom_type = itp_lines[i-1].split()[3] # if the current CG ITP was generated with our package else: geom_type = str(len(ns.cg_itp['dihedral'])+1) - ns.cg_itp['dihedral'].append({'geom_type': geom_type, 'beads': [], 'funct': [], 'value': [], 'fct': [], 'plt_id': [], 'mult': []}) # initialize storage for this new group + ns.cg_itp['dihedral'].append({'geom_type': geom_type, 'beads': [], 'func': [], 'value': [], 'fct': [], 'mult': []}) # initialize storage for this new group try: - ns.cg_itp['dihedral'][ns.nb_dihedrals]['beads'].append([int(bead_id)-1 for bead_id in sp_itp_line[0:4]]) # retrieve indexing from 0 for CG beads IDS for MDAnalysis + ns.cg_itp['dihedral'][ns.nb_dihedrals]['beads'].append([int(bead_id)-1 for bead_id in sp_itp_line[0:4]]) # retrieve indexing from 0 for CG beads IDS for MDAnalysis except ValueError: - sys.exit(config.header_error+'Incorrect reading of the CG ITP file within [dihedrals] section, please check this file') + msg = ( + "Incorrect reading of the CG ITP file within [dihedrals] section.\n" + "Please check this file." 
+ ) + raise exceptions.MissformattedFile(msg) + func = verify_handled_functions('dihedral', sp_itp_line[4], i+1) - ns.cg_itp['dihedral'][ns.nb_dihedrals]['funct'].append(func) - ns.cg_itp['dihedral'][ns.nb_dihedrals]['value'].append(float(sp_itp_line[5])) # issue happens here for functions that are not handled + ns.cg_itp['dihedral'][ns.nb_dihedrals]['func'].append(func) + ns.cg_itp['dihedral'][ns.nb_dihedrals]['value'].append(float(sp_itp_line[5])) # issue happens here for functions that are not handled ns.cg_itp['dihedral'][ns.nb_dihedrals]['fct'].append(float(sp_itp_line[6])) # handle multiplicity if function assumes multiplicity if func in config.dihedral_func_with_mult: - try: # correct read of the provided multiplicity + try: ns.cg_itp['dihedral'][ns.nb_dihedrals]['mult'].append(int(sp_itp_line[7])) - except (IndexError, ValueError): # incorrect read of multiplicity -- or it was expected but not provided - print(' Missing multiplicity for dihedral at ITP line '+str(i+1)+', assumed multiplicity 1') - ns.cg_itp['dihedral'][ns.nb_dihedrals]['mult'].append(1) - else: # no multiplicity parameter is expected + except (IndexError, ValueError): # incorrect read of multiplicity + msg = f"Incorrect read of multiplicity in dihedral with potential function {func} at ITP line {i+1}." 
+ raise exceptions.MissformattedFile(msg) + else: # no multiplicity parameter is expected ns.cg_itp['dihedral'][ns.nb_dihedrals]['mult'].append(None) - try: - ns.cg_itp['dihedral'][ns.nb_dihedrals]['plt_id'].append(sp_itp_line[9]) - except IndexError: - ns.cg_itp['dihedral'][ns.nb_dihedrals]['plt_id'].append('') + elif section_read['vs_2']: + + vs_type = 2 + bead_id = int(sp_itp_line[0])-1 + vs_def_beads_ids = [int(bid)-1 for bid in sp_itp_line[1:3]] + func = sp_itp_line[3] # will be casted to int in the verification below (for factorizing checks) + func = vs_error_control(ns, bead_id, vs_type, func, i + 1, vs_def_beads_ids) # i is the line number + vs_params = float(sp_itp_line[4]) + ns.cg_itp['atoms'][bead_id]['vs_type'] = vs_type + ns.cg_itp['virtual_sites2'][bead_id] = {'bead_id': bead_id, 'func': func, 'vs_def_beads_ids': vs_def_beads_ids, 'vs_params': vs_params} + + elif section_read['vs_3']: + + vs_type = 3 + bead_id = int(sp_itp_line[0])-1 + vs_def_beads_ids = [int(bid)-1 for bid in sp_itp_line[1:4]] + func = sp_itp_line[4] # will be casted to int in the verification below (for factorizing checks) + func = vs_error_control(ns, bead_id, vs_type, func, i + 1, vs_def_beads_ids) # i is the line number + if func in [1, 2, 3]: + vs_params = [float(param) for param in sp_itp_line[5:7]] + elif func == 4: + vs_params = [float(param) for param in sp_itp_line[5:8]] + ns.cg_itp['atoms'][bead_id]['vs_type'] = vs_type + ns.cg_itp['virtual_sites3'][bead_id] = {'bead_id': bead_id, 'func': func, 'vs_def_beads_ids': vs_def_beads_ids, 'vs_params': vs_params} + + elif section_read['vs_4']: + + vs_type = 4 + bead_id = int(sp_itp_line[0]) - 1 + vs_def_beads_ids = [int(bid) - 1 for bid in sp_itp_line[1:5]] + func = sp_itp_line[5] # will be casted to int in the verification below (for factorizing checks) + func = vs_error_control(ns, bead_id, vs_type, func, i + 1, vs_def_beads_ids) # i is the line number + vs_params = [float(param) for param in sp_itp_line[6:9]] + 
ns.cg_itp['atoms'][bead_id]['vs_type'] = vs_type + ns.cg_itp['virtual_sites4'][bead_id] = {'bead_id': bead_id, 'func': func, 'vs_def_beads_ids': vs_def_beads_ids, 'vs_params': vs_params} + + elif section_read['vs_n']: + + vs_type = 'n' + bead_id = int(sp_itp_line[0])-1 + func = sp_itp_line[1] # will be casted to int in verification below (for factorizing checks) + # here we do the check in 2 steps, because the reading of beads_ids depends on the function + func = vs_error_control(ns, bead_id, vs_type, func, i + 1, vs_def_beads_ids=None) # i is the line number + if func == 3: + vs_def_beads_ids = [int(sp_itp_line[2:][i])-1 for i in range(0, len(sp_itp_line[2:]), 2)] + vs_params = [float(sp_itp_line[2:][i]) for i in range(1, len(sp_itp_line[2:]), 2)] + else: + vs_def_beads_ids = [int(bid) - 1 for bid in sp_itp_line[2:]] + vs_params = None + func = vs_error_control(ns, bead_id, vs_type, func, i + 1, vs_def_beads_ids) # i is the line number + ns.cg_itp['atoms'][bead_id]['vs_type'] = vs_type + ns.cg_itp['virtual_sitesn'][bead_id] = {'bead_id': bead_id, 'func': func, 'vs_def_beads_ids': vs_def_beads_ids, 'vs_params': vs_params} - elif r_exclusion: + elif section_read['exclusion']: ns.cg_itp['exclusion'].append([int(bead_id)-1 for bead_id in sp_itp_line]) # error handling, verify that funct, value and fct are all identical within the group, as they should be, and reduce arrays to single elements # TODO: make these messages more clear and CORRECT for the dihedral function handling -- also explain this is the current Opti.CG implementation, function 9 might come in next version # TODO: check what kind of error or processing is done when a correct line is duplicated within a group ?? 
probably it goes on in a bad way - for geom in ['constraint']: # constraints only + + def msg(geom, grp_geom): + str_msg = ( + f"In the provided CG ITP file {geom}s have been grouped, but {geom}s group " + f"{str(grp_geom + 1)} holds lines that have different parameters.\nParameters should be " + f"identical within a group, only CG beads IDs should differ.\n" + f"Please correct the CG ITP file and separate groups using a blank or commented line." + ) + return str_msg + + for geom in ['constraint']: # constraints only for grp_geom in range(len(ns.cg_itp[geom])): - for var in ['funct', 'value']: + for var in ['func', 'value']: var_set = set(ns.cg_itp[geom][grp_geom][var]) if len(var_set) == 1: ns.cg_itp[geom][grp_geom][var] = var_set.pop() else: - sys.exit(config.header_error+'In the provided CG ITP file '+geom+' have been grouped, but '+geom+' group '+str(grp_geom+1)+' holds '+geom+' lines that have different parameters\nParameters should be identical within a '+geom+' group, only CG beads IDs should differ\nPlease correct the CG ITP file and separate groups using a blank or commented line') + raise exceptions.MissformattedFile(msg(geom, grp_geom)) - for geom in ['bond', 'angle']: # bonds and angles only + for geom in ['bond', 'angle']: # bonds and angles only for grp_geom in range(len(ns.cg_itp[geom])): - for var in ['funct', 'value', 'fct']: + for var in ['func', 'value', 'fct']: var_set = set(ns.cg_itp[geom][grp_geom][var]) if len(var_set) == 1: ns.cg_itp[geom][grp_geom][var] = var_set.pop() else: - sys.exit(config.header_error+'In the provided CG ITP file '+geom+' have been grouped, but '+geom+' group '+str(grp_geom+1)+' holds '+geom+' lines that have different parameters\nParameters should be identical within groups, only CG beads IDs should differ between lines of a '+geom+' group\nPlease correct the CG ITP file and separate groups using a blank or commented line') + raise exceptions.MissformattedFile(msg(geom, grp_geom)) - for geom in ['dihedral']: # 
dihedrals only + for geom in ['dihedral']: # dihedrals only for grp_geom in range(len(ns.cg_itp[geom])): - for var in ['funct', 'value', 'fct']: + for var in ['func', 'value', 'fct']: var_set = set(ns.cg_itp[geom][grp_geom][var]) if len(var_set) == 1: ns.cg_itp[geom][grp_geom][var] = var_set.pop() else: - sys.exit(config.header_error+'In the provided CG ITP file '+geom+' have been grouped, but '+geom+' group '+str(grp_geom+1)+' holds '+geom+' lines that have different parameters\nParameters should be identical within groups, only CG beads IDs should differ between lines of a '+geom+' group\nPlease correct the CG ITP file and separate groups using a blank or commented line') + raise exceptions.MissformattedFile(msg(geom, grp_geom)) + for var in ['mult']: var_set = set(ns.cg_itp[geom][grp_geom][var]) if len(var_set) == 1: ns.cg_itp[geom][grp_geom][var] = var_set.pop() else: - sys.exit(config.header_error+'In the provided CG ITP file '+geom+' have been grouped, but '+geom+' group '+str(grp_geom+1)+' holds '+geom+' lines that have different parameters\nParameters should be identical within groups, only CG beads IDs should differ between lines of a '+geom+' group') - + raise exceptions.MissformattedFile(msg(geom, grp_geom)) + + # verify we have as many real CG beads (i.e. NOT virtual sites) in the ITP than in the mapping file + if len(ns.real_beads_ids) != len(ns.all_beads): + msg = ( + "The CG beads mapping (NDX) file does NOT include as many CG beads as the ITP file.\n" + "Please check the NDX and ITP files you provided." 
+ ) + raise exceptions.MissformattedFile(msg) + ns.nb_constraints += 1 ns.nb_bonds += 1 ns.nb_angles += 1 ns.nb_dihedrals += 1 - print(' Found '+str(ns.nb_constraints)+' constraints groups', flush=True) - print(' Found '+str(ns.nb_bonds)+' bonds groups', flush=True) - print(' Found '+str(ns.nb_angles)+' angles groups', flush=True) - print(' Found '+str(ns.nb_dihedrals)+' dihedrals groups', flush=True) - - return + print(f' Found {len(ns.real_beads_ids)} beads') + print(f' Found {len(ns.vs_beads_ids)} virtual sites') + print(f' Found {ns.nb_constraints} constraints groups') + print(f' Found {ns.nb_bonds} bonds groups') + print(f' Found {ns.nb_angles} angles groups') + print(f' Found {ns.nb_dihedrals} dihedrals groups') # load CG beads from NDX-like file @@ -487,10 +525,10 @@ def read_ndx_atoms2beads(ns): with open(ns.cg_map_filename, 'r') as fp: ndx_lines = fp.read().split('\n') - ndx_lines = [ndx_line.strip().split(';')[0] for ndx_line in ndx_lines] # split for comments + ndx_lines = [ndx_line.strip().split(';')[0] for ndx_line in ndx_lines] # split for comments ns.atoms_occ_total = collections.Counter() - ns.all_beads = dict() # atoms id mapped to each bead + ns.all_beads = dict() # atoms id mapped to each bead bead_id = 0 current_section = 'Beginning of file' @@ -499,49 +537,61 @@ def read_ndx_atoms2beads(ns): if ndx_line != '': if bool(re.search('\[.*\]', ndx_line)): - ns.all_beads[bead_id] = {'atoms_id': []} - lines_read = 0 # error handling, ensure only 1 line is read for each NDX file section/bead current_section = ndx_line + ns.all_beads[bead_id] = {'atoms_id': [], 'section': current_section, 'line_nb': i+1} + current_bead_id = bead_id + bead_id += 1 else: try: - lines_read += 1 - if lines_read > 1: - sys.exit(config.header_error+'A section of the CG beads mapping (NDX) file has multiple lines, while Swarm-CG accepts only one line per section\nPlease use a single line for IDs under section '+current_section+' near line '+str(i+1)) - bead_atoms_id = 
[int(atom_id)-1 for atom_id in ndx_line.split()] # retrieve indexing from 0 for atoms IDs for MDAnalysis - ns.all_beads[bead_id]['atoms_id'].extend(bead_atoms_id) # all atoms included in current bead + bead_atoms_id = [int(atom_id)-1 for atom_id in ndx_line.split()] # retrieve indexing from 0 for atoms IDs for MDAnalysis + ns.all_beads[current_bead_id]['atoms_id'].extend(bead_atoms_id) # all atoms included in current bead - for atom_id in bead_atoms_id: # bead to which each atom belongs (one atom can belong to multiple beads if there is split-mapping) + for atom_id in bead_atoms_id: # bead to which each atom belongs (one atom can belong to multiple beads if there is split-mapping) ns.atoms_occ_total[atom_id] += 1 - bead_id += 1 except NameError: - sys.exit(config.header_error+'The CG beads mapping (NDX) file does NOT seem to contain CG beads sections, please verify the input mapping\nThe expected format is Gromacs NDX') - except ValueError: # non-integer atom ID provided - sys.exit(config.header_error+'Incorrect reading of the sections content in the CG beads mapping (NDX) file\nFound non-integer values for some IDs at line '+str(i+1)+' under section '+current_section) + msg = ( + "The CG beads mapping (NDX) file does NOT seem to contain CG beads " + "sections.\nPlease verify the input mapping. The expected format is " + "Gromacs NDX." + ) + raise exceptions.MissformattedFile(msg) + + except ValueError: # non-integer atom ID provided + msg = ( + f"Incorrect reading of the sections content in the CG beads mapping " + f"(NDX) file.\nFound non-integer values for some IDs at line " + f"{str(i + 1)} under section {current_section}." + ) + raise exceptions.MissformattedFile(msg) - return + for bead_id in ns.all_beads: + if len(ns.all_beads[bead_id]['atoms_id']) == 0: + msg = ( + f"The ITP file contains an empty section named {ns.all_beads[bead_id]['section']} starting at line {ns.all_beads[bead_id]['line_nb']}." 
+ f"Empty sections are NOT allowed, please fill or delete it." + ) + raise exceptions.MissformattedFile(msg) # calculate weight ratio of atom ID in given CG bead +# this is for splitting atom weight in case an atom is mapped to several CG beads def get_atoms_weights_in_beads(ns): - # print('Calculating atoms weights in respect to CG beads mapping') ns.atom_w = dict() - # if ns.verbose: - # print() + if ns.verbose: + print('Calculating atoms weights ratio within mapped CG beads') for bead_id in ns.all_beads: # print('Weighting bead_id', bead_id) ns.atom_w[bead_id] = dict() beads_atoms_counts = collections.Counter(ns.all_beads[bead_id]['atoms_id']) for atom_id in beads_atoms_counts: ns.atom_w[bead_id][atom_id] = round(beads_atoms_counts[atom_id] / ns.atoms_occ_total[atom_id], 3) - # if ns.verbose: - # print(' Weight ratio is', ns.atom_w[bead_id][atom_id], 'for atom ID', atom_id, 'attributed to CG bead ID', bead_id) - # if ns.verbose: - # print() - - return + if ns.verbose and ns.mapping_type == 'COM': + print(' CG bead ID', bead_id+1, '-- Atom ID', atom_id+1, 'has weight ratio =', ns.atom_w[bead_id][atom_id]) + if ns.verbose: + print() # for each CG bead, create atom groups for trajectory geoms calculation using mass and atom weights across beads @@ -551,73 +601,51 @@ def get_beads_MDA_atomgroups(ns): for bead_id in ns.atom_w: try: # print('Created bead_id', bead_id, 'using atoms', [atom_id for atom_id in ns.atom_w[bead_id]]) - ns.mda_beads_atom_grps[bead_id] = mda.AtomGroup([atom_id for atom_id in ns.atom_w[bead_id]], ns.aa_universe) - ns.mda_weights_atom_grps[bead_id] = np.array([ns.atom_w[bead_id][atom_id]*ns.aa_universe.atoms[atom_id].mass for atom_id in ns.atom_w[bead_id]]) - # ns.mda_weights_atom_grps[bead_id] = np.array([ns.atom_w[bead_id][atom_id]*ns.all_atoms[atom_id]['atom_mass'] for atom_id in ns.atom_w[bead_id]]) - except IndexError as e: - sys.exit(config.header_error+'An ID present in your mapping (NDX) file could not be found in the AA trajectory, 
please check your mapping (NDX) file\nSee the error below to understand which ID (here 0-indexed) could not be found:\n '+str(e)) + if ns.mapping_type == 'COM': + ns.mda_beads_atom_grps[bead_id] = mda.AtomGroup([atom_id for atom_id in ns.atom_w[bead_id]], ns.aa_universe) + ns.mda_weights_atom_grps[bead_id] = np.array([ns.atom_w[bead_id][atom_id]*ns.aa_universe.atoms[atom_id].mass for atom_id in ns.atom_w[bead_id]]) + elif ns.mapping_type == 'COG': + ns.mda_beads_atom_grps[bead_id] = mda.AtomGroup([atom_id for atom_id in ns.atom_w[bead_id]], ns.aa_universe) + ns.mda_weights_atom_grps[bead_id] = np.array([1 for _ in ns.atom_w[bead_id]]) - return + except IndexError as e: + msg = ( + f"An ID present in your mapping (NDX) file could not be found in the AA trajectory. " + f"Please check your mapping (NDX) file.\nSee the error below to understand which " + f"ID (here 0-indexed) could not be found:\n\n{str(e)}" + ) + raise exceptions.MissformattedFile(msg) # compute average radius of gyration def compute_Rg(ns, traj_type): - if traj_type == 'AA': # currently we do not make use of this block in the scripts, but I let this here for later + if traj_type == 'AA': gyr_aa = np.empty(len(ns.aa_universe.trajectory)) - frame_nb = 0 - - for _ in ns.aa_universe.trajectory: - gyr_aa[frame_nb] = ns.aa_universe.atoms[:len(ns.all_atoms)].radius_of_gyration(pbc=None, backend=ns.mda_backend) - frame_nb += 1 - ns.gyr_aa = round(np.average(gyr_aa)/10, 3) # retrieve nm - ns.gyr_aa_std = round(np.std(gyr_aa)/10, 3) # retrieve nm + for ts in ns.aa_universe.trajectory: + gyr_aa[ts.frame] = ns.aa_universe.atoms[:len(ns.all_atoms)].radius_of_gyration(pbc=None, backend=ns.mda_backend) + ns.gyr_aa = round(np.average(gyr_aa) / 10, 3) # retrieve nm + ns.gyr_aa_std = round(np.std(gyr_aa) / 10, 3) # retrieve nm elif traj_type == 'AA_mapped': gyr_aa_mapped = np.empty(len(ns.aa_universe.trajectory)) - frame_nb = 0 - total_mass = sum([ns.cg_universe.atoms[bead_id].mass for bead_id in 
range(len(ns.cg_itp['atoms']))]) - beads_masses = np.array([np.array([ns.cg_universe.atoms[bead_id].mass]) for bead_id in range(len(ns.cg_itp['atoms']))]) - # print('BEADS MASSES CG for Rg calculation AA-mapped:\n', beads_masses) - - for _ in ns.aa_universe.trajectory: - mapped_pos = np.array([ns.mda_beads_atom_grps[bead_id].center(ns.mda_weights_atom_grps[bead_id], pbc=None, compound='group') for bead_id in range(len(ns.cg_itp['atoms']))]) - com = np.sum(beads_masses * mapped_pos, axis=0) / total_mass - mapped_pos_dist_com = mda.lib.distances.distance_array(com, mapped_pos, backend=ns.mda_backend) - gyr_aa_mapped[frame_nb] = np.sqrt(np.sum(beads_masses.reshape(1,len(beads_masses)) * np.power(mapped_pos_dist_com, 2)) / total_mass) - frame_nb += 1 - ns.gyr_aa_mapped = round(np.average(gyr_aa_mapped)/10 + ns.aa_rg_offset, 3) # retrieve nm - ns.gyr_aa_mapped_std = round(np.std(gyr_aa_mapped)/10, 3) # retrieve nm - - # FOR PAPER - # try: - # np.savetxt(ns.datamol+'_Rg_AA.npy', gyr_aa_mapped/10) - # except AttributeError: - # pass + for ts in ns.aa2cg_universe.trajectory: + gyr_aa_mapped[ts.frame] = ns.aa2cg_universe.atoms[:len(ns.cg_itp['atoms'])].radius_of_gyration(pbc=None, backend=ns.mda_backend) + ns.gyr_aa_mapped = round(np.average(gyr_aa_mapped) / 10 + ns.aa_rg_offset, 3) # retrieve nm + ns.gyr_aa_mapped_std = round(np.std(gyr_aa_mapped) / 10, 3) # retrieve nm elif traj_type == 'CG': gyr_cg = np.empty(len(ns.cg_universe.trajectory)) - frame_nb = 0 - - for _ in ns.cg_universe.trajectory: - gyr_cg[frame_nb] = ns.cg_universe.atoms[:len(ns.cg_itp['atoms'])].radius_of_gyration(pbc=None, backend=ns.mda_backend) - frame_nb += 1 - ns.gyr_cg = round(np.average(gyr_cg)/10, 3) # retrieve nm - ns.gyr_cg_std = round(np.std(gyr_cg)/10, 3) # retrieve nm - - # FOR PAPER - # try: - # np.savetxt(ns.datamol+'_Rg_CG.npy', gyr_cg/10) - # except AttributeError: - # pass + for ts in ns.cg_universe.trajectory: + gyr_cg[ts.frame] = 
ns.cg_universe.atoms[:len(ns.cg_itp['atoms'])].radius_of_gyration(pbc=None, backend=ns.mda_backend) + ns.gyr_cg = round(np.average(gyr_cg) / 10, 3) # retrieve nm + ns.gyr_cg_std = round(np.std(gyr_cg) / 10, 3) # retrieve nm else: - sys.exit('Code error compute_Rg') - - return + raise RuntimeError('Unexpected error in function: compute_Rg') # read 1 column of xvg file and return as array @@ -638,115 +666,78 @@ def exec_gmx(gmx_cmd): with subprocess.Popen([gmx_cmd], shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) as gmx_process: gmx_out = gmx_process.communicate()[1].decode() gmx_process.kill() - # print_stdout_forced('Return code:', gmx_process.returncode) if gmx_process.returncode != 0: print_stdout_forced('NON-ZERO EXIT CODE FOR COMMAND:', gmx_cmd, '\n\nCOMMAND OUTPUT:\n\n', gmx_out, '\n\n') return gmx_process.returncode # compute average SASA -# this works with calls to GMX because only MDTraj can compute SASA (not MDAnalysis) but I don't have time to look into using MDTraj +# NOTE: currently this is just COM mappping via GMX to get the SASA, so it's approximative but that's OK +# this works with calls to GMX because only library MDTraj can compute SASA (not MDAnalysis) +# TODO: MDA is working on it, keep an eye on this: https://github.com/MDAnalysis/mdanalysis/issues/2439 def compute_SASA(ns, traj_type): + probe_radius = 0.26 # nm + if traj_type == 'AA': - sys.exit('Compute_SASA not implemented for AA atm') + raise exceptions.InvalidArgument('Compute_SASA not implemented for AA atm') elif traj_type == 'AA_mapped': - nb_beads = len(ns.all_beads) + # NOTE: here we assume the VS all come after the real beads in the ITP [ atoms ] field + # we generate a new truncated TPR so that we can use GMX sasa, this is shit but no choice atm + nb_beads_real = len(ns.real_beads_ids) - # generate an index.ndx file with the number of beads, so we can call SASA on this group even if there are residues in the molecule + # generate an index.ndx file with the number of 
beads, + # so we can call SASA on this group and we will have exactly the content we want ns.cg_ndx_filename = '../'+config.input_sim_files_dirname+'/cg_index.ndx' with open(ns.cg_ndx_filename, 'w') as fp: - beads_id_str = '' - for i in range(nb_beads): - beads_id_str += str(i+1)+' ' - fp.write('[' +ns.cg_itp['moleculetype']['molname']+' ]\n'+beads_id_str+'\n') + beads_ids_str = ' '.join(map(str, list(range(1, nb_beads_real+1)))) # includes VS if present + fp.write('[' + ns.cg_itp['moleculetype']['molname'] + ' ]\n' + beads_ids_str + '\n') - # TODO. all these paths need to be fixed to allow for SASA calculation within evaluate_model.py -- but ideally we would use a library instead of external calls to gmx sasa ! + # TODO: all these paths need to be fixed to allow for SASA calculation within evaluate_model.py + # that's why it's disabled atm ns.aa_traj_whole_filename = '../'+config.input_sim_files_dirname+'/aa_traj_whole.xtc' - ns.aa_frame_whole_filename = '../'+config.input_sim_files_dirname+'/aa_frame_whole.gro' ns.aa_mapped_traj_whole_filename = '../'+config.input_sim_files_dirname+'/aa_mapped_traj_whole.xtc' - ns.aa_mapped_frame_whole_filename = '../'+config.input_sim_files_dirname+'/aa_mapped_frame_whole.gro' ns.aa_mapped_sasa_filename = '../'+config.input_sim_files_dirname+'/aa_mapped_sasa.xvg' ns.aa_mapped_tpr_sasa_filename = '../'+config.input_sim_files_dirname+'/aa_mapped_tpr_sasa.tpr' non_zero_return_code = False # first make traj whole - gmx_cmd = 'seq 0 1 | '+ns.gmx_path+' trjconv -s ../../'+ns.aa_tpr_filename+' -f ../../'+ns.aa_traj_filename+' -pbc mol -o '+ns.aa_traj_whole_filename + gmx_cmd = f'seq 0 1 | {ns.gmx_path} trjconv -s ../../{ns.aa_tpr_filename} -f ../../{ns.aa_traj_filename} -pbc mol -o {ns.aa_traj_whole_filename}' return_code = exec_gmx(gmx_cmd) if return_code != 0: non_zero_return_code = True - # dump an AA frame, only to generate mapped TPR - if not non_zero_return_code: - gmx_cmd = 'seq 0 1 | '+ns.gmx_path+' trjconv -s 
../../'+ns.aa_tpr_filename+' -f '+ns.aa_traj_whole_filename+' -dump 0 -o '+ns.aa_frame_whole_filename - return_code = exec_gmx(gmx_cmd) - if return_code != 0: - non_zero_return_code = True - # then map AA traj if not non_zero_return_code: - gmx_cmd = 'seq 0 '+str(nb_beads-1)+' | '+ns.gmx_path+' traj -f '+ns.aa_traj_whole_filename+' -s ../../'+ns.aa_tpr_filename+' -oxt '+ns.aa_mapped_traj_whole_filename+' -n ../../'+ns.cg_map_filename+' -com -ng '+str(nb_beads) + gmx_cmd = f'seq 0 {nb_beads_real - 1} | {ns.gmx_path} traj -f {ns.aa_traj_whole_filename} -s ../../{ns.aa_tpr_filename} -oxt {ns.aa_mapped_traj_whole_filename} -n ../../{ns.cg_map_filename} -com -ng {nb_beads_real}' return_code = exec_gmx(gmx_cmd) if return_code != 0: non_zero_return_code = True - # map AA frame + # truncate the CG TPR to get only real beads if not non_zero_return_code: - gmx_cmd = 'seq 0 '+str(nb_beads-1)+' | '+ns.gmx_path+' traj -f '+ns.aa_frame_whole_filename+' -s ../../'+ns.aa_tpr_filename+' -oxt '+ns.aa_mapped_frame_whole_filename+' -n ../../'+ns.cg_map_filename+' -com -ng '+str(nb_beads) + gmx_cmd = f'{ns.gmx_path} convert-tpr -s md.tpr -n {ns.cg_ndx_filename} -o {ns.aa_mapped_tpr_sasa_filename}' return_code = exec_gmx(gmx_cmd) if return_code != 0: non_zero_return_code = True - # # NOTE: currently if CG TOP file does NOT end with section [ molecules ] it will most probably crash everything - - # # create new CG TOP file that contains only the molecule of interest - # if not non_zero_return_code: - # ns.modified_top_input_filename = '../'+config.input_sim_files_dirname+'/auto_modified_system_for_sasa.top' - # # we keep only the first non-commented occurence in section [ molecules ] - # with open('../'+config.input_sim_files_dirname+'/'+ns.top_input_basename, 'r') as fp: - # top_lines = fp.read().split('\n') - # with open(ns.modified_top_input_filename, 'w') as fp: - # uncommented_occ = 0 - # readmol = False - # for top_line in top_lines: - # # print_stdout_forced('CURRENT LINE:', 
top_line) - # if re.match('\[.*molecules.*\]', top_line): - # readmol = True - # # print_stdout_forced('READMOL TRUE') - # if readmol: - # top_line.strip() - # if not top_line.startswith(';'): - # uncommented_occ += 1 - # # print_stdout_forced('occ counter:', uncommented_occ) - # if uncommented_occ <= 2: - # fp.write(top_line+'\n') - # # print_stdout_forced('WRITE NORMAL') - # else: - # fp.write('; '+top_line+'\n') - # # print_stdout_forced('WRITE COMMENTED') - - # # create mapped TPR - # if not non_zero_return_code: - # gmx_cmd = ns.gmx_path+' grompp -c '+ns.aa_mapped_frame_whole_filename+' -p '+ns.modified_top_input_filename+' -f ../../'+ns.mdp_md_filename+' -o '+ns.aa_mapped_tpr_sasa_filename+' -maxwarn 1' - # return_code = exec_gmx(gmx_cmd) - # if return_code != 0: - # non_zero_return_code = True - # finally get sasa if not non_zero_return_code: - # gmx_cmd = ns.gmx_path+' sasa -s '+ns.aa_mapped_tpr_sasa_filename+' -f '+ns.aa_mapped_traj_whole_filename+' -n '+ns.cg_ndx_filename+' -surface 0 -o '+ns.aa_mapped_sasa_filename+' -probe '+str(ns.probe_radius) # surface to choose the index group, 2 is the molecule even when there are ions (0 and 1 are System and Others) - gmx_cmd = ns.gmx_path+' sasa -s md.tpr -f '+ns.aa_mapped_traj_whole_filename+' -n '+ns.cg_ndx_filename+' -surface 0 -o '+ns.aa_mapped_sasa_filename+' -probe '+str(ns.probe_radius) # surface to choose the index group, 2 is the molecule even when there are ions (0 and 1 are System and Others) # SWITCHED TO USING THE MD TPR AND ASSUMING THE MOLECULE IS THE FIRST ONE IN TPR + gmx_cmd = f'{ns.gmx_path} sasa -s {ns.aa_mapped_tpr_sasa_filename} -f {ns.aa_mapped_traj_whole_filename} -n {ns.cg_ndx_filename} -surface 0 -o {ns.aa_mapped_sasa_filename} -probe {probe_radius}' return_code = exec_gmx(gmx_cmd) if return_code != 0: non_zero_return_code = True if non_zero_return_code: - print_stdout_forced('There were some errors while calculating SASA for AA-mapped trajectory, please check the error messages 
displayed above') - sys.exit() # exit, otherwise it will try to calculate AA-mapped SASA at every iteration + msg = ( + "There were some errors while calculating SASA for AA-mapped trajectory.\n" + "Please check the error messages displayed above." + ) + raise exceptions.ComputationError(msg) else: sasa_aa_mapped_per_frame = read_xvg_col(ns.aa_mapped_sasa_filename, 1) ns.sasa_aa_mapped = round(np.mean(sasa_aa_mapped_per_frame), 2) @@ -759,71 +750,72 @@ def compute_SASA(ns, traj_type): non_zero_return_code = False # first make traj whole - gmx_cmd = 'seq 0 1 | '+ns.gmx_path+' trjconv -s '+ns.cg_tpr_filename+' -f '+ns.cg_traj_filename+' -pbc mol -o '+ns.cg_traj_whole_filename + gmx_cmd = f'seq 0 1 | {ns.gmx_path} trjconv -s {ns.cg_tpr_filename} -f {ns.cg_traj_filename} -pbc mol -o {ns.cg_traj_whole_filename}' return_code = exec_gmx(gmx_cmd) if return_code != 0: non_zero_return_code = True # then compute SASA if not non_zero_return_code: - gmx_cmd = ns.gmx_path+' sasa -s '+ns.cg_tpr_filename+' -f '+ns.cg_traj_whole_filename+' -n '+ns.cg_ndx_filename+' -surface 0 -o '+ns.cg_sasa_filename+' -probe '+str(ns.probe_radius) # surface to choose the index group, 2 is the molecule even when there are ions (0 and 1 are System and Others) + # surface to choose the index group, 2 is the molecule even when there are ions (0 and 1 are System and Others) + gmx_cmd = f'{ns.gmx_path} sasa -s {ns.cg_tpr_filename} -f {ns.cg_traj_whole_filename} -n {ns.cg_ndx_filename} -surface 0 -o {ns.cg_sasa_filename} -probe {probe_radius}' return_code = exec_gmx(gmx_cmd) if return_code != 0: non_zero_return_code = True - if non_zero_return_code or not os.path.isfile(ns.cg_sasa_filename): # extra security - # print_stdout_forced('There were some errors while calculating SASA for AA-mapped trajectory, please check the error messages displayed above') + if non_zero_return_code or not os.path.isfile(ns.cg_sasa_filename): # extra security ns.sasa_cg, ns.sasa_cg_std = None, None else: sasa_cg_per_frame = 
read_xvg_col(ns.cg_sasa_filename, 1) ns.sasa_cg = round(np.mean(sasa_cg_per_frame), 2) ns.sasa_cg_std = round(np.std(sasa_cg_per_frame), 2) - # print_stdout_forced('COMPUTED CG SASA:', ns.sasa_cg) else: - sys.exit('Code error compute SASA') - - return + raise exceptions.ComputationError('Code error compute SASA') # update coarse-grain ITP def update_cg_itp_obj(ns, parameters_set, update_type): - if update_type == 1: # intermediary + if update_type == 1: # intermediary itp_obj = ns.out_itp - elif update_type == 2: # cycles optimized + elif update_type == 2: # cycles optimized itp_obj = ns.opti_itp else: - sys.exit(config.header_error+'Code error in function update_cg_itp_obj, please consider opening an issue on GitHub at '+config.github_url) + msg = ( + f"Code error in function update_cg_itp_obj.\nPlease consider opening an issue on GitHub " + f"at {config.github_url}." + ) + raise exceptions.InvalidArgument(msg) for i in range(ns.opti_cycle['nb_geoms']['constraint']): - itp_obj['constraint'][i]['value'] = round(parameters_set[i], 3) # constraint - distance + itp_obj['constraint'][i]['value'] = round(parameters_set[i], 3) # constraint - distance for i in range(ns.opti_cycle['nb_geoms']['bond']): - itp_obj['bond'][i]['value'] = round(parameters_set[ns.opti_cycle['nb_geoms']['constraint']+i], 3) # bond - distance - itp_obj['bond'][i]['fct'] = round(parameters_set[ns.opti_cycle['nb_geoms']['constraint']+ns.opti_cycle['nb_geoms']['bond']+i], 3) # bond - force constant + itp_obj['bond'][i]['value'] = round(parameters_set[ns.opti_cycle['nb_geoms']['constraint']+i], 3) # bond - distance + itp_obj['bond'][i]['fct'] = round(parameters_set[ns.opti_cycle['nb_geoms']['constraint']+ns.opti_cycle['nb_geoms']['bond']+i], 3) # bond - force constant for i in range(ns.opti_cycle['nb_geoms']['angle']): if ns.exec_mode == 1 or ns.exec_mode == 3: - itp_obj['angle'][i]['value'] = round(parameters_set[ns.opti_cycle['nb_geoms']['constraint']+2*ns.opti_cycle['nb_geoms']['bond']+i], 2) # 
angle - value - itp_obj['angle'][i]['fct'] = round(parameters_set[ns.opti_cycle['nb_geoms']['constraint']+2*ns.opti_cycle['nb_geoms']['bond']+ns.opti_cycle['nb_geoms']['angle']+i], 2) # angle - force constant + itp_obj['angle'][i]['value'] = round(parameters_set[ns.opti_cycle['nb_geoms']['constraint']+2*ns.opti_cycle['nb_geoms']['bond']+i], 2) # angle - value + itp_obj['angle'][i]['fct'] = round(parameters_set[ns.opti_cycle['nb_geoms']['constraint']+2*ns.opti_cycle['nb_geoms']['bond']+ns.opti_cycle['nb_geoms']['angle']+i], 2) # angle - force constant else: - itp_obj['angle'][i]['fct'] = round(parameters_set[ns.opti_cycle['nb_geoms']['constraint']+2*ns.opti_cycle['nb_geoms']['bond']+i], 2) # angle - force constant + itp_obj['angle'][i]['fct'] = round(parameters_set[ns.opti_cycle['nb_geoms']['constraint']+2*ns.opti_cycle['nb_geoms']['bond']+i], 2) # angle - force constant for i in range(ns.opti_cycle['nb_geoms']['dihedral']): if ns.exec_mode == 1: - itp_obj['dihedral'][i]['value'] = round(parameters_set[ns.opti_cycle['nb_geoms']['constraint']+2*ns.opti_cycle['nb_geoms']['bond']+2*ns.opti_cycle['nb_geoms']['angle']+i], 2) # dihedral - value + itp_obj['dihedral'][i]['value'] = round(parameters_set[ns.opti_cycle['nb_geoms']['constraint']+2*ns.opti_cycle['nb_geoms']['bond']+2*ns.opti_cycle['nb_geoms']['angle']+i], 2) # dihedral - value itp_obj['dihedral'][i]['fct'] = round(parameters_set[ns.opti_cycle['nb_geoms']['constraint']+2*ns.opti_cycle['nb_geoms']['bond']+2*ns.opti_cycle['nb_geoms']['angle']+ns.opti_cycle['nb_geoms']['dihedral']+i], 2) # dihedral - force constant elif ns.exec_mode == 3: - itp_obj['dihedral'][i]['fct'] = round(parameters_set[ns.opti_cycle['nb_geoms']['constraint']+2*ns.opti_cycle['nb_geoms']['bond']+2*ns.opti_cycle['nb_geoms']['angle']+i], 2) # dihedral - force constant + itp_obj['dihedral'][i]['fct'] = round(parameters_set[ns.opti_cycle['nb_geoms']['constraint']+2*ns.opti_cycle['nb_geoms']['bond']+2*ns.opti_cycle['nb_geoms']['angle']+i], 2) # 
dihedral - force constant else: - itp_obj['dihedral'][i]['fct'] = round(parameters_set[ns.opti_cycle['nb_geoms']['constraint']+2*ns.opti_cycle['nb_geoms']['bond']+ns.opti_cycle['nb_geoms']['angle']+i], 2) # dihedral - force constant - - return + itp_obj['dihedral'][i]['fct'] = round(parameters_set[ns.opti_cycle['nb_geoms']['constraint']+2*ns.opti_cycle['nb_geoms']['bond']+ns.opti_cycle['nb_geoms']['angle']+i], 2) # dihedral - force constant # print coarse-grain ITP -def print_cg_itp_file(itp_obj, out_path_itp, print_sections=['constraint', 'bond', 'angle', 'dihedral', 'exclusion']): +# here we have a switch for print_sections because we might want to optimize constraints/bonds/angles/dihedrals +# separately, so we can leave some out with the switch and they will be optimized later +def write_cg_itp_file(itp_obj, out_path_itp, print_sections=['constraint', 'bond', 'angle', 'dihedral', 'exclusion']): with open(out_path_itp, 'w') as fp: @@ -835,59 +827,71 @@ def print_cg_itp_file(itp_obj, out_path_itp, print_sections=['constraint', 'bond fp.write('; id type resnr residue atom cgnr charge mass\n\n') for i in range(len(itp_obj['atoms'])): - - if 'mass_and_eol' in itp_obj['atoms'][i]: - fp.write('{0:<4} {1:>4} {6:>2} {2:>6} {3:>6} {4:<4} {5:9.5f} {7}\n'.format(itp_obj['atoms'][i]['bead_id']+1, itp_obj['atoms'][i]['bead_type'], itp_obj['atoms'][i]['residue'], itp_obj['atoms'][i]['atom'], i+1, itp_obj['atoms'][i]['charge'], itp_obj['atoms'][i]['resnr'], itp_obj['atoms'][i]['mass_and_eol'])) + # if the ITP did NOT contain masses, they are set at 0 in this field during ITP reading + if itp_obj['atoms'][i]['mass'] is not None: + fp.write('{0:<4} {1:>4} {6:>2} {2:>6} {3:>6} {4:<4} {5:9.5f} {7:<5.2f}\n'.format( + itp_obj['atoms'][i]['bead_id']+1, itp_obj['atoms'][i]['bead_type'], + itp_obj['atoms'][i]['residue'], itp_obj['atoms'][i]['atom'], i+1, itp_obj['atoms'][i]['charge'], + itp_obj['atoms'][i]['resnr'], itp_obj['atoms'][i]['mass'])) else: - fp.write('{0:<4} {1:>4} {6:>2} 
{2:>6} {3:>6} {4:<4} {5:9.5f}\n'.format(itp_obj['atoms'][i]['bead_id']+1, itp_obj['atoms'][i]['bead_type'], itp_obj['atoms'][i]['residue'], itp_obj['atoms'][i]['atom'], i+1, itp_obj['atoms'][i]['charge'], itp_obj['atoms'][i]['resnr'])) + fp.write('{0:<4} {1:>4} {6:>2} {2:>6} {3:>6} {4:<4} {5:9.5f}\n'.format( + itp_obj['atoms'][i]['bead_id'] + 1, itp_obj['atoms'][i]['bead_type'], + itp_obj['atoms'][i]['residue'], itp_obj['atoms'][i]['atom'], i + 1, itp_obj['atoms'][i]['charge'], + itp_obj['atoms'][i]['resnr'])) if 'constraint' in print_sections and 'constraint' in itp_obj and len(itp_obj['constraint']) > 0: fp.write('\n\n[ constraints ]\n') fp.write('; i j funct length\n') for j in range(len(itp_obj['constraint'])): - + constraint_type = itp_obj['constraint'][j]['geom_type'] - fp.write('\n; constraint type '+constraint_type+'\n') # do NOT change this comment format, functions read_cg_itp depends on it + fp.write('\n; constraint type '+constraint_type+'\n') grp_val = itp_obj['constraint'][j]['value'] for i in range(len(itp_obj['constraint'][j]['beads'])): - fp.write('{beads[0]:>5} {beads[1]:>5} {0:>7} {1:8.3f} ; {2} {3}\n'.format(itp_obj['constraint'][j]['funct'], grp_val, constraint_type, itp_obj['constraint'][j]['plt_id'][i], beads=[bead_id+1 for bead_id in itp_obj['constraint'][j]['beads'][i]])) + fp.write('{beads[0]:>5} {beads[1]:>5} {0:>7} {1:8.3f} ; {2}\n'.format( + itp_obj['constraint'][j]['func'], grp_val, constraint_type, + beads=[bead_id+1 for bead_id in itp_obj['constraint'][j]['beads'][i]])) if 'bond' in print_sections and 'bond' in itp_obj and len(itp_obj['bond']) > 0: fp.write('\n\n[ bonds ]\n') fp.write('; i j funct length force.c.\n') for j in range(len(itp_obj['bond'])): - + bond_type = itp_obj['bond'][j]['geom_type'] - fp.write('\n; bond type '+bond_type+'\n') # do NOT change this comment format, functions read_cg_itp depends on it + fp.write('\n; bond type '+bond_type+'\n') grp_val, grp_fct = itp_obj['bond'][j]['value'], itp_obj['bond'][j]['fct'] 
for i in range(len(itp_obj['bond'][j]['beads'])): - fp.write('{beads[0]:>5} {beads[1]:>5} {0:>7} {1:8.3f} {2:7.2f} ; {3} {4}\n'.format(itp_obj['bond'][j]['funct'], grp_val, grp_fct, bond_type, itp_obj['bond'][j]['plt_id'][i], beads=[bead_id+1 for bead_id in itp_obj['bond'][j]['beads'][i]])) + fp.write('{beads[0]:>5} {beads[1]:>5} {0:>7} {1:8.3f} {2:7.2f} ; {3}\n'.format( + itp_obj['bond'][j]['func'], grp_val, grp_fct, bond_type, + beads=[bead_id+1 for bead_id in itp_obj['bond'][j]['beads'][i]])) if 'angle' in print_sections and 'angle' in itp_obj and len(itp_obj['angle']) > 0: fp.write('\n\n[ angles ]\n') fp.write('; i j k funct angle force.c.\n') for j in range(len(itp_obj['angle'])): - + angle_type = itp_obj['angle'][j]['geom_type'] - fp.write('\n; angle type '+angle_type+'\n') # do NOT change this comment format, functions read_cg_itp depends on it + fp.write('\n; angle type '+angle_type+'\n') grp_val, grp_fct = itp_obj['angle'][j]['value'], itp_obj['angle'][j]['fct'] for i in range(len(itp_obj['angle'][j]['beads'])): - fp.write('{beads[0]:>5} {beads[1]:>5} {beads[2]:>5} {0:>7} {1:9.2f} {2:7.2f} ; {3} {4}\n'.format(itp_obj['angle'][j]['funct'], grp_val, grp_fct, angle_type, itp_obj['angle'][j]['plt_id'][i], beads=[bead_id+1 for bead_id in itp_obj['angle'][j]['beads'][i]])) + fp.write('{beads[0]:>5} {beads[1]:>5} {beads[2]:>5} {0:>7} {1:9.2f} {2:7.2f} ; {3}\n'.format( + itp_obj['angle'][j]['func'], grp_val, grp_fct, angle_type, + beads=[bead_id+1 for bead_id in itp_obj['angle'][j]['beads'][i]])) if 'dihedral' in print_sections and 'dihedral' in itp_obj and len(itp_obj['dihedral']) > 0: fp.write('\n\n[ dihedrals ]\n') fp.write('; i j k l funct dihedral force.c. 
mult.\n') for j in range(len(itp_obj['dihedral'])): - + dihedral_type = itp_obj['dihedral'][j]['geom_type'] - fp.write('\n; dihedral type '+dihedral_type+'\n') # do NOT change this comment format, functions read_cg_itp depends on it + fp.write('\n; dihedral type '+dihedral_type+'\n') grp_val, grp_fct = itp_obj['dihedral'][j]['value'], itp_obj['dihedral'][j]['fct'] for i in range(len(itp_obj['dihedral'][j]['beads'])): @@ -897,8 +901,61 @@ def print_cg_itp_file(itp_obj, out_path_itp, print_sections=['constraint', 'bond if multiplicity == None: multiplicity = '' - # print(itp_obj['dihedral'][j]['funct'], grp_val, grp_fct, dihedral_type, itp_obj['dihedral'][j]['plt_id'][i], 'beads', itp_obj['dihedral'][j]['beads'][i]) - fp.write('{beads[0]:>5} {beads[1]:>5} {beads[2]:>5} {beads[3]:>5} {0:>7} {1:9.2f} {2:7.2f} {5} ; {3} {4}\n'.format(itp_obj['dihedral'][j]['funct'], grp_val, grp_fct, dihedral_type, itp_obj['dihedral'][j]['plt_id'][i], multiplicity, beads=[bead_id+1 for bead_id in itp_obj['dihedral'][j]['beads'][i]])) + fp.write('{beads[0]:>5} {beads[1]:>5} {beads[2]:>5} {beads[3]:>5} {0:>7} {1:9.2f} {2:7.2f} {4} ; {3}\n'.format( + itp_obj['dihedral'][j]['func'], grp_val, grp_fct, dihedral_type, multiplicity, + beads=[bead_id+1 for bead_id in itp_obj['dihedral'][j]['beads'][i]])) + + # here starts 4 almost identical blocks, that differ only by vs_2, vs_3, vs_4, vs_n + # but we could still need to write several of these sections (careful if factorizing this) + if len(itp_obj['virtual_sites2']) > 0: + fp.write('\n\n[ virtual_sites2 ]\n') + fp.write('; vs i j func param\n') + for bead_id in itp_obj['virtual_sites2']: + fp.write('{0:>5} {beads[0]:>5} {beads[1]:>5} {1:>5} {2}\n'.format( + str(itp_obj['virtual_sites2'][bead_id]['bead_id'] + 1), + str(itp_obj['virtual_sites2'][bead_id]['func']), + itp_obj['virtual_sites2'][bead_id]['vs_params'], + beads=[bid+1 for bid in itp_obj['virtual_sites2'][bead_id]['vs_def_beads_ids']]) + ) + + if len(itp_obj['virtual_sites3']) > 0: + 
fp.write('\n\n[ virtual_sites3 ]\n') + fp.write('; vs i j k func params\n') + for bead_id in itp_obj['virtual_sites3']: + fp.write('{0:>5} {beads[0]:>5} {beads[1]:>5} {beads[2]:>5} {1:>5} {2}\n'.format( + str(itp_obj['virtual_sites3'][bead_id]['bead_id'] + 1), + str(itp_obj['virtual_sites3'][bead_id]['func']), + ' '.join(map(str, itp_obj['virtual_sites3'][bead_id]['vs_params'])), + beads=[bid+1 for bid in itp_obj['virtual_sites3'][bead_id]['vs_def_beads_ids']]) + ) + + if len(itp_obj['virtual_sites4']) > 0: + fp.write('\n\n[ virtual_sites4 ]\n') + fp.write('; vs i j k l func params\n') + for bead_id in itp_obj['virtual_sites4']: + fp.write('{0:>5} {beads[0]:>5} {beads[1]:>5} {beads[2]:>5} {beads[3]:>5} {1:>5} {2}\n'.format( + str(itp_obj['virtual_sites4'][bead_id]['bead_id'] + 1), + str(itp_obj['virtual_sites4'][bead_id]['func']), + ' '.join(map(str, itp_obj['virtual_sites4'][bead_id]['vs_params'])), + beads=[bid+1 for bid in itp_obj['virtual_sites4'][bead_id]['vs_def_beads_ids']]) + ) + + if len(itp_obj['virtual_sitesn']) > 0: + fp.write('\n\n[ virtual_sitesn ]\n') + fp.write('; vs func def\n') + for bead_id in itp_obj['virtual_sitesn']: + params = [] + if itp_obj['virtual_sitesn'][bead_id]['func'] == 3: + for i in range(len(itp_obj['virtual_sitesn'][bead_id]['vs_def_beads_ids'])): + params.append('{} {}'.format(itp_obj['virtual_sitesn'][bead_id]['vs_def_beads_ids'][i]+1, itp_obj['virtual_sitesn'][bead_id]['vs_params'][i])) + params = ' '.join(params) + else: + params = ' '.join(['{:>4}'.format(bid + 1) for bid in itp_obj['virtual_sitesn'][bead_id]['vs_def_beads_ids']]) + fp.write('{:>5} {:>5} {}\n'.format( + itp_obj['virtual_sitesn'][bead_id]['bead_id'] + 1, + itp_obj['virtual_sitesn'][bead_id]['func'], + params) + ) if 'exclusion' in print_sections and 'exclusion' in itp_obj and len(itp_obj['exclusion']) > 0: fp.write('\n\n[ exclusions ]\n') @@ -909,106 +966,36 @@ def print_cg_itp_file(itp_obj, out_path_itp, print_sections=['constraint', 'bond fp.write('\n\n') - 
return - - -# build atomistic graph of the molecule + find if atoms are heavy or not -# def build_aa_graph_and_find_heavy_atoms(ns): - -# aa_graph = nx.Graph() -# all_hydrogen_atoms_id, all_heavy_atoms_id = set(), set() - -# for atom_id_1 in ns.all_atoms: -# if ns.all_atoms[atom_id_1]['heavy']: -# all_heavy_atoms_id.add(atom_id_1) -# aa_graph.add_node(atom_id_1, type=ns.all_atoms[atom_id_1]['atom_type']) - -# for atom_id_2 in ns.all_atoms[atom_id_1]['conn']: -# if ns.all_atoms[atom_id_2]['heavy']: -# aa_graph.add_edge(atom_id_1, atom_id_2) -# else: -# all_hydrogen_atoms_id.add(atom_id_1) - -# return aa_graph, all_heavy_atoms_id, all_hydrogen_atoms_id - - -# comparing geoms for identical atomistic content of beads, split into separate groups if atom content is different (atom types and connectivity via graph isomorphism) -# def compare_atom_content(ns, all_cg_geoms, cg_graph, aa_graph, cg_node_matcher_2, cg_edge_matcher, aa_node_matcher): - -# same_types_conn_filtered_cg_geoms = dict() -# geoms_types = {'cg_lvl': {}, 'aa_lvl': {}} -# nb_geoms_types = 0 - -# for geom_id in range(len(all_cg_geoms)): - -# geom = all_cg_geoms[geom_id] -# new_geom_type = cg_graph.subgraph(geom).copy() # CG subgraph of the geom beads as a first filter -- copy since we might remove edges for dihedrals -# if len(geom) == 4: # for dihedrals remove the additional edge within cycles of 4 beads -- dihedrals with rotatable within cycles of 3 beads shall already be discarded from previous steps -# try: -# new_geom_type.remove_edge(geom[0], geom[3]) -# except nx.NetworkXError: -# pass # if there was no edge between opposite beads of the dihedral -# cg_geom_with_neighbors = set([conn_bead_id for bead_id in geom for conn_bead_id in ns.all_beads[bead_id]['conn']]) # extend cg graph to n+1 neighbors so atomistic branching will be taken into account + handle case that include the very central bead of a graph/molecule -- this is necessary to handle cyclic cores correctly, especially -# 
aa_sg_view_new_geom = aa_graph.subgraph([atom_id for bead_id in cg_geom_with_neighbors for atom_id in ns.all_beads[bead_id]['atoms_id']]) # atomistic subgraph of the beads content, specifically for handling/splitting geoms with edges inside atomistic cycles (benzenes, etc.) or case that include the very central bead of a graph/molecule -# # aa_sg_view_new_geom = aa_graph.subgraph([atom_id for bead_id in geom for atom_id in ns.all_beads[bead_id]['atoms_id']]) # atomistic subgraph of the beads content, specifically for handling/splitting geoms with edges inside atomistic cycles (benzenes, etc.) or case that include the very central bead of a graph/molecule -# found_geom_type = False - -# for known_geom_type in geoms_types['cg_lvl']: - -# GM = nx.algorithms.isomorphism.GraphMatcher(known_geom_type, new_geom_type, node_match=cg_node_matcher_2, edge_match=cg_edge_matcher) -# if GM.is_isomorphic(): - -# aa_sg_view_known_geom = geoms_types['aa_lvl'][geoms_types['cg_lvl'][known_geom_type]] -# if nx.algorithms.isomorphism.is_isomorphic(aa_sg_view_new_geom, aa_sg_view_known_geom, node_match=aa_node_matcher): - -# found_geom_type = True -# ref_geom_ids_order = all_cg_geoms[same_types_conn_filtered_cg_geoms[geoms_types['cg_lvl'][known_geom_type]][0]] -# all_cg_geoms[geom_id] = tuple([GM.mapping[bead_id] for bead_id in ref_geom_ids_order]) # get ordering right for identical elements within the reference list of objects -# same_types_conn_filtered_cg_geoms[geoms_types['cg_lvl'][known_geom_type]].append(geom_id) -# break - -# if not found_geom_type: -# nb_geoms_types += 1 -# geoms_types['cg_lvl'][new_geom_type] = nb_geoms_types -# geoms_types['aa_lvl'][nb_geoms_types] = aa_sg_view_new_geom -# same_types_conn_filtered_cg_geoms[nb_geoms_types] = [geom_id] - -# return same_types_conn_filtered_cg_geoms - # set dimensions of the search space according to the type of optimization (= geom type(s) to optimize) def get_search_space_boundaries(ns): - + search_space_boundaries = [] if 
ns.opti_cycle['nb_geoms']['constraint'] > 0: - search_space_boundaries.extend(ns.domains_val['constraint']) # constraints distances + search_space_boundaries.extend(ns.domains_val['constraint']) # constraints distances if ns.opti_cycle['nb_geoms']['bond'] > 0: - search_space_boundaries.extend(ns.domains_val['bond']) # bonds distances and force constants + search_space_boundaries.extend(ns.domains_val['bond']) # bonds distances and force constants search_space_boundaries.extend([[config.default_min_fct_bonds, ns.default_max_fct_bonds_opti]]*ns.opti_cycle['nb_geoms']['bond']) if ns.opti_cycle['nb_geoms']['angle'] > 0: if ns.exec_mode == 1 or ns.exec_mode == 3: - search_space_boundaries.extend(ns.domains_val['angle']) # angles values + search_space_boundaries.extend(ns.domains_val['angle']) # angles values - for grp_angle in range(ns.opti_cycle['nb_geoms']['angle']): # angles force constants - if ns.cg_itp['angle'][grp_angle]['funct'] == 1: + for grp_angle in range(ns.opti_cycle['nb_geoms']['angle']): # angles force constants + if ns.cg_itp['angle'][grp_angle]['func'] == 1: search_space_boundaries.extend([[config.default_min_fct_angles, ns.default_max_fct_angles_opti_f1]]) - elif ns.cg_itp['angle'][grp_angle]['funct'] == 2: + elif ns.cg_itp['angle'][grp_angle]['func'] == 2: search_space_boundaries.extend([[config.default_min_fct_angles, ns.default_max_fct_angles_opti_f2]]) - else: - sys.exit('Code error in force constants calculations, in the angles block') if ns.opti_cycle['nb_geoms']['dihedral'] > 0: if ns.exec_mode == 1: - search_space_boundaries.extend(ns.domains_val['dihedral']) # dihedrals values + search_space_boundaries.extend(ns.domains_val['dihedral']) # dihedrals values - for grp_dihedral in range(ns.opti_cycle['nb_geoms']['dihedral']): # dihedrals force constants - if ns.cg_itp['dihedral'][grp_dihedral]['funct'] == 2: + for grp_dihedral in range(ns.opti_cycle['nb_geoms']['dihedral']): # dihedrals force constants + if 
ns.cg_itp['dihedral'][grp_dihedral]['func'] == 2: search_space_boundaries.extend([[config.default_min_fct_dihedrals_func_without_mult, ns.default_max_fct_dihedrals_opti_func_without_mult]]) - elif ns.cg_itp['dihedral'][grp_dihedral]['funct'] in [1, 4, 9]: + elif ns.cg_itp['dihedral'][grp_dihedral]['func'] in [1, 4, 9]: search_space_boundaries.extend([[-ns.default_abs_range_fct_dihedrals_opti_func_with_mult, ns.default_abs_range_fct_dihedrals_opti_func_with_mult]]) - else: - sys.exit('Code error in force constants calculations, in the dihedrals block') return search_space_boundaries @@ -1037,23 +1024,22 @@ def get_initial_guess_list(ns, nb_particles): input_guess.extend([ns.out_itp['angle'][i]['value'] for i in range(ns.opti_cycle['nb_geoms']['angle'])]) # angles values fct_angles = [] for i in range(ns.opti_cycle['nb_geoms']['angle']): - if ns.cg_itp['angle'][i]['funct'] == 1: + if ns.cg_itp['angle'][i]['func'] == 1: fct_angles.append(min(max(ns.out_itp['angle'][i]['fct'], config.default_min_fct_angles), ns.default_max_fct_angles_opti_f1)) # angles force constants - elif ns.cg_itp['angle'][i]['funct'] == 2: + elif ns.cg_itp['angle'][i]['func'] == 2: fct_angles.append(min(max(ns.out_itp['angle'][i]['fct'], config.default_min_fct_angles), ns.default_max_fct_angles_opti_f2)) # angles force constants - else: - sys.exit('Code error during force constants range definition while getting the initial guesses from BI') input_guess.extend(fct_angles) if ns.exec_mode == 1: input_guess.extend([ns.out_itp['dihedral'][i]['value'] for i in range(ns.opti_cycle['nb_geoms']['dihedral'])]) # dihedrals values fct_dihedrals = [] for i in range(ns.opti_cycle['nb_geoms']['dihedral']): - if ns.cg_itp['dihedral'][i]['funct'] == 2: + if ns.cg_itp['dihedral'][i]['func'] == 2: fct_dihedrals.append(min(max(ns.out_itp['dihedral'][i]['fct'], config.default_min_fct_dihedrals_func_without_mult), ns.default_max_fct_dihedrals_opti_func_without_mult)) # dihedrals force constants else: 
fct_dihedrals.append(min(max(ns.out_itp['dihedral'][i]['fct'], -ns.default_abs_range_fct_dihedrals_opti_func_with_mult), ns.default_abs_range_fct_dihedrals_opti_func_with_mult)) # dihedrals force constants input_guess.extend(fct_dihedrals) + initial_guess_list.append(input_guess) # the second particle is initialized using best EMD score for each geom, and the parameters that yielded these EMD scores @@ -1117,8 +1103,8 @@ def get_initial_guess_list(ns, nb_particles): # for the other particles we generate variations of the input CG ITP, still within defined boundaries for optimization # boundaries are defined: - # for constraints/bonds length and angles/dihedrals values, according to atomistic mapped trajectory and maximum searchable - # for force constants, according to default or user provided maximal ranges (see config file for defaults) + # - for constraints/bonds length and angles/dihedrals values, according to atomistic mapped trajectory and maximum searchable + # - for force constants, according to default or user provided maximal ranges (see config file for defaults) for i in range(num_particle_random_start, nb_particles): init_guess = [] @@ -1170,12 +1156,10 @@ def get_initial_guess_list(ns, nb_particles): except: emd_err_fact = 1 draw_low = max(min(ns.out_itp['angle'][j]['fct']*(1-ns.fct_guess_fact*emd_err_fact), ns.out_itp['angle'][j]['fct']-config.fct_guess_min_flat_diff_angles), config.default_min_fct_angles) - if ns.cg_itp['angle'][j]['funct'] == 1: + if ns.cg_itp['angle'][j]['func'] == 1: draw_high = min(max(ns.out_itp['angle'][j]['fct']*(1+ns.fct_guess_fact*emd_err_fact), ns.out_itp['angle'][j]['fct']+config.fct_guess_min_flat_diff_angles), ns.default_max_fct_angles_opti_f1) - elif ns.cg_itp['angle'][j]['funct'] == 2: + elif ns.cg_itp['angle'][j]['func'] == 2: draw_high = min(max(ns.out_itp['angle'][j]['fct']*(1+ns.fct_guess_fact*emd_err_fact), ns.out_itp['angle'][j]['fct']+config.fct_guess_min_flat_diff_angles), ns.default_max_fct_angles_opti_f2) - 
else: - sys.exit('Code error during force constants range definition for angles during particles initialization') init_guess.append(draw_float(draw_low, draw_high, 3)) # dihedrals values @@ -1208,7 +1192,7 @@ def get_initial_guess_list(ns, nb_particles): draw_high = ns.out_itp['dihedral'][j]['fct']*(1-ns.fct_guess_fact*emd_err_fact) # make sure the minimal variation range is enforced + stay within defined boundaries - if ns.cg_itp['dihedral'][j]['funct'] == 2: + if ns.cg_itp['dihedral'][j]['func'] == 2: draw_low = max(min(draw_low, ns.out_itp['dihedral'][j]['fct']-config.fct_guess_min_flat_diff_dihedrals_without_mult), config.default_min_fct_dihedrals_func_without_mult) draw_high = min(max(draw_high, ns.out_itp['dihedral'][j]['fct']+config.fct_guess_min_flat_diff_dihedrals_without_mult), ns.default_max_fct_dihedrals_opti_func_without_mult) else: @@ -1223,42 +1207,129 @@ def get_initial_guess_list(ns, nb_particles): # read atomistic trajectory def read_aa_traj(ns): - - print('Reading All Atom (AA) trajectory', flush=True) + + print('Reading All Atom (AA) trajectory') with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=ImportWarning) # ignore warning: "bootstrap.py:219: ImportWarning: can't resolve package from __spec__ or __package__, falling back on __name__ and __path__" - ns.aa_universe = mda.Universe(ns.aa_tpr_filename, ns.aa_traj_filename, in_memory=True, refresh_offsets=True, guess_bonds=False) # setting guess_bonds=False disables angles, dihedrals and improper_dihedrals guessing, which is activated by default - print(' Found', len(ns.aa_universe.trajectory), 'frames in AA trajectory file', flush=True) - # if len(ns.aa_universe.trajectory) > 20000: - # print(config.header_warning+'Your atomistic trajectory contains many frames, which increases computation time\nReasonably reducing the number of frames of your input AA trajectory won\'t affect results quality\n2k to 10k frames is usually enough, as long as behaviour and flexibility of 
your molecule are correctly described by your atomistic trajectory') + ns.aa_universe = mda.Universe(ns.aa_tpr_filename, ns.aa_traj_filename, in_memory=True, refresh_offsets=True, guess_bonds=False) # setting guess_bonds=False disables angles, dihedrals and improper_dihedrals guessing, which is activated by default in some MDA versions + print(' Found', len(ns.aa_universe.trajectory), 'frames') + + +def initialize_cg_traj(ns): + + masses = np.array([val['mass'] for val in ns.cg_itp['atoms']]) + names = np.array([val['atom'] for val in ns.cg_itp['atoms']]) + resnames = np.array([val['residue'] for val in ns.cg_itp['atoms']]) + resid = np.array([val['resnr'] for val in ns.cg_itp['atoms']]) + nr = len(set([val['resnr'] for val in ns.cg_itp['atoms']])) + + ns.aa2cg_universe = mda.Universe.empty(len(ns.cg_itp['atoms']), n_residues=nr, atom_resindex=resid, n_segments=1, residue_segindex=np.ones(nr), trajectory=True) + ns.aa2cg_universe.add_TopologyAttr('masses') + ns.aa2cg_universe._topology.masses.values = np.array(masses) + ns.aa2cg_universe.add_TopologyAttr('names') + ns.aa2cg_universe._topology.names.values = names + ns.aa2cg_universe.add_TopologyAttr('resnames') + ns.aa2cg_universe._topology.resnames.values = resnames + + +def map_aa2cg_traj(ns): + + if ns.mapping_type == 'COM': + print(' Interpretation: Center of Mass (COM)') + elif ns.mapping_type == 'COG': + print(' Interpretation: Center of Geometry (COG)') + + # regular beads are mapped using center of mass of groups of atoms + coord = np.empty((len(ns.aa_universe.trajectory), len(ns.cg_itp['atoms']), 3)) + for bead_id in range(len(ns.cg_itp['atoms'])): + if not ns.cg_itp['atoms'][bead_id]['bead_type'].startswith('v'): # bead is NOT a virtual site + traj = np.empty((len(ns.aa_universe.trajectory), 3)) + for ts in ns.aa_universe.trajectory: + traj[ts.frame] = ns.mda_beads_atom_grps[bead_id].center( + ns.mda_weights_atom_grps[bead_id], pbc=None, compound='group' + ) # no need for PBC handling, trajectories were 
made wholes for the molecule + coord[:, bead_id, :] = traj + + ns.aa2cg_universe.load_new(coord, format=mda.coordinates.memory.MemoryReader) + + # virtual sites are mapped using previously defined regular beads positions and appropriate virtual sites functions + # it is also possible to use a VS for defining another VS position, if the VS used for definition is defined before + # no need to check if the functions used for VS definition are correct here, this has been done already + for bead_id in range(len(ns.cg_itp['atoms'])): + if ns.cg_itp['atoms'][bead_id]['bead_type'].startswith('v'): + + traj = np.empty((len(ns.aa2cg_universe.trajectory), 3)) + + if ns.cg_itp['atoms'][bead_id]['vs_type'] == 2: + vs_def_beads_ids = ns.cg_itp['virtual_sites2'][bead_id]['vs_def_beads_ids'] + vs_params = ns.cg_itp['virtual_sites2'][bead_id]['vs_params'] + + if ns.cg_itp['virtual_sites2'][bead_id]['func'] == 1: + vsf.vs2_func_1(ns, traj, vs_def_beads_ids, vs_params) + elif ns.cg_itp['virtual_sites2'][bead_id]['func'] == 2: + vsf.vs2_func_2(ns, traj, vs_def_beads_ids, vs_params) + + if ns.cg_itp['atoms'][bead_id]['vs_type'] == 3: + vs_def_beads_ids = ns.cg_itp['virtual_sites3'][bead_id]['vs_def_beads_ids'] + vs_params = ns.cg_itp['virtual_sites3'][bead_id]['vs_params'] + + if ns.cg_itp['virtual_sites3'][bead_id]['func'] == 1: + vsf.vs3_func_1(ns, traj, vs_def_beads_ids, vs_params) + elif ns.cg_itp['virtual_sites3'][bead_id]['func'] == 2: + vsf.vs3_func_2(ns, traj, vs_def_beads_ids, vs_params) + elif ns.cg_itp['virtual_sites3'][bead_id]['func'] == 3: + vsf.vs3_func_3(ns, traj, vs_def_beads_ids, vs_params) + elif ns.cg_itp['virtual_sites3'][bead_id]['func'] == 4: + vsf.vs3_func_4(ns, traj, vs_def_beads_ids, vs_params) + + # here it's normal there is only function 2, that's the only one that exists in gromacs for some reason + if ns.cg_itp['atoms'][bead_id]['vs_type'] == 4: + vs_def_beads_ids = ns.cg_itp['virtual_sites4'][bead_id]['vs_def_beads_ids'] + vs_params = 
ns.cg_itp['virtual_sites4'][bead_id]['vs_params'] + + if ns.cg_itp['virtual_sites4'][bead_id]['func'] == 2: + vsf.vs4_func_2(ns, traj, vs_def_beads_ids, vs_params) + + if ns.cg_itp['atoms'][bead_id]['vs_type'] == 'n': + vs_def_beads_ids = ns.cg_itp['virtual_sitesn'][bead_id]['vs_def_beads_ids'] + vs_params = ns.cg_itp['virtual_sitesn'][bead_id]['vs_params'] + + if ns.cg_itp['virtual_sitesn'][bead_id]['func'] == 1: + vsf.vsn_func_1(ns, traj, vs_def_beads_ids) + elif ns.cg_itp['virtual_sitesn'][bead_id]['func'] == 2: + vsf.vsn_func_2(ns, traj, vs_def_beads_ids, bead_id) + elif ns.cg_itp['virtual_sitesn'][bead_id]['func'] == 3: + vsf.vsn_func_3(ns, traj, vs_def_beads_ids, vs_params) - return + coord[:, bead_id, :] = traj + + ns.aa2cg_universe.load_new(coord, format=mda.coordinates.memory.MemoryReader) # use selected whole molecules as MDA atomgroups and make their coordinates whole, inplace, across the complete tAA rajectory def make_aa_traj_whole_for_selected_mols(ns): - # TODO: add an option to NOT read the PBC in case user would feed a trajectory that is already OK and their trajectory does NOT contain PBC/BOX size info across trajectory (this was an issue I encountered with Davide B3T traj GRO) - # try: + # TODO: add an option to NOT read the PBC in case user would feed a trajectory that is already unwrapped for + # molecule and their trajectory does NOT contain box dimensions (universe.dimensions) + # (this was an issue I encountered with Davide B3T traj GRO) for _ in ns.aa_universe.trajectory: for aa_mol in ns.all_aa_mols: mda.lib.mdamath.make_whole(aa_mol, inplace=True) - # except ValueError as e: - # print(e) - - return # build gromacs command with arguments -def gmx_args(gmx_cmd, nb_threads, gpu_id, gmx_args_str): +def gmx_args(ns, gmx_cmd, mpi=True): - if gmx_args_str != '': - gmx_cmd += ' '+gmx_args_str + gmx_cmd = f"{ns.gmx_path} {gmx_cmd}" + if ns.gmx_args_str != '': + gmx_cmd = f"{gmx_cmd} {ns.gmx_args_str}" else: - if nb_threads > 0: - gmx_cmd += ' -nt 
'+str(nb_threads) - if len(gpu_id) > 0: - gmx_cmd += ' -gpu_id '+str(gpu_id) + if ns.nb_threads > 0: + gmx_cmd = f"{gmx_cmd} -nt {ns.nb_threads}" + if len(ns.gpu_id) > 0: + gmx_cmd = f"{gmx_cmd} -gpu_id {ns.gpu_id}" + if mpi and ns.mpi_tasks > 1: + gmx_cmd = f"mpirun -np {ns.mpi_tasks} {ns.gmx_cmd}" return gmx_cmd @@ -1285,24 +1356,18 @@ def create_bins_and_dist_matrices(ns, constraints=True): bins_dihedrals_dist_matrix = cdist(bins_dihedrals_reshape, bins_dihedrals_reshape) # 'classical' distance matrix ns.bins_dihedrals_dist_matrix = np.where(bins_dihedrals_dist_matrix > max(bins_dihedrals_dist_matrix[0])/2, max(bins_dihedrals_dist_matrix[0])-bins_dihedrals_dist_matrix, bins_dihedrals_dist_matrix) # periodic distance matrix - return - # calculate bonds distribution from AA trajectory def get_AA_bonds_distrib(ns, beads_ids, grp_type, grp_nb): - bond_values = np.empty(len(ns.aa_universe.trajectory) * len(beads_ids)) + bond_values = np.empty(len(ns.aa2cg_universe.trajectory) * len(beads_ids)) for i in range(len(beads_ids)): bead_id_1, bead_id_2 = beads_ids[i] - # print('bead_id_1:', bead_id_1, 'using atoms:', ns.mda_beads_atom_grps[bead_id_1].atoms, 'with weights:', ns.mda_weights_atom_grps[bead_id_1]) - # print('bead_id_2:', bead_id_2, 'using atoms:', ns.mda_beads_atom_grps[bead_id_2].atoms, 'with weights:', ns.mda_weights_atom_grps[bead_id_2]) - # print() - frame_nb = 0 - for _ in ns.aa_universe.trajectory: - pos_1 = ns.mda_beads_atom_grps[bead_id_1].center(ns.mda_weights_atom_grps[bead_id_1], pbc=None, compound='group') # no need for PBC handling, trajectories were made wholes for the molecule - pos_2 = ns.mda_beads_atom_grps[bead_id_2].center(ns.mda_weights_atom_grps[bead_id_2], pbc=None, compound='group') - bond_values[len(ns.aa_universe.trajectory)*i+frame_nb] = mda.lib.distances.calc_bonds(pos_1, pos_2, backend=ns.mda_backend, box=None) / 10 # retrieve nm - frame_nb += 1 + for ts in ns.aa2cg_universe.trajectory: + 
bond_values[len(ns.aa2cg_universe.trajectory)*i+ts.frame] = mda.lib.distances.calc_bonds( + ns.aa2cg_universe.atoms[bead_id_1].position, + ns.aa2cg_universe.atoms[bead_id_2].position, + backend=ns.mda_backend, box=None) / 10 # retrieved nm bond_avg_init = round(np.average(bond_values), 3) @@ -1325,14 +1390,14 @@ def get_AA_bonds_distrib(ns, beads_ids, grp_type, grp_nb): print(' Ref. AA-mapped distrib. rescaled to avg', bond_avg_final, 'nm for', grp_type, grp_nb+1, '(initially', bond_avg_init, 'nm)') # or if specific lengths were provided for constraints and/or bonds - elif ns.bonds_scaling_specific != None: + elif ns.bonds_scaling_specific is not None: if grp_type.startswith('constraint'): - geom_id_full = 'C'+str(grp_nb+1) + geom_id_full = f'C{grp_nb+1}' if grp_type.startswith('bond'): - geom_id_full = 'B'+str(grp_nb+1) + geom_id_full = f'B{grp_nb+1}' - if (geom_id_full[0] == 'C' and geom_id_full in ns.bonds_scaling_specific) or (geom_id_full[0] == 'B' and geom_id_full in ns.bonds_scaling_specific): + if (geom_id_full.startswith('C') and geom_id_full in ns.bonds_scaling_specific) or (geom_id_full.startswith('B') and geom_id_full in ns.bonds_scaling_specific): bond_rescale_factor = ns.bonds_scaling_specific[geom_id_full] / bond_avg_init bond_values = [bond_length * bond_rescale_factor for bond_length in bond_values] bond_avg_final = round(np.average(bond_values), 3) @@ -1349,9 +1414,9 @@ def get_AA_bonds_distrib(ns, beads_ids, grp_type, grp_nb): # exclusions storage format: ns.cg_itp['exclusion'].append([int(bead_id)-1 for bead_id in sp_itp_line[0:2]]) if grp_type.startswith('constraint'): - bond_hist = np.histogram(bond_values, ns.bins_constraints, density=True)[0]*ns.bw_constraints # retrieve 1-sum densities + bond_hist = np.histogram(bond_values, ns.bins_constraints, density=True)[0]*ns.bw_constraints # retrieve 1-sum densities if grp_type.startswith('bond'): - bond_hist = np.histogram(bond_values, ns.bins_bonds, density=True)[0]*ns.bw_bonds # retrieve 1-sum 
densities + bond_hist = np.histogram(bond_values, ns.bins_bonds, density=True)[0]*ns.bw_bonds # retrieve 1-sum densities return bond_avg_final, bond_hist, bond_values @@ -1359,20 +1424,19 @@ def get_AA_bonds_distrib(ns, beads_ids, grp_type, grp_nb): # calculate angles distribution from AA trajectory def get_AA_angles_distrib(ns, beads_ids): - angle_values_rad = np.empty(len(ns.aa_universe.trajectory) * len(beads_ids)) + angle_values_rad = np.empty(len(ns.aa2cg_universe.trajectory) * len(beads_ids)) for i in range(len(beads_ids)): bead_id_1, bead_id_2, bead_id_3 = beads_ids[i] - frame_nb = 0 - for _ in ns.aa_universe.trajectory: - pos_1 = ns.mda_beads_atom_grps[bead_id_1].center(ns.mda_weights_atom_grps[bead_id_1], pbc=None, compound='group') # no need for PBC handling, trajectories were made wholes for the molecule - pos_2 = ns.mda_beads_atom_grps[bead_id_2].center(ns.mda_weights_atom_grps[bead_id_2], pbc=None, compound='group') - pos_3 = ns.mda_beads_atom_grps[bead_id_3].center(ns.mda_weights_atom_grps[bead_id_3], pbc=None, compound='group') - angle_values_rad[len(ns.aa_universe.trajectory)*i+frame_nb] = mda.lib.distances.calc_angles(pos_1, pos_2, pos_3, backend=ns.mda_backend, box=None) - frame_nb += 1 + for ts in ns.aa2cg_universe.trajectory: + angle_values_rad[len(ns.aa2cg_universe.trajectory)*i+ts.frame] = mda.lib.distances.calc_angles( + ns.aa2cg_universe.atoms[bead_id_1].position, + ns.aa2cg_universe.atoms[bead_id_2].position, + ns.aa2cg_universe.atoms[bead_id_3].position, + backend=ns.mda_backend, box=None) angle_values_deg = np.rad2deg(angle_values_rad) angle_avg = round(np.mean(angle_values_deg), 3) - angle_hist = np.histogram(angle_values_deg, ns.bins_angles, density=True)[0]*ns.bw_angles # retrieve 1-sum densities + angle_hist = np.histogram(angle_values_deg, ns.bins_angles, density=True)[0]*ns.bw_angles # retrieve 1-sum densities return angle_avg, angle_hist, angle_values_deg, angle_values_rad @@ -1380,21 +1444,20 @@ def get_AA_angles_distrib(ns, 
beads_ids): # calculate dihedrals distribution from AA trajectory def get_AA_dihedrals_distrib(ns, beads_ids): - dihedral_values_rad = np.empty(len(ns.aa_universe.trajectory) * len(beads_ids)) + dihedral_values_rad = np.empty(len(ns.aa2cg_universe.trajectory) * len(beads_ids)) for i in range(len(beads_ids)): bead_id_1, bead_id_2, bead_id_3, bead_id_4 = beads_ids[i] - frame_nb = 0 - for _ in ns.aa_universe.trajectory: - pos_1 = ns.mda_beads_atom_grps[bead_id_1].center(ns.mda_weights_atom_grps[bead_id_1], pbc=None, compound='group') # no need for PBC handling, trajectories were made wholes for the molecule - pos_2 = ns.mda_beads_atom_grps[bead_id_2].center(ns.mda_weights_atom_grps[bead_id_2], pbc=None, compound='group') - pos_3 = ns.mda_beads_atom_grps[bead_id_3].center(ns.mda_weights_atom_grps[bead_id_3], pbc=None, compound='group') - pos_4 = ns.mda_beads_atom_grps[bead_id_4].center(ns.mda_weights_atom_grps[bead_id_4], pbc=None, compound='group') - dihedral_values_rad[len(ns.aa_universe.trajectory)*i+frame_nb] = mda.lib.distances.calc_dihedrals(pos_1, pos_2, pos_3, pos_4, backend=ns.mda_backend, box=None) - frame_nb += 1 + for ts in ns.aa2cg_universe.trajectory: + dihedral_values_rad[len(ns.aa2cg_universe.trajectory)*i+ts.frame] = mda.lib.distances.calc_dihedrals( + ns.aa2cg_universe.atoms[bead_id_1].position, + ns.aa2cg_universe.atoms[bead_id_2].position, + ns.aa2cg_universe.atoms[bead_id_3].position, + ns.aa2cg_universe.atoms[bead_id_4].position, + backend=ns.mda_backend, box=None) dihedral_values_deg = np.rad2deg(dihedral_values_rad) dihedral_avg = round(np.mean(dihedral_values_deg), 3) - dihedral_hist = np.histogram(dihedral_values_deg, ns.bins_dihedrals, density=True)[0]*ns.bw_dihedrals # retrieve 1-sum densities + dihedral_hist = np.histogram(dihedral_values_deg, ns.bins_dihedrals, density=True)[0]*ns.bw_dihedrals # retrieve 1-sum densities return dihedral_avg, dihedral_hist, dihedral_values_deg, dihedral_values_rad @@ -1405,16 +1468,17 @@ def 
get_CG_bonds_distrib(ns, beads_ids, grp_type): bond_values = np.empty(len(ns.cg_universe.trajectory) * len(beads_ids)) for i in range(len(beads_ids)): bead_id_1, bead_id_2 = beads_ids[i] - frame_nb = 0 - for _ in ns.cg_universe.trajectory: # no need for PBC handling, trajectories were made wholes for the molecule - bond_values[len(ns.cg_universe.trajectory)*i+frame_nb] = mda.lib.distances.calc_bonds(ns.cg_universe.atoms[bead_id_1].position, ns.cg_universe.atoms[bead_id_2].position, backend=ns.mda_backend, box=None) / 10 # retrieved nm - frame_nb += 1 + for ts in ns.cg_universe.trajectory: # no need for PBC handling, trajectories were made wholes for the molecule + bond_values[len(ns.cg_universe.trajectory)*i+ts.frame] = mda.lib.distances.calc_bonds( + ns.cg_universe.atoms[bead_id_1].position, + ns.cg_universe.atoms[bead_id_2].position, + backend=ns.mda_backend, box=None) / 10 # retrieved nm bond_avg = round(np.mean(bond_values), 3) if grp_type == 'constraint': - bond_hist = np.histogram(bond_values, ns.bins_constraints, density=True)[0]*ns.bw_constraints # retrieve 1-sum densities + bond_hist = np.histogram(bond_values, ns.bins_constraints, density=True)[0]*ns.bw_constraints # retrieve 1-sum densities if grp_type == 'bond': - bond_hist = np.histogram(bond_values, ns.bins_bonds, density=True)[0]*ns.bw_bonds # retrieve 1-sum densities + bond_hist = np.histogram(bond_values, ns.bins_bonds, density=True)[0]*ns.bw_bonds # retrieve 1-sum densities return bond_avg, bond_hist, bond_values @@ -1425,15 +1489,17 @@ def get_CG_angles_distrib(ns, beads_ids): angle_values_rad = np.empty(len(ns.cg_universe.trajectory) * len(beads_ids)) for i in range(len(beads_ids)): bead_id_1, bead_id_2, bead_id_3 = beads_ids[i] - frame_nb = 0 - for _ in ns.cg_universe.trajectory: # no need for PBC handling, trajectories were made wholes for the molecule - angle_values_rad[len(ns.cg_universe.trajectory)*i+frame_nb] = mda.lib.distances.calc_angles(ns.cg_universe.atoms[bead_id_1].position, 
ns.cg_universe.atoms[bead_id_2].position, ns.cg_universe.atoms[bead_id_3].position, backend=ns.mda_backend, box=None) - frame_nb += 1 + for ts in ns.cg_universe.trajectory: # no need for PBC handling, trajectories were made wholes for the molecule + angle_values_rad[len(ns.cg_universe.trajectory)*i+ts.frame] = mda.lib.distances.calc_angles( + ns.cg_universe.atoms[bead_id_1].position, + ns.cg_universe.atoms[bead_id_2].position, + ns.cg_universe.atoms[bead_id_3].position, + backend=ns.mda_backend, box=None) angle_values_deg = np.rad2deg(angle_values_rad) # get group average and histogram non-null values for comparison and display angle_avg = round(np.mean(angle_values_deg), 3) - angle_hist = np.histogram(angle_values_deg, ns.bins_angles, density=True)[0]*ns.bw_angles # retrieve 1-sum densities + angle_hist = np.histogram(angle_values_deg, ns.bins_angles, density=True)[0]*ns.bw_angles # retrieve 1-sum densities return angle_avg, angle_hist, angle_values_deg, angle_values_rad @@ -1444,10 +1510,13 @@ def get_CG_dihedrals_distrib(ns, beads_ids): dihedral_values_rad = np.empty(len(ns.cg_universe.trajectory) * len(beads_ids)) for i in range(len(beads_ids)): bead_id_1, bead_id_2, bead_id_3, bead_id_4 = beads_ids[i] - frame_nb = 0 - for _ in ns.cg_universe.trajectory: # no need for PBC handling, trajectories were made wholes for the molecule - dihedral_values_rad[len(ns.cg_universe.trajectory)*i+frame_nb] = mda.lib.distances.calc_dihedrals(ns.cg_universe.atoms[bead_id_1].position, ns.cg_universe.atoms[bead_id_2].position, ns.cg_universe.atoms[bead_id_3].position, ns.cg_universe.atoms[bead_id_4].position, backend=ns.mda_backend, box=None) - frame_nb += 1 + for ts in ns.cg_universe.trajectory: # no need for PBC handling, trajectories were made wholes for the molecule + dihedral_values_rad[len(ns.cg_universe.trajectory)*i+ts.frame] = mda.lib.distances.calc_dihedrals( + ns.cg_universe.atoms[bead_id_1].position, + ns.cg_universe.atoms[bead_id_2].position, + 
ns.cg_universe.atoms[bead_id_3].position, + ns.cg_universe.atoms[bead_id_4].position, + backend=ns.mda_backend, box=None) dihedral_values_deg = np.rad2deg(dihedral_values_rad) # get group average and histogram non-null values for comparison and display @@ -1456,48 +1525,6 @@ def get_CG_dihedrals_distrib(ns, beads_ids): return dihedral_avg, dihedral_hist, dihedral_values_deg, dihedral_values_rad - -# gromacs potential function 1 for bonds -def gmx_bonds_func_1(x, a, b, c): - - return a/2 * (x-b)**2 + c - - -# gromacs potential function 1 for angles -def gmx_angles_func_1(x, a, b, c): - - # return a/2 * (x-b)**2 + c - return gmx_bonds_func_1(x, a, b, c) # it's actually the same - - -# gromacs potential function 2 for angles -def gmx_angles_func_2(x, a, b, c): - - return a/2 * (np.cos(x)-np.cos(b))**2 + c - - -# gromacs potential function 1 for dihedrals -- generated on the fly with adjusted multiplicity -def gmx_dihedrals_func_1(mult): - - def mult_adjusted(x, a, b, c): - - return a * (1 + np.cos(mult*x-b)) + c - - return mult_adjusted - - -# gromacs potential function 2 for dihedrals -- basically the same as potential function 2 for angles -def gmx_dihedrals_func_2(x, a, b, c): - - # return gmx_angles_func_1(x, a, b, c) - return gmx_bonds_func_1(x, a, b, c) # it's actually the same - - -# TODO: for dihedral function 9, this is the merging of several potentials of gmx_dihedrals_func_1 -- here one of mult=1 together with another of mult=2 -# def f(x,a,b,c,d,e): -# return a * (1+np.cos(x-b)) + d * (1+np.cos(2*x-e)) + c - - # update ITP force constants with Boltzmann inversion for selected geoms at this given optimization step def perform_BI(ns): @@ -1507,13 +1534,13 @@ def perform_BI(ns): # TODO: If the first opti run of BI fails, lower force constants by 10% and retry, again and again until it works, or tell the user something is very wrong after 20 tries with 50% of the force constants that all did NOT work with warnings.catch_warnings(): - 
warnings.filterwarnings("ignore", category=RuntimeWarning) # ignore the warning "divide by 0 encountered in true_divide" while calculating sigma + warnings.filterwarnings("ignore", category=RuntimeWarning) # ignore the warning "divide by 0 encountered in true_divide" while calculating sigma if not ns.performed_init_BI['bond'] and ns.opti_cycle['nb_geoms']['bond'] > 0: if ns.verbose: print() - print('Performing Boltzmann Inversion to estimate bonds force constants') + print('Performing Direct Boltzmann Inversion (DBI) to estimate bonds force constants') for grp_bond in range(ns.opti_cycle['nb_geoms']['bond']): @@ -1523,13 +1550,13 @@ def perform_BI(ns): nb_passes = 3 alpha = 0.55 for _ in range(nb_passes): - hist_geoms_modif = ewma(hist_geoms_modif, alpha, int(config.bi_nb_bins/10)) + hist_geoms_modif = utils.ewma(hist_geoms_modif, alpha, int(config.bi_nb_bins/10)) y = -config.kB * ns.temp * np.log(hist_geoms_modif + 1) x = np.linspace(bi_xrange[0], bi_xrange[1], config.bi_nb_bins, endpoint=True) k = config.kB * ns.temp / std_grp_bond / std_grp_bond * 100 / 2 - params_guess = [k, avg_grp_bond*10, min(y)] # multiply for amgstrom for BI + params_guess = [k, avg_grp_bond*10, min(y)] # multiply for amgstrom for BI # calculate derivative to use as sigma for fitting y_forward_shift = collections.deque(y) @@ -1540,16 +1567,16 @@ def perform_BI(ns): nb_passes = 5 for _ in range(nb_passes): - deriv = sma(deriv, int(config.bi_nb_bins/5)) + deriv = utils.sma(deriv, int(config.bi_nb_bins/5)) deriv *= np.sqrt(y/min(y)) deriv = 1/deriv sigma = np.where(y < max(y), deriv, np.inf) - popt, pcov = curve_fit(gmx_bonds_func_1, x*10, y, p0=params_guess, sigma=sigma, maxfev=99999, absolute_sigma=False) # multiply for amgstrom for BI + popt, pcov = curve_fit(gmx_bonds_func_1, x * 10, y, p0=params_guess, sigma=sigma, maxfev=99999, absolute_sigma=False) # multiply for amgstrom for BI # here we just update the force constant, bond length is already set to the average of distribution - 
ns.out_itp['bond'][grp_bond]['fct'] = min(max(popt[0]*100, config.default_min_fct_bonds), config.default_max_fct_bonds_bi) # stay within specified range for force constants + ns.out_itp['bond'][grp_bond]['fct'] = min(max(popt[0]*100, config.default_min_fct_bonds), config.default_max_fct_bonds_bi) # stay within specified range for force constants if ns.verbose: print(' Bond group', grp_bond+1, 'estimated force constant:', round(ns.out_itp['bond'][grp_bond]['fct'], 2)) @@ -1568,30 +1595,27 @@ def perform_BI(ns): x = np.linspace(np.deg2rad(bi_xrange[0]), np.deg2rad(bi_xrange[1]), config.bi_nb_bins, endpoint=True) k = config.kB * ns.temp / std_rad_grp_angle / std_rad_grp_angle * 100 / 2 - sigma = np.where(y < max(y), 0.1, np.inf) # this is definitely better when angles have bimodal distributions + sigma = np.where(y < max(y), 0.1, np.inf) # this is definitely better when angles have bimodal distributions # use appropriate angle function - func = ns.cg_itp['angle'][grp_angle]['funct'] + func = ns.cg_itp['angle'][grp_angle]['func'] if func == 1: params_guess = [k, std_rad_grp_angle, min(y)] popt, pcov = curve_fit(gmx_angles_func_1, x, y, p0=params_guess, sigma=sigma, maxfev=99999, absolute_sigma=False) - popt[0] = abs(popt[0]) # just to be safe, in case the fit yielded negative fct values but this is very unlikely since we provide good starting parameters for the fit + popt[0] = abs(popt[0]) # just to be safe, in case the fit yielded negative fct values but this is very unlikely since we provide good starting parameters for the fit elif func == 2: params_guess = [max(y)-min(y), std_rad_grp_angle, min(y)] try: popt, pcov = curve_fit(gmx_angles_func_2, x, y, p0=params_guess, sigma=sigma, maxfev=99999, absolute_sigma=False) - if popt[0] < 0: # correct the negative force constant that can result from the fit of stiff angles at values close to 180 + if popt[0] < 0: # correct the negative force constant that can result from the fit of stiff angles at values close to 180 
popt[0] = config.default_max_fct_angles_bi * 0.8 # stiff is most probably max fct value, so get close to it elif bi_xrange[1] == 180 - ns.bw_angles/2: popt[0] += 10 - except RuntimeError: # curve fit did not converge + except RuntimeError: # curve fit did not converge popt[0] = 30 - else: - sys.exit(config.header_error+'Code error, we should never arrive here because functions have been checked during CG ITP file reading') - # here we just update the force constant, angle value is already set to the average of distribution ns.out_itp['angle'][grp_angle]['fct'] = min(max(popt[0], config.default_min_fct_angles), config.default_max_fct_angles_bi) # stay within specified range for force constants if ns.verbose: @@ -1615,24 +1639,21 @@ def perform_BI(ns): sigma = np.where(y < max(y), 0.1, np.inf) # use appropriate dihedral function - func = ns.cg_itp['dihedral'][grp_dihedral]['funct'] + func = ns.cg_itp['dihedral'][grp_dihedral]['func'] if func in config.dihedral_func_with_mult: - multiplicity = ns.cg_itp['dihedral'][grp_dihedral]['mult'] # multiplicity stays the same as in input CG ITP, it's only during model_prep that we could compare between different multiplicities + multiplicity = ns.cg_itp['dihedral'][grp_dihedral]['mult'] # multiplicity stays the same as in input CG ITP, it's only during model_prep that we could compare between different multiplicities params_guess = [max(y)-min(y), avg_rad_grp_dihedral, min(y)] popt, pcov = curve_fit(gmx_dihedrals_func_1(mult=multiplicity), x, y, p0=params_guess, sigma=sigma, maxfev=99999, absolute_sigma=False) elif func == 2: params_guess = [k, avg_rad_grp_dihedral, min(y)] popt, pcov = curve_fit(gmx_dihedrals_func_2, x, y, p0=params_guess, sigma=sigma, maxfev=99999, absolute_sigma=False) - popt[0] = abs(popt[0]) # just to be safe, in case the fit yielded negative fct values but this is very unlikely since we provide good starting parameters for the fit - - else: - sys.exit(config.header_error+'Code error, we should never 
arrive here because functions have been checked during CG ITP file reading') + popt[0] = abs(popt[0]) # just to be safe, in case the fit yielded negative fct values but this is very unlikely since we provide good starting parameters for the fit if ns.exec_mode == 1: ns.out_itp['dihedral'][grp_dihedral]['value'] = np.rad2deg(popt[1]) - # TODO: make the fit according to user provided dihedral angle value when using execution mode 2 + # TODO: make the fit according to user provided dihedral angle value when using execution mode 2 or 3 # stay within specified range for force constants, negative to positive according to function chosen by user # print(' Dihedral group', grp_dihedral+1, 'estimated force constant BEFORE MODIFIER:', round(popt[0], 2)) @@ -1645,46 +1666,52 @@ def perform_BI(ns): ns.performed_init_BI['dihedral'] = True - return - -# TODO: use this function from optimize_model, where this block is repeated currently def process_scaling_str(ns): # process specific bonds scaling string, if provided ns.bonds_scaling_specific = None if ns.bonds_scaling_str != config.bonds_scaling_str: - sp_str = ns.bonds_scaling_str.split() - if len(sp_str) % 2 != 0: - sys.exit(config.header_error+'Cannot interpret argument -bonds_scaling_str as provided: \''+ns.bonds_scaling_str+'\'\nPlease check your parameters, or help for an example') - ns.bonds_scaling_specific = dict() - i = 0 - try: - while i < len(sp_str): - geom_id = sp_str[i][1:] - if sp_str[i][0].upper() == 'C': - if int(geom_id) > ns.nb_constraints: - sys.exit(config.header_error+'Cannot interpret argument -bonds_scaling_str as provided: \''+ns.bonds_scaling_str+'\'\nA constraint group id exceeds the number of constraints groups defined in the input CG ITP file\nPlease check your parameters, or help for an example') - if not 'C'+geom_id in ns.bonds_scaling_specific: - if float(sp_str[i+1]) < 0: - sys.exit(config.header_error+'Cannot interpret argument -bonds_scaling_str as provided: \''+ns.bonds_scaling_str+'\'\nYou 
cannot provide negative values for average distribution length\nPlease check your parameters, or help for an example') - ns.bonds_scaling_specific['C'+geom_id] = float(sp_str[i+1]) - else: - sys.exit(config.header_error+'Cannot interpret argument -bonds_scaling_str as provided: \''+ns.bonds_scaling_str+'\'\nA constraint group id is provided multiple times (id: '+str(geom_id)+')\nPlease check your parameters, or help for an example') - elif sp_str[i][0].upper() == 'B': - if int(geom_id) > ns.nb_bonds: - sys.exit(config.header_error+'Cannot interpret argument -bonds_scaling_str as provided: \''+ns.bonds_scaling_str+'\'\nA bond group id exceeds the number of bonds groups defined in the input CG ITP file\nPlease check your parameters, or help for an example') - if not 'B'+geom_id in ns.bonds_scaling_specific: - if float(sp_str[i+1]) < 0: - sys.exit(config.header_error+'Cannot interpret argument -bonds_scaling_str as provided: \''+ns.bonds_scaling_str+'\'\nYou cannot provide negative values for average distribution length\nPlease check your parameters, or help for an example') - ns.bonds_scaling_specific['B'+geom_id] = float(sp_str[i+1]) - else: - sys.exit(config.header_error+'Cannot interpret argument -bonds_scaling_str as provided: \''+ns.bonds_scaling_str+'\'\nA bond group id is provided multiple times (id: '+str(geom_id)+')\nPlease check your parameters, or help for an example') - i += 2 - except ValueError: - sys.exit(config.header_error+'Cannot interpret argument -bonds_scaling_str as provided: \''+ns.bonds_scaling_str+'\'\nPlease check your parameters, or help for an example') - - return + sp_str = ns.bonds_scaling_str.split() + if len(sp_str) % 2 != 0: + msg = ( + f"Cannot interpret argument -bonds_scaling_str as provided: {ns.bonds_scaling_str}.\n" + f"Please check your parameters, or the help (-h) for an example." 
+ ) + raise exceptions.InvalidArgument(msg) + + ns.bonds_scaling_specific = dict() + i = 0 + try: + while i < len(sp_str): + geom_id = sp_str[i][1:] + if sp_str[i][0].upper() == 'C': + if int(geom_id) > ns.nb_constraints: + info = "A constraint group id exceeds the number of constraints groups defined in the input CG ITP file." + raise exceptions.InvalidArgument('bonds_scaling_str', ns.bonds_scaling_str, info) + if not 'C' + geom_id in ns.bonds_scaling_specific: + if float(sp_str[i + 1]) < 0: + info = "You cannot provide negative values for average distribution length." + raise exceptions.InvalidArgument('bonds_scaling_str', ns.bonds_scaling_str, info) + ns.bonds_scaling_specific['C' + geom_id] = float(sp_str[i + 1]) + else: + info = f"A constraint group id is provided multiple times (id: {geom_id})" + raise exceptions.InvalidArgument('bonds_scaling_str', ns.bonds_scaling_str, info) + elif sp_str[i][0].upper() == 'B': + if int(geom_id) > ns.nb_bonds: + info = "A bond group id exceeds the number of bonds groups defined in the input CG ITP file." + raise exceptions.InvalidArgument('bonds_scaling_str', ns.bonds_scaling_str, info) + if not 'B' + geom_id in ns.bonds_scaling_specific: + if float(sp_str[i + 1]) < 0: + info = "You cannot provide negative values for average distribution length." 
+ raise exceptions.InvalidArgument('bonds_scaling_str', ns.bonds_scaling_str, info) + ns.bonds_scaling_specific['B' + geom_id] = float(sp_str[i + 1]) + else: + info = f"A bond group id is provided multiple times (id: {geom_id})" + raise exceptions.InvalidArgument('bonds_scaling_str', ns.bonds_scaling_str, info) + i += 2 + except ValueError: + raise exceptions.InvalidArgument('bonds_scaling_str', ns.bonds_scaling_str) # compare 2 models -- atomistic and CG models with plotting @@ -1698,79 +1725,70 @@ def compare_models(ns, manual_mode=True, ignore_dihedrals=False, calc_sasa=False row_wise_ranges = {} row_wise_ranges['max_range_constraints'], row_wise_ranges['max_range_bonds'], row_wise_ranges['max_range_angles'], row_wise_ranges['max_range_dihedrals'] = 0, 0, 0, 0 - # read ITP file to extract bonds, angles and dihedrals to compare OR get it from the optimization script to avoid re-reading trajectory + calculating hists at each execution - # for reading ITP, groups are created by separating bonds/angles/etc lines by a return (\n) or a comment (;) - if manual_mode: - with open(ns.cg_itp_filename, 'r') as fp: - try: - itp_lines = fp.read().split('\n') - itp_lines = [itp_line.strip() for itp_line in itp_lines] - read_cg_itp_file(ns, itp_lines) - process_scaling_str(ns) - except UnicodeDecodeError: - sys.exit(config.header_error+'Cannot read CG ITP, it seems you provided a binary file.') - - # if we do not have reference already from the optimization procedure - if manual_mode: - - # read AA traj + find atom bonds connectivity and atom types (to differentiate heavy/hydrogens) - print() - read_aa_traj(ns) - load_aa_data(ns) - make_aa_traj_whole_for_selected_mols(ns) - - read_ndx_atoms2beads(ns) # read mapping, get atoms occurences in beads - get_atoms_weights_in_beads(ns) # get weights of atoms within beads - # for each CG bead, create atom groups for trajectory geoms calculation using mass and atom weights across beads - get_beads_MDA_atomgroups(ns) - - if ns.atom_only: 
- compute_Rg(ns, traj_type='AA') - print() - print('Radius of gyration (AA reference, NOT CG-mapped):', ns.gyr_aa, 'nm') + if ns.atom_only: + compute_Rg(ns, traj_type='AA') + print('Radius of gyration (AA reference, NOT CG-mapped):', ns.gyr_aa, 'nm') # proceed with CG data if not ns.atom_only: print('Reading CG trajectory') ns.cg_universe = mda.Universe(ns.cg_tpr_filename, ns.cg_traj_filename, in_memory=True, refresh_offsets=True, guess_bonds=False) - print(' Found', len(ns.cg_universe.trajectory), 'frames in CG trajectory file', flush=True) + print(' Found', len(ns.cg_universe.trajectory), 'frames') + + if manual_mode: + # here we read the CG beads masses + actualize the mapped trajectory object + for bead_id in range(len(ns.cg_itp['atoms'])): + ns.cg_itp['atoms'][bead_id]['mass'] = ns.cg_universe.atoms[bead_id].mass + masses = np.array([val['mass'] for val in ns.cg_itp['atoms']]) + ns.aa2cg_universe._topology.masses.values = np.array(masses) + + # create fake bonds in the CG MDA universe, that will be used only for making the molecule whole + # we make bonds between each VS and their beads definition, so we retrieve the connectivity + # iteratively towards the real CG beads, that are all connected + if len(ns.vs_beads_ids) > 0: + fake_bonds = [] + for vs_type in ['2', '3', '4', 'n']: + try: + for bead_id in ns.cg_itp['virtual_sites'+vs_type]: + for vs_def_bead_id in ns.cg_itp['virtual_sites'+vs_type][bead_id]['vs_def_beads_ids']: + fake_bonds.append([bead_id, vs_def_bead_id]) + except (IndexError, ValueError): + pass + ns.cg_universe.add_bonds(fake_bonds, guessed=False) # select the whole molecule as an MDA atomgroup and make its coordinates whole, inplace, across the complete trajectory - cg_mol = mda.AtomGroup([bead_id for bead_id in ns.all_beads], ns.cg_universe) - for _ in ns.cg_universe.trajectory: # did not help - mda.lib.mdamath.make_whole(cg_mol, inplace=True) + ag_mol = mda.AtomGroup([bead_id for bead_id in range(len(ns.cg_itp['atoms']))], 
ns.cg_universe) + for _ in ns.cg_universe.trajectory: + mda.lib.mdamath.make_whole(ag_mol, inplace=True) # this requires CG data for mapping -- especially, masses are taken from the CG TPR but the CG ITP is also used atm if ns.gyr_aa_mapped == None: compute_Rg(ns, traj_type='AA_mapped') print() - print('Radius of gyration (AA reference, no scaling, CG-mapped):', ns.gyr_aa_mapped, '+/-', ns.gyr_aa_mapped_std, 'nm') + print('Radius of gyration (AA reference, CG-mapped, no bonds scaling):', ns.gyr_aa_mapped, '+/-', ns.gyr_aa_mapped_std, 'nm') compute_Rg(ns, traj_type='CG') print('Radius of gyration (CG model):', ns.gyr_cg, '+/-', ns.gyr_cg_std, 'nm') if calc_sasa: - ns.probe_radius = 0.26 # nm - if ns.sasa_aa_mapped == None: compute_SASA(ns, traj_type='AA_mapped') - # print_stdout_forced('SASA (AA reference, no scaling, CG-mapped, probe radius', str(ns.probe_radius)+'):', ns.sasa_aa_mapped) - + compute_SASA(ns, traj_type='CG') print() - # print_stdout_forced(' All SASA computed fine') - # print_stdout_forced('SASA (CG model, probe radius', str(ns.probe_radius)+'):', ns.sasa_cg) - if ns.sasa_cg == None: # this line checks that gmx trjconv could read the md.xtc trajectory from the opti - # this is to catch bugged simulation that actually finished and produced the files, but the .gro is a 2D bugged file for example, or trjactory is unreadable by gmx - return 0, 0, 0, 0, 0, None # ns.sasa_cg == None will be checked in eval_function and worst score will be attributed + # this line checks that gmx trjconv could read the md.xtc trajectory from the opti + # this is to catch bugged simulation that actually finished and produced the files, + # but the .gro is a 2D bugged file for example, or trjactory is unreadable by gmx + if ns.sasa_cg == None: + return 0, 0, 0, 0, 0, None # ns.sasa_cg == None will be checked in eval_function and worst score will be attributed print() - print(config.sep_close, flush=True) + print(styling.sep_close, flush=True) print('| SCORING AND PLOTTING 
|', flush=True) - print(config.sep_close, flush=True) + print(styling.sep_close, flush=True) print() # constraints @@ -1787,7 +1805,7 @@ def compare_models(ns, manual_mode=True, ignore_dihedrals=False, calc_sasa=False constraint_avg, constraint_hist, _ = get_AA_bonds_distrib(ns, beads_ids=ns.cg_itp['constraint'][grp_constraint]['beads'], grp_type='constraints group', grp_nb=grp_constraint) constraints[grp_constraint]['AA']['avg'] = constraint_avg constraints[grp_constraint]['AA']['hist'] = constraint_hist - else: # use atomistic reference that was loaded by the optimization routines + else: # use atomistic reference that was loaded by the optimization routines constraints[grp_constraint]['AA']['avg'] = ns.cg_itp['constraint'][grp_constraint]['avg'] constraints[grp_constraint]['AA']['hist'] = ns.cg_itp['constraint'][grp_constraint]['hist'] @@ -1811,7 +1829,12 @@ def compare_models(ns, manual_mode=True, ignore_dihedrals=False, calc_sasa=False domain_max = max(constraints[grp_constraint]['AA']['x'][-1], constraints[grp_constraint]['CG']['x'][-1]) avg_diff_grp_constraints.append(emd(constraints[grp_constraint]['AA']['hist'], constraints[grp_constraint]['CG']['hist'], ns.bins_constraints_dist_matrix) * ns.bonds2angles_scoring_factor) except IndexError: - sys.exit(config.header_error+'Most probably because you have bonds or constraints that exceed '+str(ns.bonded_max_range)+' nm. Increase bins range for bonds and constraints and retry! See argument -bonds_max_range.') + msg = ( + f"Most probably because you have bonds or constraints that " + f"exceed {ns.bonded_max_range} nm.\nIncrease bins range for bonds and " + f"constraints and retry!\nSee argument -bonds_max_range." 
+ ) + raise ValueError(msg) else: avg_diff_grp_constraints.append(constraints[grp_constraint]['AA']['avg']) @@ -1841,7 +1864,7 @@ def compare_models(ns, manual_mode=True, ignore_dihedrals=False, calc_sasa=False bond_avg, bond_hist, _ = get_AA_bonds_distrib(ns, beads_ids=ns.cg_itp['bond'][grp_bond]['beads'], grp_type='bonds group', grp_nb=grp_bond) bonds[grp_bond]['AA']['avg'] = bond_avg bonds[grp_bond]['AA']['hist'] = bond_hist - else: # use atomistic reference that was loaded by the optimization routines + else: # use atomistic reference that was loaded by the optimization routines bonds[grp_bond]['AA']['avg'] = ns.cg_itp['bond'][grp_bond]['avg'] bonds[grp_bond]['AA']['hist'] = ns.cg_itp['bond'][grp_bond]['hist'] @@ -1865,7 +1888,12 @@ def compare_models(ns, manual_mode=True, ignore_dihedrals=False, calc_sasa=False domain_max = max(bonds[grp_bond]['AA']['x'][-1], bonds[grp_bond]['CG']['x'][-1]) avg_diff_grp_bonds.append(emd(bonds[grp_bond]['AA']['hist'], bonds[grp_bond]['CG']['hist'], ns.bins_bonds_dist_matrix) * ns.bonds2angles_scoring_factor) except IndexError: - sys.exit(config.header_error+'Most probably because you have bonds or constraints that exceed '+str(ns.bonded_max_range)+' nm. Increase bins range for bonds and bonds and retry! See argument -bonds_max_range.') + msg = ( + f"Most probably because you have bonds or constraints that " + f"exceed {ns.bonded_max_range} nm.\nIncrease bins range for bonds and " + f"constraints and retry!\nSee argument -bonds_max_range." 
+ ) + raise ValueError(msg) else: avg_diff_grp_bonds.append(bonds[grp_bond]['AA']['avg']) @@ -1895,7 +1923,7 @@ def compare_models(ns, manual_mode=True, ignore_dihedrals=False, calc_sasa=False angle_avg, angle_hist, _, _ = get_AA_angles_distrib(ns, beads_ids=ns.cg_itp['angle'][grp_angle]['beads']) angles[grp_angle]['AA']['avg'] = angle_avg angles[grp_angle]['AA']['hist'] = angle_hist - else: # use atomistic reference that was loaded by the optimization routines + else: # use atomistic reference that was loaded by the optimization routines angles[grp_angle]['AA']['avg'] = ns.cg_itp['angle'][grp_angle]['avg'] angles[grp_angle]['AA']['hist'] = ns.cg_itp['angle'][grp_angle]['hist'] @@ -1946,7 +1974,7 @@ def compare_models(ns, manual_mode=True, ignore_dihedrals=False, calc_sasa=False dihedral_avg, dihedral_hist, _, _ = get_AA_dihedrals_distrib(ns, beads_ids=ns.cg_itp['dihedral'][grp_dihedral]['beads']) dihedrals[grp_dihedral]['AA']['avg'] = dihedral_avg dihedrals[grp_dihedral]['AA']['hist'] = dihedral_hist - else: # use atomistic reference that was loaded by the optimization routines + else: # use atomistic reference that was loaded by the optimization routines dihedrals[grp_dihedral]['AA']['avg'] = ns.cg_itp['dihedral'][grp_dihedral]['avg'] dihedrals[grp_dihedral]['AA']['hist'] = ns.cg_itp['dihedral'][grp_dihedral]['hist'] @@ -1995,21 +2023,22 @@ def compare_models(ns, manual_mode=True, ignore_dihedrals=False, calc_sasa=False if larger_group > ncols: hidden_cols = larger_group - ncols if ns.atom_only: - print('Displaying max '+str(ncols)+' distributions per row using the CG ITP file ordering of distributions groups ('+str(hidden_cols)+' more are hidden)', flush=True) + print(f'Displaying max {ncols} distributions per row using the CG ITP file ordering of distributions groups ({hidden_cols} more are hidden)') else: if not ns.mismatch_order: - print(config.header_warning+'Displaying max '+str(ncols)+' distributions groups per row and this can be MISLEADING because 
ordering by pairwise AA-mapped vs. CG distributions mismatch is DISABLED ('+str(hidden_cols)+' more are hidden)', flush=True) + print(f'{styling.header_warning}Displaying max {ncols} distributions groups per row and this can be MISLEADING because ordering by pairwise AA-mapped vs. CG distributions mismatch is DISABLED ({hidden_cols} more are hidden)') else: - print('Displaying max '+str(ncols)+' distributions groups per row ordered by pairwise AA-mapped vs. CG distributions difference ('+str(hidden_cols)+' more are hidden)', flush=True) + print(f'Displaying max {ncols} distributions groups per row ordered by pairwise AA-mapped vs. CG distributions difference ({hidden_cols} more are hidden)') else: print() if not ns.mismatch_order: - print('Distributions groups will be displayed using the CG ITP file groups ordering', flush=True) + print('Distributions groups will be displayed using the CG ITP file groups ordering') else: - print('Distributions groups will be displayed using ranked mismatch score between pairwise AA-mapped and CG distributions', flush=True) + print('Distributions groups will be displayed using ranked mismatch score between pairwise AA-mapped and CG distributions') nrows -= sum([ns.nb_constraints == 0, ns.nb_bonds == 0, ns.nb_angles == 0, ns.nb_dihedrals == 0]) - # fig, ax = plt.subplots(nrows=nrows, ncols=ncols, figsize=(ncols*3, nrows*3), squeeze=False) # this fucking line was responsible of the big memory leak (figures were not closing) + # fig, ax = plt.subplots(nrows=nrows, ncols=ncols, figsize=(ncols*3, nrows*3), squeeze=False) + # this fucking line was responsible of the big memory leak (figures were not closing) so I let this here for memory fig = plt.figure(figsize=(ncols*3, nrows*3)) ax = fig.subplots(nrows=nrows, ncols=ncols, squeeze=False) @@ -2034,7 +2063,7 @@ def compare_models(ns, manual_mode=True, ignore_dihedrals=False, calc_sasa=False ax[nrow][i].plot(constraints[grp_constraint]['AA']['avg'], 0, color=config.atom_color, marker='D') 
if not ns.atom_only: - ax[nrow][i].set_title('Constraint grp '+str(grp_constraint+1)+' - EMD Δ '+str(round(avg_diff_grp_constraints[grp_constraint], 3))) + ax[nrow][i].set_title(f'Constraint grp {grp_constraint + 1} - EMD Δ {round(avg_diff_grp_constraints[grp_constraint], 3)}') if config.use_hists: ax[nrow][i].step(constraints[grp_constraint]['CG']['x'], constraints[grp_constraint]['CG']['y'], label='CG', color=config.cg_color, where='mid', alpha=config.line_alpha) ax[nrow][i].fill_between(constraints[grp_constraint]['CG']['x'], constraints[grp_constraint]['CG']['y'], color=config.cg_color, step='mid', alpha=config.fill_alpha) @@ -2042,13 +2071,11 @@ def compare_models(ns, manual_mode=True, ignore_dihedrals=False, calc_sasa=False ax[nrow][i].plot(constraints[grp_constraint]['CG']['x'], constraints[grp_constraint]['CG']['y'], label='CG', color=config.cg_color, alpha=config.line_alpha) ax[nrow][i].fill_between(constraints[grp_constraint]['CG']['x'], constraints[grp_constraint]['CG']['y'], color=config.cg_color, alpha=config.fill_alpha) ax[nrow][i].plot(constraints[grp_constraint]['CG']['avg'], 0, color=config.cg_color, marker='D') - # if ns.verbose: - print('Constraint '+str(grp_constraint+1)+' -- AA Avg: '+str(round(constraints[grp_constraint]['AA']['avg'], 3))+' nm -- CG Avg: '+str(round(constraints[grp_constraint]['CG']['avg'], 3))+' nm', flush=True) + print(f"Constraint {grp_constraint + 1} -- AA Avg: {round(constraints[grp_constraint]['AA']['avg'], 3)} nm -- CG Avg: {round(constraints[grp_constraint]['CG']['avg'], 3)}") else: - ax[nrow][i].set_title('Constraint grp '+str(grp_constraint+1)+' - Avg '+str(round(avg_diff_grp_constraints[grp_constraint], 3))+' nm') - print('Constraint '+str(grp_constraint+1)+' -- AA Avg: '+str(round(constraints[grp_constraint]['AA']['avg'], 3)), flush=True) + ax[nrow][i].set_title(f'Constraint grp {grp_constraint+1} - Avg {round(avg_diff_grp_constraints[grp_constraint], 3)} nm') + print(f"Constraint {grp_constraint + 1} -- AA Avg: 
{round(constraints[grp_constraint]['AA']['avg'], 3)}") ax[nrow][i].grid(zorder=0.5) - # ax[nrow][i].set_ylim(bottom=0) if ns.row_x_scaling: ax[nrow][i].set_xlim(np.mean(row_wise_ranges['constraints'][grp_constraint])-row_wise_ranges['max_range_constraints']/2*1.1, np.mean(row_wise_ranges['constraints'][grp_constraint])+row_wise_ranges['max_range_constraints']/2*1.1) if i % 2 == 0: @@ -2078,7 +2105,7 @@ def compare_models(ns, manual_mode=True, ignore_dihedrals=False, calc_sasa=False ax[nrow][i].plot(bonds[grp_bond]['AA']['avg'], 0, color=config.atom_color, marker='D') if not ns.atom_only: - ax[nrow][i].set_title('Bond grp '+str(grp_bond+1)+' - EMD Δ '+str(round(avg_diff_grp_bonds[grp_bond], 3))) + ax[nrow][i].set_title(f'Bond grp {grp_bond+1} - EMD Δ {round(avg_diff_grp_bonds[grp_bond], 3)}') if config.use_hists: ax[nrow][i].step(bonds[grp_bond]['CG']['x'], bonds[grp_bond]['CG']['y'], label='CG', color=config.cg_color, where='mid', alpha=config.line_alpha) ax[nrow][i].fill_between(bonds[grp_bond]['CG']['x'], bonds[grp_bond]['CG']['y'], color=config.cg_color, step='mid', alpha=config.fill_alpha) @@ -2086,13 +2113,11 @@ def compare_models(ns, manual_mode=True, ignore_dihedrals=False, calc_sasa=False ax[nrow][i].plot(bonds[grp_bond]['CG']['x'], bonds[grp_bond]['CG']['y'], label='CG', color=config.cg_color, alpha=config.line_alpha) ax[nrow][i].fill_between(bonds[grp_bond]['CG']['x'], bonds[grp_bond]['CG']['y'], color=config.cg_color, alpha=config.fill_alpha) ax[nrow][i].plot(bonds[grp_bond]['CG']['avg'], 0, color=config.cg_color, marker='D') - # if ns.verbose: - print('Bond '+str(grp_bond+1)+' -- AA Avg: '+str(round(bonds[grp_bond]['AA']['avg'], 3))+' nm -- CG Avg: '+str(round(bonds[grp_bond]['CG']['avg'], 3))+' nm', flush=True) + print(f"Bond {grp_bond + 1} -- AA Avg: {round(bonds[grp_bond]['AA']['avg'], 3)} nm -- CG Avg: {round(bonds[grp_bond]['CG']['avg'], 3)} nm") else: - ax[nrow][i].set_title('Bond grp '+str(grp_bond+1)+' - Avg 
'+str(round(avg_diff_grp_bonds[grp_bond], 3))+' nm') - print('Bond '+str(grp_bond+1)+' -- AA Avg: '+str(round(bonds[grp_bond]['AA']['avg'], 3)), flush=True) + ax[nrow][i].set_title(f"Bond grp {grp_bond+1} - Avg {round(avg_diff_grp_bonds[grp_bond], 3)} nm") + print(f"Bond {grp_bond+1} -- AA Avg: {round(bonds[grp_bond]['AA']['avg'], 3)}") ax[nrow][i].grid(zorder=0.5) - # ax[nrow][i].set_ylim(bottom=0) if ns.row_x_scaling: ax[nrow][i].set_xlim(np.mean(row_wise_ranges['bonds'][grp_bond])-row_wise_ranges['max_range_bonds']/2*1.1, np.mean(row_wise_ranges['bonds'][grp_bond])+row_wise_ranges['max_range_bonds']/2*1.1) if i % 2 == 0: @@ -2122,7 +2147,7 @@ def compare_models(ns, manual_mode=True, ignore_dihedrals=False, calc_sasa=False ax[nrow][i].plot(angles[grp_angle]['AA']['avg'], 0, color=config.atom_color, marker='D') if not ns.atom_only: - ax[nrow][i].set_title('Angle grp '+str(grp_angle+1)+' - EMD Δ '+str(round(avg_diff_grp_angles[grp_angle], 3))) + ax[nrow][i].set_title(f'Angle grp {grp_angle+1} - EMD Δ {round(avg_diff_grp_angles[grp_angle], 3)}') if config.use_hists: ax[nrow][i].step(angles[grp_angle]['CG']['x'], angles[grp_angle]['CG']['y'], label='CG', color=config.cg_color, where='mid', alpha=config.line_alpha) ax[nrow][i].fill_between(angles[grp_angle]['CG']['x'], angles[grp_angle]['CG']['y'], color=config.cg_color, step='mid', alpha=config.fill_alpha) @@ -2130,13 +2155,11 @@ def compare_models(ns, manual_mode=True, ignore_dihedrals=False, calc_sasa=False ax[nrow][i].plot(angles[grp_angle]['CG']['x'], angles[grp_angle]['CG']['y'], label='CG', color=config.cg_color, alpha=config.line_alpha) ax[nrow][i].fill_between(angles[grp_angle]['CG']['x'], angles[grp_angle]['CG']['y'], color=config.cg_color, alpha=config.fill_alpha) ax[nrow][i].plot(angles[grp_angle]['CG']['avg'], 0, color=config.cg_color, marker='D') - # if ns.verbose: - print('Angle '+str(grp_angle+1)+' -- AA Avg: '+str(round(angles[grp_angle]['AA']['avg'], 1))+'° -- CG Avg: 
'+str(round(angles[grp_angle]['CG']['avg'], 1))+'°', flush=True) + print(f"Angle {grp_angle+1} -- AA Avg: {round(angles[grp_angle]['AA']['avg'], 1)}° -- CG Avg: {round(angles[grp_angle]['CG']['avg'], 1)}°") else: - ax[nrow][i].set_title('Angle grp '+str(grp_angle+1)+' - Avg '+str(round(avg_diff_grp_angles[grp_angle], 1))+'°') - print('Angle '+str(grp_angle+1)+' -- AA Avg: '+str(round(angles[grp_angle]['AA']['avg'], 1)), flush=True) + ax[nrow][i].set_title(f"Angle grp {grp_angle+1} - Avg {round(avg_diff_grp_angles[grp_angle], 1)}°") + print(f"Angle {grp_angle+1} -- AA Avg: {round(angles[grp_angle]['AA']['avg'], 1)}") ax[nrow][i].grid(zorder=0.5) - # ax[nrow][i].set_ylim(bottom=0) if ns.row_x_scaling: ax[nrow][i].set_xlim(np.mean(row_wise_ranges['angles'][grp_angle])-row_wise_ranges['max_range_angles']/2*1.1, np.mean(row_wise_ranges['angles'][grp_angle])+row_wise_ranges['max_range_angles']/2*1.1) if i % 2 == 0: @@ -2166,7 +2189,7 @@ def compare_models(ns, manual_mode=True, ignore_dihedrals=False, calc_sasa=False ax[nrow][i].plot(dihedrals[grp_dihedral]['AA']['avg'], 0, color=config.atom_color, marker='D') if not ns.atom_only: - ax[nrow][i].set_title('Dihedral grp '+str(grp_dihedral+1)+' - EMD Δ '+str(round(avg_diff_grp_dihedrals[grp_dihedral], 3))) + ax[nrow][i].set_title(f'Dihedral grp {grp_dihedral+1} - EMD Δ {round(avg_diff_grp_dihedrals[grp_dihedral], 3)}') if config.use_hists: ax[nrow][i].step(dihedrals[grp_dihedral]['CG']['x'], dihedrals[grp_dihedral]['CG']['y'], label='CG', color=config.cg_color, where='mid', alpha=config.line_alpha) ax[nrow][i].fill_between(dihedrals[grp_dihedral]['CG']['x'], dihedrals[grp_dihedral]['CG']['y'], color=config.cg_color, step='mid', alpha=config.fill_alpha) @@ -2174,13 +2197,11 @@ def compare_models(ns, manual_mode=True, ignore_dihedrals=False, calc_sasa=False ax[nrow][i].plot(dihedrals[grp_dihedral]['CG']['x'], dihedrals[grp_dihedral]['CG']['y'], label='CG', color=config.cg_color, alpha=config.line_alpha) 
ax[nrow][i].fill_between(dihedrals[grp_dihedral]['CG']['x'], dihedrals[grp_dihedral]['CG']['y'], color=config.cg_color, alpha=config.fill_alpha) ax[nrow][i].plot(dihedrals[grp_dihedral]['CG']['avg'], 0, color=config.cg_color, marker='D') - # if ns.verbose: - print('Dihedral '+str(grp_dihedral+1)+' -- AA Avg: '+str(round(dihedrals[grp_dihedral]['AA']['avg'], 1))+'° -- CG Avg: '+str(round(dihedrals[grp_dihedral]['CG']['avg'], 1))+'°', flush=True) + print(f"Dihedral {grp_dihedral+1} -- AA Avg: {round(dihedrals[grp_dihedral]['AA']['avg'], 1)}° -- CG Avg: {round(dihedrals[grp_dihedral]['CG']['avg'], 1)}°") else: - ax[nrow][i].set_title('Dihedral grp '+str(grp_dihedral+1)+' - Avg '+str(round(avg_diff_grp_dihedrals[grp_dihedral], 1))+'°') - print('Dihedral '+str(grp_dihedral+1)+' -- AA Avg: '+str(round(dihedrals[grp_dihedral]['AA']['avg'], 1)), flush=True) + ax[nrow][i].set_title(f'Dihedral grp {grp_dihedral+1} - Avg {round(avg_diff_grp_dihedrals[grp_dihedral], 1)}°') + print(f"Dihedral {grp_dihedral+1} -- AA Avg: {round(dihedrals[grp_dihedral]['AA']['avg'], 1)}") ax[nrow][i].grid(zorder=0.5) - # ax[nrow][i].set_ylim(bottom=0) if ns.row_x_scaling: ax[nrow][i].set_xlim(np.mean(row_wise_ranges['dihedrals'][grp_dihedral])-row_wise_ranges['max_range_dihedrals']/2*1.1, np.mean(row_wise_ranges['dihedrals'][grp_dihedral])+row_wise_ranges['max_range_dihedrals']/2*1.1) if i % 2 == 0: @@ -2221,7 +2242,6 @@ def compare_models(ns, manual_mode=True, ignore_dihedrals=False, calc_sasa=False fit_score_total, fit_score_constraints_bonds, fit_score_angles, fit_score_dihedrals = 0, 0, 0, 0 for i in range(ns.nb_constraints): - # dist_pairwise = np.sqrt(avg_diff_grp_constraints[diff_ordered_grp_constraints[i]]) dist_pairwise = avg_diff_grp_constraints[diff_ordered_grp_constraints[i]] all_dist_pairwise += str(dist_pairwise)+' ' all_emd_dist_geoms['constraints'].append(dist_pairwise) @@ -2236,7 +2256,6 @@ def compare_models(ns, manual_mode=True, ignore_dihedrals=False, calc_sasa=False 
fit_score_constraints_bonds += dist_pairwise for i in range(ns.nb_bonds): - # dist_pairwise = np.sqrt(avg_diff_grp_bonds[diff_ordered_grp_bonds[i]]) dist_pairwise = avg_diff_grp_bonds[diff_ordered_grp_bonds[i]] all_dist_pairwise += str(dist_pairwise)+' ' all_emd_dist_geoms['bonds'].append(dist_pairwise) @@ -2251,7 +2270,6 @@ def compare_models(ns, manual_mode=True, ignore_dihedrals=False, calc_sasa=False fit_score_constraints_bonds += dist_pairwise for i in range(ns.nb_angles): - # dist_pairwise = np.sqrt(avg_diff_grp_angles[diff_ordered_grp_angles[i]]) dist_pairwise = avg_diff_grp_angles[diff_ordered_grp_angles[i]] all_dist_pairwise += str(dist_pairwise)+' ' all_emd_dist_geoms['angles'].append(dist_pairwise) @@ -2267,7 +2285,6 @@ def compare_models(ns, manual_mode=True, ignore_dihedrals=False, calc_sasa=False # dihedrals_dist_pairwise = 0 for i in range(ns.nb_dihedrals): - # dist_pairwise = np.sqrt(avg_diff_grp_dihedrals[diff_ordered_grp_dihedrals[i]]) dist_pairwise = avg_diff_grp_dihedrals[diff_ordered_grp_dihedrals[i]] all_dist_pairwise += str(dist_pairwise)+' ' all_emd_dist_geoms['dihedrals'].append(dist_pairwise) @@ -2280,7 +2297,6 @@ def compare_models(ns, manual_mode=True, ignore_dihedrals=False, calc_sasa=False dist_pairwise = dist_pairwise ** 2 fit_score_dihedrals += dist_pairwise - # dihedrals_dist_pairwise += dist_pairwise fit_score_constraints_bonds = np.sqrt(fit_score_constraints_bonds) fit_score_angles = np.sqrt(fit_score_angles) @@ -2298,18 +2314,11 @@ def compare_models(ns, manual_mode=True, ignore_dihedrals=False, calc_sasa=False print(' Angles constribution to fitness score:', fit_score_angles, flush=True) print(' Dihedrals constribution to fitness score:', fit_score_dihedrals, flush=True) - # FOR PAPER - # try: - # np.save(ns.datamol+'_Bonded_fitness.npy', np.array([fit_score_total, fit_score_constraints_bonds, fit_score_angles, fit_score_dihedrals])) - # except AttributeError: - # pass - plt.tight_layout(rect=[0, 0, 1, 0.9]) - # 
plt.suptitle('FITNESS SCORE\nTotal: '+str(fit_score_total)+' -- Constraints/Bonds: '+str(fit_score_constraints_bonds)+' -- Angles: '+str(fit_score_angles)+' -- Dihedrals: '+str(fit_score_dihedrals)) eval_score = fit_score_total if ignore_dihedrals and ns.nb_dihedrals > 0: eval_score -= fit_score_dihedrals - sup_title = 'FITNESS SCORE\nTotal: '+str(round(eval_score, 3))+' -- Constraints/Bonds: '+str(fit_score_constraints_bonds)+' -- Angles: '+str(fit_score_angles)+' -- Dihedrals: '+str(fit_score_dihedrals) + sup_title = f'FITNESS SCORE\nTotal: {round(eval_score, 3)} -- Constraints/Bonds: {fit_score_constraints_bonds} -- Angles: {fit_score_angles} -- Dihedrals: {fit_score_dihedrals}' if ignore_dihedrals and ns.nb_dihedrals > 0: sup_title += ' (ignored)' plt.suptitle(sup_title) @@ -2391,74 +2400,75 @@ def modify_mdp(mdp_filename, sim_time=None, nb_frames=1500, log_write_freq=5000, if sp_nstenergy_line[0].strip() == 'nstenergy': # discard other lines that could start with 'nstenergy' nstenergy_line = i - elif mdp_line.startswith('nstxout-compressed'): + elif mdp_line.startswith('nstxout-compressed') or mdp_line.startswith('nstxtcout'): sp_nstxout_compressed_line = mdp_line.split('=') nstxout_compressed_line = i # adjust simulation time according to timestep - if sim_time != None: + if sim_time is not None: if dt_line != -1 and nsteps_line != -1: nsteps = int(sim_time*1000 / dt) - mdp_lines_in[nsteps_line] = sp_nsteps_line[0]+'= '+str(nsteps)+' ; automatically modified by Swarm-CG' + mdp_lines_in[nsteps_line] = f'{sp_nsteps_line[0]}= {nsteps} ; automatically modified by Swarm-CG' else: - sys.exit(config.header_error+'The provided MD MDP file does not contain one of these entries: dt, nsteps') + msg = "The provided MD MDP file does not contain one of these entries: dt, nsteps." 
+ raise exceptions.MissformattedFile(msg) - # force writting to the log file every given nb of steps, to make sure simulations won't be killed for insufficient writting to the log file - # (which we use to check for simulations that are stuck/bugged) + # force writting to the log file every given nb of steps, to make sure simulations won't be killed for + # insufficient writting to the log file (which we use to check for simulations that are stuck/bugged) if nstlog_line != -1: nstlog = log_write_freq - mdp_lines_in[nstlog_line] = sp_nstlog_line[0]+'= '+str(nstlog)+' ; automatically modified by Swarm-CG' + mdp_lines_in[nstlog_line] = f'{sp_nstlog_line[0]}= {nstlog} ; automatically modified by Swarm-CG' else: - sys.exit(config.header_error+'The provided MD MDP file does not contain one of these entries: nstlog') + msg = "The provided MD MDP file does not contain one of these entries: nstlog." + raise exceptions.MissformattedFile(msg) # force NOT writting coordinates data, as this can only slow the simulation and we don't need it + nstxout = nsteps if nstxout_line != -1: - nstxout = nsteps - mdp_lines_in[nstxout_line] = sp_nstxout_line[0]+'= '+str(nstxout)+' ; automatically modified by Swarm-CG' + mdp_lines_in[nstxout_line] = f'{sp_nstxout_line[0]}= {nstxout} ; automatically modified by Swarm-CG' else: - mdp_lines_in += '\nnstxout = '+str(nstxout)+' ; automatically added by Swarm-CG' + mdp_lines_in.append(f'nstxout = {nstxout} ; automatically added by Swarm-CG') # force NOT writting velocities data, as this can only slow the simulation and we don't need it + nstvout = nsteps if nstvout_line != -1: - nstvout = nsteps - mdp_lines_in[nstvout_line] = sp_nstvout_line[0]+'= '+str(nstvout)+' ; automatically modified by Swarm-CG' + mdp_lines_in[nstvout_line] = f'{sp_nstvout_line[0]}= {nstvout} ; automatically modified by Swarm-CG' else: - mdp_lines_in += '\nnstvout = '+str(nstvout)+' ; automatically added by Swarm-CG' + mdp_lines_in.append(f'nstvout = {nstvout} ; 
automatically added by Swarm-CG') # force NOT writting forces data, as this can only slow the simulation and we don't need it + nstfout = nsteps if nstfout_line != -1: - nstfout = nsteps - mdp_lines_in[nstfout_line] = sp_nstfout_line[0]+'= '+str(nstfout)+' ; automatically modified by Swarm-CG' + mdp_lines_in[nstfout_line] = f'{sp_nstfout_line[0]}= {nstfout} ; automatically modified by Swarm-CG' else: - mdp_lines_in += '\nnstfout = '+str(nstfout)+' ; automatically added by Swarm-CG' + mdp_lines_in.append(f'nstfout = {nstfout} ; automatically added by Swarm-CG') - # force calculating and writing frames at given frequency, to not slow down the simulation too much but still allow for energy analysis + # force calculating and writing frames at given frequency, to not slow down + # the simulation too much but still allow for energy analysis nstcalcenergy = int(nsteps / nb_frames / energy_write_nb_frames_ratio) nstenergy = nstcalcenergy if nstcalcenergy_line != -1: - mdp_lines_in[nstcalcenergy_line] = sp_nstcalcenergy_line[0]+'= '+str(nstcalcenergy)+' ; automatically modified by Swarm-CG' + mdp_lines_in[nstcalcenergy_line] = f'{sp_nstcalcenergy_line[0]}= {nstcalcenergy} ; automatically modified by Swarm-CG' else: - mdp_lines_in += '\nnstcalcenergy = '+str(nstcalcenergy)+' ; automatically added by Swarm-CG' + mdp_lines_in.append(f'nstcalcenergy = {nstcalcenergy} ; automatically added by Swarm-CG') if nstenergy_line != -1: - mdp_lines_in[nstenergy_line] = sp_nstenergy_line[0]+'= '+str(nstenergy)+' ; automatically modified by Swarm-CG' + mdp_lines_in[nstenergy_line] = f'{sp_nstenergy_line[0]}= {nstenergy} ; automatically modified by Swarm-CG' else: - mdp_lines_in += '\nnstenergy = '+str(nstenergy)+' ; automatically added by Swarm-CG' + mdp_lines_in.append(f'nstenergy = {nstenergy} ; automatically added by Swarm-CG') # force writting compressed frames at given frequency, so that we obtain the desired number of frames for each CG simulation/evaluation step nstxout_compressed = 
int(nsteps / nb_frames) if nstxout_compressed_line != -1: - mdp_lines_in[nstxout_compressed_line] = sp_nstxout_compressed_line[0]+'= '+str(nstxout_compressed)+' ; automatically modified by Swarm-CG' + mdp_lines_in[nstxout_compressed_line] = f'{sp_nstxout_compressed_line[0]}= {nstxout_compressed} ; automatically modified by Swarm-CG' else: # sys.exit(config.header_error+'The provided MD MDP file does not contain one of these entries: nstxout-compressed') - mdp_lines_in += '\nnstxout-compressed = '+str(nstxout_compressed)+' ; automatically added by Swarm-CG' + mdp_lines_in.append(f'nstxout-compressed = {nstxout_compressed} ; automatically added by Swarm-CG') # write output with open(mdp_filename, 'w') as fp: for mdp_line in mdp_lines_in: fp.write(mdp_line+'\n') - return - # execute command and return output def cmdline(command): @@ -2479,8 +2489,6 @@ def print_stdout_forced(*args, **kwargs): with contextlib.redirect_stdout(sys.__stdout__): print(*args, **kwargs, flush=True) - return - # evaluation function to be optimized using FST-PSO def eval_function(parameters_set, ns): @@ -2489,23 +2497,26 @@ def eval_function(parameters_set, ns): start_eval_ts = datetime.now().timestamp() print_stdout_forced() - print_stdout_forced('Starting iteration', ns.nb_eval, 'at', time.strftime('%H:%M:%S'), 'on', time.strftime('%d-%m-%Y')) + # TODO: this should use logging + print_stdout_forced( + f"Starting iteration {ns.nb_eval} at {time.strftime('%H:%M:%S')} on {time.strftime('%d-%m-%Y')}" + ) # enter the execution directory os.chdir(ns.exec_folder) # create new directory for new parameters evaluation - current_eval_dir = config.iteration_sim_files_dirname+'_eval_step_'+str(ns.nb_eval) + current_eval_dir = f'{config.iteration_sim_files_dirname}_eval_step_{ns.nb_eval}' shutil.copytree(config.input_sim_files_dirname, current_eval_dir) # create a modified CG ITP file with parameters according to current evaluation type update_cg_itp_obj(ns, parameters_set=parameters_set, update_type=1) - 
out_path_itp = config.iteration_sim_files_dirname+'_eval_step_'+str(ns.nb_eval)+'/'+ns.cg_itp_basename + out_path_itp = f'{config.iteration_sim_files_dirname}_eval_step_{ns.nb_eval}/{ns.cg_itp_basename}' if ns.opti_cycle['nb_geoms']['dihedral'] == 0: print_sections = ['constraint', 'bond', 'angle', 'exclusion'] else: print_sections = ['constraint', 'bond', 'angle', 'dihedral', 'exclusion'] - print_cg_itp_file(ns.out_itp, out_path_itp, print_sections=print_sections) + write_cg_itp_file(ns.out_itp, out_path_itp, print_sections=print_sections) # enter current evaluation directory and stay there until all sims are finished or failed os.chdir(current_eval_dir) @@ -2520,37 +2531,44 @@ def eval_function(parameters_set, ns): # ns.gro_input_basename = previous_best_final_conf # grompp -- minimization - gmx_cmd = ns.gmx_path+' grompp -c '+ns.gro_input_basename+' -p '+ns.top_input_basename+' -f '+ns.mdp_minimization_basename+' -o mini -maxwarn '+str(ns.mini_maxwarn) + gmx_cmd = f'{ns.gmx_path} grompp -c {ns.gro_input_basename} -p {ns.top_input_basename} -f {ns.mdp_minimization_basename} -o mini -maxwarn {ns.mini_maxwarn}' with subprocess.Popen([gmx_cmd], shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) as gmx_process: gmx_out = gmx_process.communicate()[1].decode() gmx_process.kill() if gmx_process.returncode == 0: # mdrun -- minimization - gmx_cmd = gmx_args(ns.gmx_path+' mdrun -deffnm mini', ns.nb_threads, ns.gpu_id, ns.gmx_args_str) - with subprocess.Popen([gmx_cmd], shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, preexec_fn=os.setsid) as gmx_process: # create a process group for the minimization run + gmx_cmd = gmx_args(ns, 'mdrun -deffnm mini', mpi=False) + with subprocess.Popen([gmx_cmd], shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, preexec_fn=os.setsid) as gmx_process: # create a process group for the minimization run # check if minimization run is stuck because of instabilities cycles_check = 0 last_log_file_size = 0 - while 
gmx_process.poll() is None: # while process is alive + while gmx_process.poll() is None: # while process is alive time.sleep(ns.process_alive_time_sleep) cycles_check += 1 - if cycles_check % ns.process_alive_nb_cycles_dead == 0: # every minute or so, kill process if we determine it is stuck because the .log file's bytes size has not changed + if cycles_check % ns.process_alive_nb_cycles_dead == 0: # every minute or so, kill process if we determine it is stuck because the .log file's bytes size has not changed if os.path.isfile(current_eval_dir+'/mini.log'): - log_file_size = os.path.getsize(current_eval_dir+'/mini.log') # get size of .log file in bytes, as a mean of detecting the minimization run is stuck + log_file_size = os.path.getsize(current_eval_dir+'/mini.log') # get size of .log file in bytes, as a mean of detecting the minimization run is stuck else: - log_file_size = last_log_file_size # minimization is stuck if the process was not able to create log file at start - if log_file_size == last_log_file_size: # minimization is stuck if the process is not writing to log file anymore - os.killpg(os.getpgid(gmx_process.pid), signal.SIGKILL) # kill all processes of process group + log_file_size = last_log_file_size # minimization is stuck if the process was not able to create log file at start + if log_file_size == last_log_file_size: # minimization is stuck if the process is not writing to log file anymore + os.killpg(os.getpgid(gmx_process.pid), signal.SIGKILL) # kill all processes of process group mini_killed = True else: last_log_file_size = log_file_size gmx_process.kill() else: - sys.exit('\n\n'+config.header_gmx_error+gmx_out+'\n'+config.header_error+'Gmx grompp failed at minimization step, see gmx error message above\nPlease check the parameters of the MDP file provided through argument -cg_sim_mdp_mini\nYou may also want to look into argument -mini_maxwarn\nIf you think this is a bug, please consider opening an issue on GitHub at 
'+config.github_url+'\n') + msg = ( + f"{gmx_out}\n\n" + f"Gromacs grompp failed at MD minimization step, see its error message above.\n" + f"You may also want to check the parameters of the MDP file provided through\n" + f"argument -cg_sim_mdp_mini. If you think this is a bug, please consider opening\n" + f"an issue on GitHub at {config.github_url}/issues." + ) + raise exceptions.ComputationError(msg) # if minimization finished properly, we just check for the .gro file printed in the end if os.path.isfile('mini.gro'): @@ -2563,37 +2581,43 @@ def eval_function(parameters_set, ns): if gmx_process.returncode == 0: # mdrun -- EQUI - gmx_cmd = gmx_args(ns.gmx_path+' mdrun -deffnm equi', ns.nb_threads, ns.gpu_id, ns.gmx_args_str) + gmx_cmd = gmx_args(ns, 'mdrun -deffnm equi') with subprocess.Popen([gmx_cmd], shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, preexec_fn=os.setsid) as gmx_process: # create a process group for the EQUI run # check if EQUI run is stuck because of instabilities cycles_check = 0 last_log_file_size = 0 - while gmx_process.poll() is None: # while process is alive + while gmx_process.poll() is None: # while process is alive time.sleep(ns.process_alive_time_sleep) cycles_check += 1 - if cycles_check % ns.process_alive_nb_cycles_dead == 0: # every minute or so, kill process if we determine it is stuck because the .log file's bytes size has not changed + if cycles_check % ns.process_alive_nb_cycles_dead == 0: # every minute or so, kill process if we determine it is stuck because the .log file's bytes size has not changed if os.path.isfile(current_eval_dir+'/equi.log'): - log_file_size = os.path.getsize(current_eval_dir+'/equi.log') # get size of .log file in bytes, as a mean of detecting the EQUI run is stuck + log_file_size = os.path.getsize(current_eval_dir+'/equi.log') # get size of .log file in bytes, as a mean of detecting the EQUI run is stuck else: - log_file_size = last_log_file_size # EQUI is stuck if the process was not able to 
create log file at start - if log_file_size == last_log_file_size: # EQUI is stuck if the process is not writing to log file anymore - os.killpg(os.getpgid(gmx_process.pid), signal.SIGKILL) # kill all processes of process group + log_file_size = last_log_file_size # EQUI is stuck if the process was not able to create log file at start + if log_file_size == last_log_file_size: # EQUI is stuck if the process is not writing to log file anymore + os.killpg(os.getpgid(gmx_process.pid), signal.SIGKILL) # kill all processes of process group equi_killed = True else: last_log_file_size = log_file_size gmx_process.kill() else: - # pass - sys.exit('\n\n'+config.header_gmx_error+gmx_out+'\n'+config.header_error+'Gmx grompp failed at equilibration step, see gmx error message above\nPlease check the parameters of the MDP file provided through argument -cg_sim_mdp_equi\nIf you think this is a bug, please consider opening an issue on GitHub at '+config.github_url+'\n') + msg = ( + f"{gmx_out}\n\n" + f"Gromacs grompp failed at MD equilibration step, see its error message above.\n" + f"You may also want to check the parameters of the MDP file provided through\n" + f"argument -cg_sim_mdp_equi. If you think this is a bug, please consider opening\n" + f"an issue on GitHub at {config.github_url}/issues." 
+ ) + raise exceptions.ComputationError(msg) # if EQUI finished properly, we just check for the .gro file printed in the end if os.path.isfile('equi.gro'): # adapt duration of the simulation - modify_mdp(mdp_filename=ns.mdp_md_basename, sim_time=ns.prod_sim_time) # TODO: check that everything still make sense + modify_mdp(mdp_filename=ns.mdp_md_basename, sim_time=ns.prod_sim_time) # grompp -- MD gmx_cmd = ns.gmx_path+' grompp -c equi.gro -p '+ns.top_input_basename+' -f '+ns.mdp_md_basename+' -o md' @@ -2603,31 +2627,37 @@ def eval_function(parameters_set, ns): if gmx_process.returncode == 0: # mdrun -- MD - gmx_cmd = gmx_args(ns.gmx_path+' mdrun -deffnm md', ns.nb_threads, ns.gpu_id, ns.gmx_args_str) - with subprocess.Popen([gmx_cmd], shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, preexec_fn=os.setsid) as gmx_process: # create a process group for the MD run + gmx_cmd = gmx_args(ns, 'mdrun -deffnm md') + with subprocess.Popen([gmx_cmd], shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, preexec_fn=os.setsid) as gmx_process: # create a process group for the MD run # check if MD run is stuck because of instabilities cycles_check = 0 last_log_file_size = 0 - while gmx_process.poll() is None: # while process is alive + while gmx_process.poll() is None: # while process is alive time.sleep(ns.process_alive_time_sleep) cycles_check += 1 - if cycles_check % ns.process_alive_nb_cycles_dead == 0: # every minute or so, kill process if we determine it is stuck because the .log file's bytes size has not changed + if cycles_check % ns.process_alive_nb_cycles_dead == 0: # every minute or so, kill process if we determine it is stuck because the .log file's bytes size has not changed if os.path.isfile('md.log'): - log_file_size = os.path.getsize('md.log') # get size of .log file in bytes, as a mean of detecting the MD run is stuck + log_file_size = os.path.getsize('md.log') # get size of .log file in bytes, as a mean of detecting the MD run is stuck else: - 
log_file_size = last_log_file_size # MD run is stuck if the process was not able to create log file at start - if log_file_size == last_log_file_size: # MD run is stuck if the process is not writing to log file anymore - os.killpg(os.getpgid(gmx_process.pid), signal.SIGKILL) # kill all processes of process group + log_file_size = last_log_file_size # MD run is stuck if the process was not able to create log file at start + if log_file_size == last_log_file_size: # MD run is stuck if the process is not writing to log file anymore + os.killpg(os.getpgid(gmx_process.pid), signal.SIGKILL) # kill all processes of process group md_run_killed = True else: last_log_file_size = log_file_size gmx_process.kill() else: - # pass - sys.exit('\n\n'+config.header_gmx_error+gmx_out+'\n'+config.header_error+'Gmx grompp failed at the MD step, see gmx error message above\nPlease check the parameters of the MDP file provided through argument -cg_sim_mdp_prod\nIf you think this is a bug, please consider opening an issue on GitHub at '+config.github_url+'\n') + msg = ( + f"{gmx_out}\n\n" + f"Gromacs grompp failed at MD production step, see its error message above.\n" + f"You may also want to check the parameters of the MDP file provided through\n" + f"argument -cg_sim_mdp_md. If you think this is a bug, please consider opening\n" + f"an issue on GitHub at {config.github_url}/issues." 
+ ) + raise exceptions.ComputationError(msg) # to verify if MD run finished properly, we check for the .gro file printed in the end if os.path.isfile('md.gro'): @@ -2646,10 +2676,10 @@ def eval_function(parameters_set, ns): ns.total_model_eval_time += datetime.now().timestamp() - start_model_eval_ts # if gmx sasa failed to compute, it's most likely because there were inconsistent shifts across PBC in the trajectory = failed run - if ns.sasa_cg != None: + if ns.sasa_cg is not None: # store the distributions for each evaluation step - shutil.move('distributions.png', '../'+config.distrib_plots_all_evals_dirname+'/distributions_eval_step_'+str(ns.nb_eval)+'.png') + shutil.move('distributions.png', f'../{config.distrib_plots_all_evals_dirname}/distributions_eval_step_{ns.nb_eval}.png') eval_score = 0 if 'constraint' in ns.opti_cycle['geoms'] and 'bond' in ns.opti_cycle['geoms']: @@ -2666,37 +2696,6 @@ def eval_function(parameters_set, ns): global_score += fit_score_angles if 'dihedral' in ns.opti_geoms_all: global_score += fit_score_dihedrals - - # ns.all_rg_last_cycle = np.append(ns.all_rg_last_cycle, ns.gyr_cg) - # ns.all_fitness_last_cycle = np.append(ns.all_fitness_last_cycle, global_score_with_dihedrals) - - # rg_mask = np.where(ns.all_rg_last_cycle != None)[0] # mask values from runs that did not finish - # regular_eval = False # select between model selection based on bonded fitness exclusively or mixed with Rg - - # # new final model selection based on both the Rg and the bonded fitness, having ranges normalized within each cycle - # # using median to get rid of outliers big scores of Rg/fitness - # if len(rg_mask) > 1: - # try: - # dist_rg_abs = abs(ns.all_rg_last_cycle[rg_mask] - ns.gyr_aa_mapped) - # all_delta_rg = (dist_rg_abs - np.amin(dist_rg_abs)) / (np.amax(dist_rg_abs) - np.amin(dist_rg_abs)) - # all_delta_fitness = (ns.all_fitness_last_cycle[rg_mask] - np.amin(ns.all_fitness_last_cycle[rg_mask])) / (np.amax(ns.all_fitness_last_cycle[rg_mask]) - 
np.amin(ns.all_fitness_last_cycle[rg_mask])) - - # # get index of minimum (i.e. best fitted, using both bonded fitness and Rg) - # id_best_model_combo_score = np.argmin( (all_delta_fitness**2 + all_delta_rg**2) ** (1/2) ) # first id is used if several results are returned - - # # if this is a new best - # if id_best_model_combo_score > ns.best_fitness_Rg_combined: - # ns.best_fitness_Rg_combined = id_best_model_combo_score - # if ns.opti_cycle['nb_geoms']['dihedral'] == 0: - # new_best_fit_without_dihedrals = True - # ns.best_fitness_without_dihedrals = global_score_without_dihedrals, ns.nb_eval - # else: - # new_best_fit_with_dihedrals = True - # ns.best_fitness_with_dihedrals = global_score_with_dihedrals, ns.nb_eval - # except ZeroDivisionError: - # regular_eval = True - # else: - # regular_eval = True # model selection based only on bonded parametrization score regular_eval = True @@ -2707,66 +2706,71 @@ def eval_function(parameters_set, ns): ns.all_emd_dist_geoms = all_emd_dist_geoms else: - print_stdout_forced(' MD run failed (molecule exploded)') - eval_score, fit_score_total, fit_score_constraints_bonds, fit_score_angles, fit_score_dihedrals = [ns.worst_fit_score]*5 + print_stdout_forced(" MD run failed (molecule exploded)") + eval_score, fit_score_total, fit_score_constraints_bonds, fit_score_angles, fit_score_dihedrals = [ns.worst_fit_score] * 5 ns.gyr_cg, ns.gyr_cg_std, ns.sasa_cg, ns.sasa_cg_std = None, None, None, None - # ns.all_rg_last_cycle = np.append(ns.all_rg_last_cycle, None) - # ns.all_fitness_last_cycle = np.append(ns.all_fitness_last_cycle, None) ns.total_gmx_time += datetime.now().timestamp() - start_gmx_ts else: if md_run_killed: - print_stdout_forced(' MD run failed (unstable simulation was killed, with unstable = NOT writing in log file for '+str(ns.sim_kill_delay)+' sec)') + print_stdout_forced( + f" MD run failed (unstable simulation was killed, with unstable " + f"= NOT writing in log file for {str(ns.sim_kill_delay)} sec)" + ) else: 
print_stdout_forced(' MD run failed (simulation process terminated with error)') - eval_score, fit_score_total, fit_score_constraints_bonds, fit_score_angles, fit_score_dihedrals = [ns.worst_fit_score]*5 + eval_score, fit_score_total, fit_score_constraints_bonds, fit_score_angles, fit_score_dihedrals = [ns.worst_fit_score] * 5 ns.gyr_cg, ns.gyr_cg_std, ns.sasa_cg, ns.sasa_cg_std = None, None, None, None - # ns.all_rg_last_cycle = np.append(ns.all_rg_last_cycle, None) - # ns.all_fitness_last_cycle = np.append(ns.all_fitness_last_cycle, None) ns.total_gmx_time += datetime.now().timestamp() - start_gmx_ts else: if equi_killed: - print_stdout_forced(' Equilibration run failed (unstable simulation was killed, with unstable = NOT writing in log file for '+str(ns.sim_kill_delay)+' sec)') + print_stdout_forced( + f" Equilibration run failed (unstable simulation was killed, with unstable " + f"= NOT writing in log file for {str(ns.sim_kill_delay)} sec)" + ) else: - print_stdout_forced(' Equilibration run failed (simulation process terminated with error)') - eval_score, fit_score_total, fit_score_constraints_bonds, fit_score_angles, fit_score_dihedrals = [ns.worst_fit_score]*5 + print_stdout_forced( + " Equilibration run failed (simulation process terminated with error)" + ) + eval_score, fit_score_total, fit_score_constraints_bonds, fit_score_angles, fit_score_dihedrals = [ns.worst_fit_score] * 5 ns.gyr_cg, ns.gyr_cg_std, ns.sasa_cg, ns.sasa_cg_std = None, None, None, None - # ns.all_rg_last_cycle = np.append(ns.all_rg_last_cycle, None) - # ns.all_fitness_last_cycle = np.append(ns.all_fitness_last_cycle, None) ns.total_gmx_time += datetime.now().timestamp() - start_gmx_ts else: if mini_killed: - print_stdout_forced(' Minimization run failed (unstable simulation was killed, with unstable = NOT writing in log file for '+str(ns.sim_kill_delay)+' sec)') + print_stdout_forced( + f" Minimization run failed (unstable simulation was killed, with unstable " + f"= NOT writing in log 
file for {str(ns.sim_kill_delay)} sec)" + ) else: - print_stdout_forced(' Minimization run failed (simulation process terminated with error)') - eval_score, fit_score_total, fit_score_constraints_bonds, fit_score_angles, fit_score_dihedrals = [ns.worst_fit_score]*5 + print_stdout_forced( + " Minimization run failed (simulation process terminated with error)" + ) + eval_score, fit_score_total, fit_score_constraints_bonds, fit_score_angles, fit_score_dihedrals = [ns.worst_fit_score] * 5 ns.gyr_cg, ns.gyr_cg_std, ns.sasa_cg, ns.sasa_cg_std = None, None, None, None - # ns.all_rg_last_cycle = np.append(ns.all_rg_last_cycle, None) - # ns.all_fitness_last_cycle = np.append(ns.all_fitness_last_cycle, None) ns.total_gmx_time += datetime.now().timestamp() - start_gmx_ts # exit current eval directory os.chdir('..') - # store log files + # store all log files if os.path.isfile(current_eval_dir+'/md.log'): - shutil.copy(current_eval_dir+'/md.log', config.log_files_all_evals_dirname+'/MD_sim_eval_step_'+str(ns.nb_eval)+'.log') # copy prod log file - elif os.path.isfile(current_eval_dir+'/equi.log'): - shutil.copy(current_eval_dir+'/equi.log', config.log_files_all_evals_dirname+'/equi_sim_eval_step_'+str(ns.nb_eval)+'.log') # copy equi log file - elif os.path.isfile(current_eval_dir+'/mini.log'): - shutil.copy(current_eval_dir+'/mini.log', config.log_files_all_evals_dirname+'/mini_sim_eval_step_'+str(ns.nb_eval)+'.log') # copy mini log file + shutil.copy(current_eval_dir+'/md.log', f'{config.log_files_all_evals_dirname}/MD_sim_eval_step_{ns.nb_eval}.log') + if os.path.isfile(current_eval_dir+'/equi.log'): + shutil.copy(current_eval_dir+'/equi.log', f'{config.log_files_all_evals_dirname}/equi_sim_eval_step_{ns.nb_eval}.log') + if os.path.isfile(current_eval_dir+'/mini.log'): + shutil.copy(current_eval_dir+'/mini.log', f'{config.log_files_all_evals_dirname}/mini_sim_eval_step_{ns.nb_eval}.log') # update the best results distrib plot in execution directory if new_best_fit: - 
shutil.copy(config.distrib_plots_all_evals_dirname+'/distributions_eval_step_'+str(ns.nb_eval)+'.png', config.best_distrib_plots) + shutil.copy(f'{config.distrib_plots_all_evals_dirname}/distributions_eval_step_{ns.nb_eval}.png', config.best_distrib_plots) # keep all sim files if user wants to if ns.keep_all_sims: shutil.copytree(current_eval_dir, config.sim_files_all_evals_dirname+'/'+current_eval_dir) # keep BI files (the very first guess of bonded parameters) only for figures - # TODO: remove + # TODO: remove ?? this is redundant because we already produce a directory with output for the best current model if ns.nb_eval == 1: shutil.copytree(current_eval_dir, 'boltzmann_inv_CG_model') @@ -2789,16 +2793,14 @@ def eval_function(parameters_set, ns): if new_best_fit: print_stdout_forced(' --> Selected as new best bonded parametrization') # print_stdout_forced(' Opti context mismatch score:', round(eval_score, 3)) - print_stdout_forced(' Rg CG: ', ' '+str(round(ns.gyr_cg, 2)), 'nm (Error abs.', str(round(abs(1-ns.gyr_cg/ns.gyr_aa_mapped)*100, 1))+'% -- Reference Rg AA-mapped:', str(ns.gyr_aa_mapped)+' nm)') - print_stdout_forced(' SASA CG:', ns.sasa_cg, 'nm2 (Error abs.', str(round(abs(1-ns.sasa_cg/ns.sasa_aa_mapped)*100, 1))+'% -- Reference SASA AA-mapped:', str(ns.sasa_aa_mapped)+' nm2)') - # if ns.opti_cycle['nb_geoms']['dihedral'] == 0: - # print_stdout_forced(' Dihedrals currently ignored') + print_stdout_forced(f' Rg CG: {round(ns.gyr_cg, 2)} nm (Error abs. {round(abs(1 - ns.gyr_cg / ns.gyr_aa_mapped) * 100, 1)}% -- Reference Rg AA-mapped: {ns.gyr_aa_mapped} nm)') + print_stdout_forced(f' SASA CG: {ns.sasa_cg} nm2 (Error abs. 
{round(abs(1 - ns.sasa_cg / ns.sasa_aa_mapped) * 100, 1)}% -- Reference SASA AA-mapped: {ns.sasa_aa_mapped} nm2)') current_total_time = round((datetime.now().timestamp() - ns.start_opti_ts) / (60 * 60), 2) current_eval_time = datetime.now().timestamp() - start_eval_ts ns.total_eval_time += current_eval_time current_eval_time = round(current_eval_time / 60, 2) - print_stdout_forced(' Iteration time:', current_eval_time, 'min') + print_stdout_forced(f' Iteration time: {current_eval_time} min') # write all pairwise distances between atom mapped and CG geoms to file for later global optimization perf plotting with open(config.opti_pairwise_distances_file, 'a') as fp: @@ -2809,20 +2811,20 @@ def eval_function(parameters_set, ns): with open(config.opti_perf_recap_file, 'a') as fp: recap_line = ' '.join(list(map(str, (ns.opti_cycle['nb_cycle'], ns.nb_eval, fit_score_total, fit_score_constraints_bonds, fit_score_angles, fit_score_dihedrals, eval_score, ns.gyr_aa_mapped, ns.gyr_aa_mapped_std, ns.gyr_cg, ns.gyr_cg_std, ns.sasa_aa_mapped, ns.sasa_aa_mapped_std, ns.sasa_cg, ns.sasa_cg_std))))+' ' for i in range(len(ns.cg_itp['constraint'])): - recap_line += str(ns.out_itp['constraint'][i]['value'])+' ' + recap_line += f"{ns.out_itp['constraint'][i]['value']} " for i in range(len(ns.cg_itp['bond'])): - recap_line += str(ns.out_itp['bond'][i]['value'])+' '+str(ns.out_itp['bond'][i]['fct'])+' ' + recap_line += f"{ns.out_itp['bond'][i]['value']} {ns.out_itp['bond'][i]['fct']} " for i in range(len(ns.cg_itp['angle'])): - recap_line += str(ns.out_itp['angle'][i]['value'])+' '+str(ns.out_itp['angle'][i]['fct'])+' ' + recap_line += f"{ns.out_itp['angle'][i]['value']} {ns.out_itp['angle'][i]['fct']} " for i in range(len(ns.cg_itp['dihedral'])): if ns.opti_cycle['nb_geoms']['dihedral'] == 0: recap_line += '0 0 ' else: - recap_line += str(ns.out_itp['dihedral'][i]['value'])+' '+str(ns.out_itp['dihedral'][i]['fct'])+' ' - recap_line += str(current_eval_time)+' '+str(current_total_time) + 
recap_line += f"{ns.out_itp['dihedral'][i]['value']} {ns.out_itp['dihedral'][i]['fct']} " + recap_line += f'{current_eval_time} {current_total_time}' fp.write(recap_line+'\n') - os.chdir('..') # exit the execution directory + os.chdir('..') # exit the execution directory return eval_score diff --git a/tests/data/cg_model.itp b/tests/data/cg_model.itp new file mode 100644 index 0000000..8cd26c4 --- /dev/null +++ b/tests/data/cg_model.itp @@ -0,0 +1,240 @@ +; This file can replace the one in directory /G1_DATA for testing purposes +; and can be modified notably for: +; +; -- Adding virtual sites and testing functions +; -- Verifying behavior with different residue names (might be an issue while mapping trajectory to CG) +; -- Checking if virtual sites can be defined in the middle of the [ atoms ] without crashing everything +; (NOT sure this is allowed by MARTINI convention though) +; -- Verify we allow to have 2x [ atoms ] sections for regular/vs beads (this is allowed in MARTINI format) +; +; To run tests without modifying the DEMO DATA FOR USERS we can invoke the script like this: +; $PROJ_PATH/Swarm-CG/swarmcg/optimize_model.py -gmx $GMX_ALIAS -in_dir G1_DATA -cg_itp tests/data/cg_model.itp -cg_top tests/data/system.top +; +; TODO: add test files for another and more "complicated" molecular model, like the porphyrin + +[ moleculetype ] +; molname nrexcl +G1 1 + + +[ atoms ] +; id type resnr residue atom cgnr charge + +1 N0 1 G1 A1 1 0.00000 +2 N0 1 G1 A2 2 0.00000 +3 Nda 1 G1 A3 3 0.00000 +4 N0 1 G1 A4 4 0.00000 +5 Nda 1 G1 A5 5 0.00000 +6 Qd 1 G1 A6 6 1.00000 +7 Nda 1 G1 A7 7 0.00000 +8 Qd 1 G1 A8 8 1.00000 +9 Nda 1 G1 A9 9 0.00000 +10 N0 1 G1 A10 10 0.00000 +11 Nda 1 G1 A11 11 0.00000 +12 Qd 2 G1 A12 12 1.00000 +13 Nda 2 G1 A13 13 0.00000 +14 Qd 1 G1 A14 14 1.00000 +15 Nda 1 G1 A15 15 0.00000 +16 N0 1 G1 A16 16 0.00000 +17 Nda 1 G1 A17 17 0.00000 +18 Qd 1 G1 A18 18 1.00000 +19 Nda 1 G1 A19 19 0.00000 +20 Qd 1 G1 A20 20 1.00000 +21 Nda 1 G1 A21 21 0.00000 +22 N0 
1 G1 A22 22 0.00000 +23 Nda 1 G1 A23 23 0.00000 +24 Qd 1 G1 A24 24 1.00000 +25 Nda 1 G1 A25 25 0.00000 +26 Qd 1 G1 A26 26 1.00000 +27 vT 1 G1 V27 27 0.00000 +28 vT 1 G1 V28 28 0.00000 +29 vT 1 G1 V29 29 0.00000 +30 vT 1 G1 V30 30 0.00000 +31 vT 1 G1 V31 31 0.00000 +32 vT 1 G1 V32 32 0.00000 +33 vT 1 G1 V33 33 0.00000 +34 vT 1 G1 V34 34 0.00000 +35 vT 1 G1 V35 35 0.00000 +36 vT 1 G1 V36 36 0.00000 + + +;[ constraints ] +; i j funct length +; 1 2 1 0 ; C1 + +[ bonds ] +; i j funct length force.c. + +; bond group 1 + 1 2 1 0 0 ; B1 + +; bond group 2 + 1 3 1 0 0 ; B2 + 1 9 1 0 0 ; B2 + 2 15 1 0 0 ; B2 + 2 21 1 0 0 ; B2 + 4 5 1 0 0 ; B2 + 4 7 1 0 0 ; B2 + 10 11 1 0 0 ; B2 + 10 13 1 0 0 ; B2 + 16 17 1 0 0 ; B2 + 16 19 1 0 0 ; B2 + 22 23 1 0 0 ; B2 + 22 25 1 0 0 ; B2 + +; bond group 3 + 3 4 1 0 0 ; B3 + 9 10 1 0 0 ; B3 + 15 16 1 0 0 ; B3 + 21 22 1 0 0 ; B3 + +; bond group 4 + 5 6 1 0 0 ; B4 + 7 8 1 0 0 ; B4 + 11 12 1 0 0 ; B4 + 13 14 1 0 0 ; B4 + 17 18 1 0 0 ; B4 + 19 20 1 0 0 ; B4 + 23 24 1 0 0 ; B4 + 25 26 1 0 0 ; B4 + +; bond group 5 + 3 28 1 0 0 ; B5 + +; bond group 6 + 9 28 1 0 0 ; B6 + +; bond group 7 + 16 32 1 0 0 ; B7 + +; bond group 8 + 3 29 1 0 0 ; B8 + +; bond group 9 + 9 29 1 0 0 ; B9 + +; bond group 10 + 7 27 1 0 0 ; B10 + +; bond group 11 + 5 27 1 0 0 ; B11 + +; bond group 12 + 3 33 1 0 0 ; B12 + +; bond group 13 + 9 33 1 0 0 ; B13 + +; bond group 14 + 17 34 1 0 0 ; B14 + +; bond group 15 + 16 34 1 0 0 ; B15 + +; bond group 16 + 19 34 1 0 0 ; B16 + +; bond group 17 + 21 35 1 0 0 ; B17 + +; bond group 18 + 2 35 1 0 0 ; B18 + +; bond group 19 + 15 35 1 0 0 ; B19 + +; bond group 20 + 4 36 1 0 0 ; B20 + +; bond group 21 + 3 36 1 0 0 ; B21 + +; bond group 22 + 5 36 1 0 0 ; B22 + +; bond group 23 + 7 36 1 0 0 ; B23 + + +[ angles ] +; i j k funct angle force.c. 
+ +; angle group 1 + 1 2 15 2 120 0 ; A1 + 1 2 21 2 120 0 ; A1 + 2 1 3 2 120 0 ; A1 + 2 1 9 2 120 0 ; A1 + +; angle group 2 + 1 3 4 2 180 0 ; A2 + 1 9 10 2 180 0 ; A2 + 2 15 16 2 180 0 ; A2 + 2 21 22 2 180 0 ; A2 + +; angle group 3 + 3 1 9 2 120 0 ; A3 + 15 2 21 2 120 0 ; A3 + 5 4 7 2 120 0 ; A3 + 11 10 13 2 120 0 ; A3 + 17 16 19 2 120 0 ; A3 + 25 22 23 2 120 0 ; A3 + +; angle group 4 + 3 4 5 2 120 0 ; A4 + 3 4 7 2 120 0 ; A4 + 9 10 11 2 120 0 ; A4 + 9 10 13 2 120 0 ; A4 + 15 16 17 2 120 0 ; A4 + 15 16 19 2 120 0 ; A4 + 21 22 23 2 120 0 ; A4 + 21 22 25 2 120 0 ; A4 + +; angle group 5 + 4 5 6 2 180 0 ; A5 + 4 7 8 2 180 0 ; A5 + 10 11 12 2 180 0 ; A5 + 10 13 14 2 180 0 ; A5 + 16 17 18 2 180 0 ; A5 + 16 19 20 2 180 0 ; A5 + 22 23 24 2 180 0 ; A5 + 22 25 26 2 180 0 ; A5 + +; angle group 6 + 1 3 29 2 0 0 ; A6 + + +[ dihedrals ] +; i j k l funct dihedral force.c. mult. + +; dihedral group 1 + 3 1 2 21 1 0 0 2 ; D1 + 15 2 1 9 1 0 0 2 ; D1 + 21 2 1 3 1 0 0 2 ; D1 + 9 1 2 15 1 0 0 2 ; D1 + + +[ virtual_sites2 ] +; vs i j func param + 27 5 7 1 -1.3 + + +[ virtual_sites3 ] +; vs i j k func params + 28 1 3 9 1 -0.2 -0.7 + 33 1 3 9 2 -0.2 0.6 + 29 1 3 9 3 110 0.35 + 34 17 16 19 4 0.47 -0.25 0.32 + 35 21 2 15 4 0.12 0.3 0.21 + + +[ virtual_sites4 ] +; vs i j k l func params + 36 4 3 5 7 2 -0.2 0.7 0.2 + + +[ virtual_sitesn ] +; vs func def + 32 1 16 20 + 30 3 23 0.1 24 0.1 25 0.3 26 0.3 + 31 2 17 18 19 20 + diff --git a/tests/data/martini_v2.0_PEO_PS_CNP.itp b/tests/data/martini_v2.0_PEO_PS_CNP.itp new file mode 100644 index 0000000..9ed8300 --- /dev/null +++ b/tests/data/martini_v2.0_PEO_PS_CNP.itp @@ -0,0 +1,1160 @@ +; MARTINI FORCEFIELD V2.2 +; +; SJ MARRINK (last modified: 04-12-2012 by DdJ) +; +; NOTE 1: Bead definitinions in this file have not been changed with respect to +; martini_v2.1.itp. This file is purely here for clarity sake. Differences +; between V2.1 and V2.2 are created by the martinize script and can be found +; in martini_v2.2_aminoacids.itp. 
+; +; NOTE 2: Some special beads have been added: CNP for carbon nanoparticles, +; SCY and STY for polystyrene, and PEO for polyethylene oxide and CiEj surfactants. +; +; please cite: +; +; D.H. de Jong, G. Singh, W.F.D. Bennet, C. Arnarez, T.A. Wassenaar, L.V. Schafer, +; X. Periole, D.P. Tieleman, S.J. Marrink. +; Improved Parameters for the Martini Coarse-Grained Protein Force Field +; J. Chem. Theory Comput., DOI: 10.1021/ct300646g + +; L. Monticelli, S. Kandasamy, X. Periole, R. Larson, D.P. Tieleman, S.J. Marrink. +; The MARTINI coarse grained force field: extension to proteins. +; J. Chem. Th. Comp., 4:819-834, 2008. +; +; S.J. Marrink, H.J. Risselada, S. Yefimov, D.P. Tieleman, A.H. de Vries. +; The MARTINI forcefield: coarse grained model for biomolecular simulations. +; JPC-B, 111:7812-7824, 2007. +; +; and (if using lipid topologies): +; +; S.J. Marrink, A.H. de Vries, A.E. Mark. +; Coarse grained model for semi-quantitative lipid simulations. +; JPC-B, 108:750-760, 2004. + + +[ defaults ] +1 1 + +[ atomtypes ] + +; Currently eighteen particle types are defined, divided into four main categories +; (P, polar; N, intermediate; C, apolar; Q, charged) +; each of which has a number of sublevels (0,a,d, or ad) +; subtype 0 has no hydrogen bond forming capacities, +; subtype d has some hydrogen donor capacities, +; subtype a some hydrogen acceptor capacities, +; and subtype da has both donor and acceptor capacities +; or (1,2,3,4,5) where subtype 5 is more polar than 1. + +; Two main classes of particles are furthermore distinguished, namely +; STANDARD particles which are mapped using a 4:1 mapping scheme, and +; RING particles which are used for ring compounds mapped 2-3:1. +; A special BIG particle type is defined in addition to prevent freezing of CG water. +; Two AMINO acid particle types are used to avoid Q-C clashes inside proteins. 
+ +; For reasons of computational efficiency, all particle masses are set to 72 amu, +; except for ring types which are set to 45 amu. +; For realistic dynamics, the particle masses should be adapted. +; This might require a reduction of the integration timestep, however. + +; name mass charge ptype c6 c12 + +; STANDARD types, 4:1 mapping +; polar type +P5 72.0 0.000 A 0.0 0.0 +P4 72.0 0.000 A 0.0 0.0 +P3 72.0 0.000 A 0.0 0.0 +P2 72.0 0.000 A 0.0 0.0 +P1 72.0 0.000 A 0.0 0.0 +; intermediate polar +Nda 72.0 0.000 A 0.0 0.0 +Nd 72.0 0.000 A 0.0 0.0 +Na 72.0 0.000 A 0.0 0.0 +N0 72.0 0.000 A 0.0 0.0 +; apolar +C5 72.0 0.000 A 0.0 0.0 +C4 72.0 0.000 A 0.0 0.0 +C3 72.0 0.000 A 0.0 0.0 +C2 72.0 0.000 A 0.0 0.0 +C1 72.0 0.000 A 0.0 0.0 +; charged +Qda 72.0 0.000 A 0.0 0.0 +Qd 72.0 0.000 A 0.0 0.0 +Qa 72.0 0.000 A 0.0 0.0 +Q0 72.0 0.000 A 0.0 0.0 + +; RING types, 2-3:1 mapping +SP5 45.0 0.000 A 0.0 0.0 +SP4 45.0 0.000 A 0.0 0.0 +SP3 45.0 0.000 A 0.0 0.0 +SP2 45.0 0.000 A 0.0 0.0 +SP1 45.0 0.000 A 0.0 0.0 +SNda 45.0 0.000 A 0.0 0.0 +SNd 45.0 0.000 A 0.0 0.0 +SNa 45.0 0.000 A 0.0 0.0 +SN0 45.0 0.000 A 0.0 0.0 +SC5 45.0 0.000 A 0.0 0.0 +SC4 45.0 0.000 A 0.0 0.0 +SC3 45.0 0.000 A 0.0 0.0 +SC2 45.0 0.000 A 0.0 0.0 +SC1 45.0 0.000 A 0.0 0.0 +SQda 45.0 0.000 A 0.0 0.0 +SQd 45.0 0.000 A 0.0 0.0 +SQa 45.0 0.000 A 0.0 0.0 +SQ0 45.0 0.000 A 0.0 0.0 + +; AMINO ACIDS (required for Q-C interactions inside proteins) +AC2 72.0 0.000 A 0.0 0.0 +AC1 72.0 0.000 A 0.0 0.0 + +; BIG particle type (to prevent freezing of water) +BP4 72.0 0.000 A 0.0 0.0 + +; CNP, for fullerene & carbon nanotubes +CNP 45.0 0.000 A 0.0 0.0 + +; polystyrene +STY 45.0 0.000 A 0.0 0.0 ; ring bead +SCY 45.0 0.000 A 0.0 0.0 ; backbone bead + +; PEO and CiEj surfactants +EO 45.0 0.000 A 0.0 0.0 + +; virtual sites (for Swarm-CG testing purposes only) +vT 0.0 0.000 V 0.0 0.0 + + + +[ nonbond_params ] + +; levels of LJ interaction: + +; O - supra attractive: (eps=5.6, s=0.47) +; I - attractive: (eps=5.0, s=0.47) +; II - 
almost attractive: (eps=4.5, s=0.47) +; III - semi attractive: (eps=4.0, s=0.47) +; IV - intermediate: (eps=3.5, s=0.47) +; V - almost intermediate: (eps=3.1, s=0.47) +; VI - semi repulsive: (eps=2.7, s=0.47) +; VII - almost repulsive: (eps=2.3, s=0.47) +; VIII - repulsive: (eps=2.0, s=0.47) +; IX - super repulsive: (eps=2.0, s=0.62) +; +; RINGS: for ring-ring interactions eps is reduced to 75%, sigma=0.43. + +; i j funda c6 c12 +; self terms + P5 P5 1 0.24145E-00 0.26027E-02 ; supra attractive + SP5 SP5 1 0.10620E-00 0.67132E-03 ; 75supra attractive, s=0.43 + P4 P4 1 0.21558E-00 0.23238E-02 ; attractive + BP4 BP4 1 0.21558E-00 0.23238E-02 ; attractive + SP4 SP4 1 0.94820E-01 0.59939E-03 ; 75attractive, s=0.43 + P3 P3 1 0.21558E-00 0.23238E-02 ; attractive + SP3 SP3 1 0.94820E-01 0.59939E-03 ; 75attractive, s=0.43 + P2 P2 1 0.19402E-00 0.20914E-02 ; almost attractive + SP2 SP2 1 0.85338E-01 0.53946E-03 ; 75almost attractive, s=0.43 + P1 P1 1 0.19402E-00 0.20914E-02 ; almost attractive + SP1 SP1 1 0.85338E-01 0.53946E-03 ; 75almost attractive, s=0.43 + Nda Nda 1 0.19402E-00 0.20914E-02 ; almost attractive + SNda SNda 1 0.85338E-01 0.53946E-03 ; 75almost attractive, s=0.43 + Nd Nd 1 0.17246E-00 0.18590E-02 ; semi attractive + SNd SNd 1 0.75856E-01 0.47952E-03 ; 75semi attractive, s=0.43 + Na Na 1 0.17246E-00 0.18590E-02 ; semi attractive + SNa SNa 1 0.75856E-01 0.47952E-03 ; 75semi attractive, s=0.43 + N0 N0 1 0.15091E-00 0.16267E-02 ; intermediate + SN0 SN0 1 0.66375E-01 0.41957E-03 ; 75intermediate, s=0.43 + C5 C5 1 0.15091E-00 0.16267E-02 ; intermediate + SC5 SC5 1 0.66375E-01 0.41957E-03 ; 75intermediate, s=0.43 + C4 C4 1 0.15091E-00 0.16267E-02 ; intermediate + SC4 SC4 1 0.66375E-01 0.41957E-03 ; 75intermediate, s=0.43 + C3 C3 1 0.15091E-00 0.16267E-02 ; intermediate + SC3 SC3 1 0.66375E-01 0.41957E-03 ; 75intermediate, s=0.43 + C2 C2 1 0.15091E-00 0.16267E-02 ; intermediate + AC2 AC2 1 0.15091E-00 0.16267E-02 ; intermediate + SC2 SC2 1 0.66375E-01 0.41957E-03 ; 
75intermediate, s=0.43 + C1 C1 1 0.15091E-00 0.16267E-02 ; intermediate + AC1 AC1 1 0.15091E-00 0.16267E-02 ; intermediate + SC1 SC1 1 0.66375E-01 0.41957E-03 ; 75intermediate, s=0.43 + Qda Qda 1 0.24145E-00 0.26027E-02 ; supra attractive + SQda SQda 1 0.10620E-00 0.67132E-03 ; 75supra attractive, s=0.43 + Qd Qd 1 0.21558E-00 0.23238E-02 ; attractive + SQd SQd 1 0.94820E-01 0.59939E-03 ; 75attractive, s=0.43 + Qa Qa 1 0.21558E-00 0.23238E-02 ; attractive + SQa SQa 1 0.94820E-01 0.59939E-03 ; 75attractive, s=0.43 + Q0 Q0 1 0.15091E-00 0.16267E-02 ; intermediate + SQ0 SQ0 1 0.66375E-01 0.41957E-03 ; 75intermediate, s=0.43 +; cross terms + P5 SP5 1 0.24145E-00 0.26027E-02 ; supra attractive + P5 P4 1 0.24145E-00 0.26027E-02 ; supra attractive + P5 BP4 1 0.24145E-00 0.26027E-02 ; supra attractive + P5 SP4 1 0.24145E-00 0.26027E-02 ; supra attractive + P5 P3 1 0.24145E-00 0.26027E-02 ; supra attractive + P5 SP3 1 0.24145E-00 0.26027E-02 ; supra attractive + P5 P2 1 0.24145E-00 0.26027E-02 ; supra attractive + P5 SP2 1 0.24145E-00 0.26027E-02 ; supra attractive + P5 P1 1 0.24145E-00 0.26027E-02 ; supra attractive + P5 SP1 1 0.24145E-00 0.26027E-02 ; supra attractive + P5 Nda 1 0.21558E-00 0.23238E-02 ; attractive + P5 SNda 1 0.21558E-00 0.23238E-02 ; attractive + P5 Nd 1 0.21558E-00 0.23238E-02 ; attractive + P5 SNd 1 0.21558E-00 0.23238E-02 ; attractive + P5 Na 1 0.21558E-00 0.23238E-02 ; attractive + P5 SNa 1 0.21558E-00 0.23238E-02 ; attractive + P5 N0 1 0.15091E-00 0.16267E-02 ; intermediate + P5 SN0 1 0.15091E-00 0.16267E-02 ; intermediate + P5 C5 1 0.13366E-00 0.14408E-02 ; almost intermediate + P5 SC5 1 0.13366E-00 0.14408E-02 ; almost intermediate + P5 C4 1 0.11642E-00 0.12549E-02 ; semi repulsive + P5 SC4 1 0.11642E-00 0.12549E-02 ; semi repulsive + P5 C3 1 0.11642E-00 0.12549E-02 ; semi repulsive + P5 SC3 1 0.11642E-00 0.12549E-02 ; semi repulsive + P5 C2 1 0.99167E-01 0.10690E-02 ; almost repulsive + P5 AC2 1 0.99167E-01 0.10690E-02 ; almost repulsive + P5 SC2 
1 0.99167E-01 0.10690E-02 ; almost repulsive + P5 C1 1 0.86233E-01 0.92953E-03 ; repulsive + P5 AC1 1 0.86233E-01 0.92953E-03 ; repulsive + P5 SC1 1 0.86233E-01 0.92953E-03 ; repulsive + P5 Qda 1 0.24145E-00 0.26027E-02 ; supra attractive + P5 SQda 1 0.24145E-00 0.26027E-02 ; supra attractive + P5 Qd 1 0.24145E-00 0.26027E-02 ; supra attractive + P5 SQd 1 0.24145E-00 0.26027E-02 ; supra attractive + P5 Qa 1 0.24145E-00 0.26027E-02 ; supra attractive + P5 SQa 1 0.24145E-00 0.26027E-02 ; supra attractive + P5 Q0 1 0.21558E-00 0.23238E-02 ; attractive + P5 SQ0 1 0.21558E-00 0.23238E-02 ; attractive + SP5 P4 1 0.24145E-00 0.26027E-02 ; supra attractive + SP5 BP4 1 0.24145E-00 0.26027E-02 ; supra attractive + SP5 SP4 1 0.10620E-00 0.67132E-03 ; 75supra attractive, s=0.43 + SP5 P3 1 0.24145E-00 0.26027E-02 ; supra attractive + SP5 SP3 1 0.10620E-00 0.67132E-03 ; 75supra attractive, s=0.43 + SP5 P2 1 0.24145E-00 0.26027E-02 ; supra attractive + SP5 SP2 1 0.10620E-00 0.67132E-03 ; 75supra attractive, s=0.43 + SP5 P1 1 0.24145E-00 0.26027E-02 ; supra attractive + SP5 SP1 1 0.10620E-00 0.67132E-03 ; 75supra attractive, s=0.43 + SP5 Nda 1 0.21558E-00 0.23238E-02 ; attractive + SP5 SNda 1 0.94820E-01 0.59939E-03 ; 75attractive, s=0.43 + SP5 Nd 1 0.21558E-00 0.23238E-02 ; attractive + SP5 SNd 1 0.94820E-01 0.59939E-03 ; 75attractive, s=0.43 + SP5 Na 1 0.21558E-00 0.23238E-02 ; attractive + SP5 SNa 1 0.94820E-01 0.59939E-03 ; 75attractive, s=0.43 + SP5 N0 1 0.15091E-00 0.16267E-02 ; intermediate + SP5 SN0 1 0.66375E-01 0.41957E-03 ; 75intermediate, s=0.43 + SP5 C5 1 0.13366E-00 0.14408E-02 ; almost intermediate + SP5 SC5 1 0.58789E-01 0.37162E-03 ; 75almost intermediate, s=0.43 + SP5 C4 1 0.11642E-00 0.12549E-02 ; semi repulsive + SP5 SC4 1 0.51203E-01 0.32367E-03 ; 75semi repulsive s=0.43 + SP5 C3 1 0.11642E-00 0.12549E-02 ; semi repulsive + SP5 SC3 1 0.51203E-01 0.32367E-03 ; 75semi repulsive s=0.43 + SP5 C2 1 0.99167E-01 0.10690E-02 ; almost repulsive + SP5 AC2 1 0.99167E-01 
0.10690E-02 ; almost repulsive + SP5 SC2 1 0.43617E-01 0.27572E-03 ; 75almost repulsive, s=0.43 + SP5 C1 1 0.86233E-01 0.92953E-03 ; repulsive + SP5 AC1 1 0.86233E-01 0.92953E-03 ; repulsive + SP5 SC1 1 0.37928E-01 0.23976E-03 ; 75repulsive, s=0.43 + SP5 Qda 1 0.24145E-00 0.26027E-02 ; supra attractive + SP5 SQda 1 0.10620E-00 0.67132E-03 ; 75supra attractive, s=0.43 + SP5 Qd 1 0.24145E-00 0.26027E-02 ; supra attractive + SP5 SQd 1 0.10620E-00 0.67132E-03 ; 75supra attractive, s=0.43 + SP5 Qa 1 0.24145E-00 0.26027E-02 ; supra attractive + SP5 SQa 1 0.10620E-00 0.67132E-03 ; 75supra attractive, s=0.43 + SP5 Q0 1 0.21558E-00 0.23238E-02 ; attractive + SP5 SQ0 1 0.94820E-01 0.59939E-03 ; 75attractive, s=0.43 + P4 BP4 1 0.76824E-00 0.26348E-01 ; supra attractive, s=0.57 + P4 SP4 1 0.21558E-00 0.23238E-02 ; attractive + P4 P3 1 0.21558E-00 0.23238E-02 ; attractive + P4 SP3 1 0.21558E-00 0.23238E-02 ; attractive + P4 P2 1 0.19402E-00 0.20914E-02 ; almost attractive + P4 SP2 1 0.19402E-00 0.20914E-02 ; almost attractive + P4 P1 1 0.19402E-00 0.20914E-02 ; almost attractive + P4 SP1 1 0.19402E-00 0.20914E-02 ; almost attractive + P4 Nda 1 0.17246E-00 0.18590E-02 ; semi attractive + P4 SNda 1 0.17246E-00 0.18590E-02 ; semi attractive + P4 Nd 1 0.17246E-00 0.18590E-02 ; semi attractive + P4 SNd 1 0.17246E-00 0.18590E-02 ; semi attractive + P4 Na 1 0.17246E-00 0.18590E-02 ; semi attractive + P4 SNa 1 0.17246E-00 0.18590E-02 ; semi attractive + P4 N0 1 0.15091E-00 0.16267E-02 ; intermediate + P4 SN0 1 0.15091E-00 0.16267E-02 ; intermediate + P4 C5 1 0.13366E-00 0.14408E-02 ; almost intermediate + P4 SC5 1 0.13366E-00 0.14408E-02 ; almost intermediate + P4 C4 1 0.11642E-00 0.12549E-02 ; semi repulsive + P4 SC4 1 0.11642E-00 0.12549E-02 ; semi repulsive + P4 C3 1 0.11642E-00 0.12549E-02 ; semi repulsive + P4 SC3 1 0.11642E-00 0.12549E-02 ; semi repulsive + P4 C2 1 0.99167E-01 0.10690E-02 ; almost repulsive + P4 AC2 1 0.99167E-01 0.10690E-02 ; almost repulsive + P4 SC2 1 
0.99167E-01 0.10690E-02 ; almost repulsive + P4 C1 1 0.86233E-01 0.92953E-03 ; repulsive + P4 AC1 1 0.86233E-01 0.92953E-03 ; repulsive + P4 SC1 1 0.86233E-01 0.92953E-03 ; repulsive + P4 Qda 1 0.24145E-00 0.26027E-02 ; supra attractive + P4 SQda 1 0.24145E-00 0.26027E-02 ; supra attractive + P4 Qd 1 0.24145E-00 0.26027E-02 ; supra attractive + P4 SQd 1 0.24145E-00 0.26027E-02 ; supra attractive + P4 Qa 1 0.24145E-00 0.26027E-02 ; supra attractive + P4 SQa 1 0.24145E-00 0.26027E-02 ; supra attractive + P4 Q0 1 0.24145E-00 0.26027E-02 ; supra attractive + P4 SQ0 1 0.24145E-00 0.26027E-02 ; supra attractive + BP4 SP4 1 0.21558E-00 0.23238E-02 ; attractive + BP4 P3 1 0.21558E-00 0.23238E-02 ; attractive + BP4 SP3 1 0.21558E-00 0.23238E-02 ; attractive + BP4 P2 1 0.19402E-00 0.20914E-02 ; almost attractive + BP4 SP2 1 0.19402E-00 0.20914E-02 ; almost attractive + BP4 P1 1 0.19402E-00 0.20914E-02 ; almost attractive + BP4 SP1 1 0.19402E-00 0.20914E-02 ; almost attractive + BP4 Nda 1 0.17246E-00 0.18590E-02 ; semi attractive + BP4 SNda 1 0.17246E-00 0.18590E-02 ; semi attractive + BP4 Nd 1 0.17246E-00 0.18590E-02 ; semi attractive + BP4 SNd 1 0.17246E-00 0.18590E-02 ; semi attractive + BP4 Na 1 0.17246E-00 0.18590E-02 ; semi attractive + BP4 SNa 1 0.17246E-00 0.18590E-02 ; semi attractive + BP4 N0 1 0.15091E-00 0.16267E-02 ; intermediate + BP4 SN0 1 0.15091E-00 0.16267E-02 ; intermediate + BP4 C5 1 0.13366E-00 0.14408E-02 ; almost intermediate + BP4 SC5 1 0.13366E-00 0.14408E-02 ; almost intermediate + BP4 C4 1 0.11642E-00 0.12549E-02 ; semi repulsive + BP4 SC4 1 0.11642E-00 0.12549E-02 ; semi repulsive + BP4 C3 1 0.11642E-00 0.12549E-02 ; semi repulsive + BP4 SC3 1 0.11642E-00 0.12549E-02 ; semi repulsive + BP4 C2 1 0.99167E-01 0.10690E-02 ; almost repulsive + BP4 AC2 1 0.99167E-01 0.10690E-02 ; almost repulsive + BP4 SC2 1 0.99167E-01 0.10690E-02 ; almost repulsive + BP4 C1 1 0.86233E-01 0.92953E-03 ; repulsive + BP4 AC1 1 0.86233E-01 0.92953E-03 ; repulsive + BP4 SC1 
1 0.86233E-01 0.92953E-03 ; repulsive + BP4 Qda 1 0.24145E-00 0.26027E-02 ; supra attractive + BP4 SQda 1 0.24145E-00 0.26027E-02 ; supra attractive + BP4 Qd 1 0.24145E-00 0.26027E-02 ; supra attractive + BP4 SQd 1 0.24145E-00 0.26027E-02 ; supra attractive + BP4 Qa 1 0.24145E-00 0.26027E-02 ; supra attractive + BP4 SQa 1 0.24145E-00 0.26027E-02 ; supra attractive + BP4 Q0 1 0.24145E-00 0.26027E-02 ; supra attractive + BP4 SQ0 1 0.24145E-00 0.26027E-02 ; supra attractive + SP4 P3 1 0.21558E-00 0.23238E-02 ; attractive + SP4 SP3 1 0.94820E-01 0.59939E-03 ; 75attractive, s=0.43 + SP4 P2 1 0.19402E-00 0.20914E-02 ; almost attractive + SP4 SP2 1 0.85338E-01 0.53946E-03 ; 75almost attractive, s=0.43 + SP4 P1 1 0.19402E-00 0.20914E-02 ; almost attractive + SP4 SP1 1 0.85338E-01 0.53946E-03 ; 75almost attractive, s=0.43 + SP4 Nda 1 0.17246E-00 0.18590E-02 ; semi attractive + SP4 SNda 1 0.75856E-01 0.47952E-03 ; 75semi attractive, s=0.43 + SP4 Nd 1 0.17246E-00 0.18590E-02 ; semi attractive + SP4 SNd 1 0.75856E-01 0.47952E-03 ; 75semi attractive, s=0.43 + SP4 Na 1 0.17246E-00 0.18590E-02 ; semi attractive + SP4 SNa 1 0.75856E-01 0.47952E-03 ; 75semi attractive, s=0.43 + SP4 N0 1 0.15091E-00 0.16267E-02 ; intermediate + SP4 SN0 1 0.66375E-01 0.41957E-03 ; 75intermediate, s=0.43 + SP4 C5 1 0.13366E-00 0.14408E-02 ; almost intermediate + SP4 SC5 1 0.58789E-01 0.37162E-03 ; 75almost intermediate, s=0.43 + SP4 C4 1 0.11642E-00 0.12549E-02 ; semi repulsive + SP4 SC4 1 0.51203E-01 0.32367E-03 ; 75semi repulsive s=0.43 + SP4 C3 1 0.11642E-00 0.12549E-02 ; semi repulsive + SP4 SC3 1 0.51203E-01 0.32367E-03 ; 75semi repulsive s=0.43 + SP4 C2 1 0.99167E-01 0.10690E-02 ; almost repulsive + SP4 AC2 1 0.99167E-01 0.10690E-02 ; almost repulsive + SP4 SC2 1 0.43617E-01 0.27572E-03 ; 75almost repulsive, s=0.43 + SP4 C1 1 0.86233E-01 0.92953E-03 ; repulsive + SP4 AC1 1 0.86233E-01 0.92953E-03 ; repulsive + SP4 SC1 1 0.37928E-01 0.23976E-03 ; 75repulsive, s=0.43 + SP4 Qda 1 0.24145E-00 
0.26027E-02 ; supra attractive + SP4 SQda 1 0.10620E-00 0.67132E-03 ; 75supra attractive, s=0.43 + SP4 Qd 1 0.24145E-00 0.26027E-02 ; supra attractive + SP4 SQd 1 0.10620E-00 0.67132E-03 ; 75supra attractive, s=0.43 + SP4 Qa 1 0.24145E-00 0.26027E-02 ; supra attractive + SP4 SQa 1 0.10620E-00 0.67132E-03 ; 75supra attractive, s=0.43 + SP4 Q0 1 0.24145E-00 0.26027E-02 ; supra attractive + SP4 SQ0 1 0.10620E-00 0.67132E-03 ; 75supra attractive, s=0.43 + P3 SP3 1 0.21558E-00 0.23238E-02 ; attractive + P3 P2 1 0.19402E-00 0.20914E-02 ; almost attractive + P3 SP2 1 0.19402E-00 0.20914E-02 ; almost attractive + P3 P1 1 0.19402E-00 0.20914E-02 ; almost attractive + P3 SP1 1 0.19402E-00 0.20914E-02 ; almost attractive + P3 Nda 1 0.19402E-00 0.20914E-02 ; almost attractive + P3 SNda 1 0.19402E-00 0.20914E-02 ; almost attractive + P3 Nd 1 0.19402E-00 0.20914E-02 ; almost attractive + P3 SNd 1 0.19402E-00 0.20914E-02 ; almost attractive + P3 Na 1 0.19402E-00 0.20914E-02 ; almost attractive + P3 SNa 1 0.19402E-00 0.20914E-02 ; almost attractive + P3 N0 1 0.15091E-00 0.16267E-02 ; intermediate + P3 SN0 1 0.15091E-00 0.16267E-02 ; intermediate + P3 C5 1 0.15091E-00 0.16267E-02 ; intermediate + P3 SC5 1 0.15091E-00 0.16267E-02 ; intermediate + P3 C4 1 0.13366E-00 0.14408E-02 ; almost intermediate + P3 SC4 1 0.13366E-00 0.14408E-02 ; almost intermediate + P3 C3 1 0.13366E-00 0.14408E-02 ; almost intermediate + P3 SC3 1 0.13366E-00 0.14408E-02 ; almost intermediate + P3 C2 1 0.11642E-00 0.12549E-02 ; semi repulsive + P3 AC2 1 0.11642E-00 0.12549E-02 ; semi repulsive + P3 SC2 1 0.11642E-00 0.12549E-02 ; semi repulsive + P3 C1 1 0.99167E-01 0.10690E-02 ; almost repulsive + P3 AC1 1 0.99167E-01 0.10690E-02 ; almost repulsive + P3 SC1 1 0.99167E-01 0.10690E-02 ; almost repulsive + P3 Qda 1 0.24145E-00 0.26027E-02 ; supra attractive + P3 SQda 1 0.24145E-00 0.26027E-02 ; supra attractive + P3 Qd 1 0.24145E-00 0.26027E-02 ; supra attractive + P3 SQd 1 0.24145E-00 0.26027E-02 ; supra 
attractive + P3 Qa 1 0.24145E-00 0.26027E-02 ; supra attractive + P3 SQa 1 0.24145E-00 0.26027E-02 ; supra attractive + P3 Q0 1 0.21558E-00 0.23238E-02 ; attractive + P3 SQ0 1 0.21558E-00 0.23238E-02 ; attractive + SP3 P2 1 0.19402E-00 0.20914E-02 ; almost attractive + SP3 SP2 1 0.85338E-01 0.53946E-03 ; 75almost attractive, s=0.43 + SP3 P1 1 0.19402E-00 0.20914E-02 ; almost attractive + SP3 SP1 1 0.85338E-01 0.53946E-03 ; 75almost attractive, s=0.43 + SP3 Nda 1 0.19402E-00 0.20914E-02 ; almost attractive + SP3 SNda 1 0.85338E-01 0.53946E-03 ; 75almost attractive, s=0.43 + SP3 Nd 1 0.19402E-00 0.20914E-02 ; almost attractive + SP3 SNd 1 0.85338E-01 0.53946E-03 ; 75almost attractive, s=0.43 + SP3 Na 1 0.19402E-00 0.20914E-02 ; almost attractive + SP3 SNa 1 0.85338E-01 0.53946E-03 ; 75almost attractive, s=0.43 + SP3 N0 1 0.15091E-00 0.16267E-02 ; intermediate + SP3 SN0 1 0.66375E-01 0.41957E-03 ; 75intermediate, s=0.43 + SP3 C5 1 0.15091E-00 0.16267E-02 ; intermediate + SP3 SC5 1 0.66375E-01 0.41957E-03 ; 75intermediate, s=0.43 + SP3 C4 1 0.13366E-00 0.14408E-02 ; almost intermediate + SP3 SC4 1 0.58789E-01 0.37162E-03 ; 75almost intermediate, s=0.43 + SP3 C3 1 0.13366E-00 0.14408E-02 ; almost intermediate + SP3 SC3 1 0.58789E-01 0.37162E-03 ; 75almost intermediate, s=0.43 + SP3 C2 1 0.11642E-00 0.12549E-02 ; semi repulsive + SP3 AC2 1 0.11642E-00 0.12549E-02 ; semi repulsive + SP3 SC2 1 0.51203E-01 0.32367E-03 ; 75semi repulsive s=0.43 + SP3 C1 1 0.99167E-01 0.10690E-02 ; almost repulsive + SP3 AC1 1 0.99167E-01 0.10690E-02 ; almost repulsive + SP3 SC1 1 0.43617E-01 0.27572E-03 ; 75almost repulsive, s=0.43 + SP3 Qda 1 0.24145E-00 0.26027E-02 ; supra attractive + SP3 SQda 1 0.10620E-00 0.67132E-03 ; 75supra attractive, s=0.43 + SP3 Qd 1 0.24145E-00 0.26027E-02 ; supra attractive + SP3 SQd 1 0.10620E-00 0.67132E-03 ; 75supra attractive, s=0.43 + SP3 Qa 1 0.24145E-00 0.26027E-02 ; supra attractive + SP3 SQa 1 0.10620E-00 0.67132E-03 ; 75supra attractive, s=0.43 + SP3 
Q0 1 0.21558E-00 0.23238E-02 ; attractive + SP3 SQ0 1 0.94820E-01 0.59939E-03 ; 75attractive, s=0.43 + P2 SP2 1 0.19402E-00 0.20914E-02 ; almost attractive + P2 P1 1 0.19402E-00 0.20914E-02 ; almost attractive + P2 SP1 1 0.19402E-00 0.20914E-02 ; almost attractive + P2 Nda 1 0.19402E-00 0.20914E-02 ; almost attractive + P2 SNda 1 0.19402E-00 0.20914E-02 ; almost attractive + P2 Nd 1 0.19402E-00 0.20914E-02 ; almost attractive + P2 SNd 1 0.19402E-00 0.20914E-02 ; almost attractive + P2 Na 1 0.19402E-00 0.20914E-02 ; almost attractive + P2 SNa 1 0.19402E-00 0.20914E-02 ; almost attractive + P2 N0 1 0.17246E-00 0.18590E-02 ; semi attractive + P2 SN0 1 0.17246E-00 0.18590E-02 ; semi attractive + P2 C5 1 0.15091E-00 0.16267E-02 ; intermediate + P2 SC5 1 0.15091E-00 0.16267E-02 ; intermediate + P2 C4 1 0.15091E-00 0.16267E-02 ; intermediate + P2 SC4 1 0.15091E-00 0.16267E-02 ; intermediate + P2 C3 1 0.13366E-00 0.14408E-02 ; almost intermediate + P2 SC3 1 0.13366E-00 0.14408E-02 ; almost intermediate + P2 C2 1 0.11642E-00 0.12549E-02 ; semi repulsive + P2 AC2 1 0.11642E-00 0.12549E-02 ; semi repulsive + P2 SC2 1 0.11642E-00 0.12549E-02 ; semi repulsive + P2 C1 1 0.99167E-01 0.10690E-02 ; almost repulsive + P2 AC1 1 0.99167E-01 0.10690E-02 ; almost repulsive + P2 SC1 1 0.99167E-01 0.10690E-02 ; almost repulsive + P2 Qda 1 0.21558E-00 0.23238E-02 ; attractive + P2 SQda 1 0.21558E-00 0.23238E-02 ; attractive + P2 Qd 1 0.21558E-00 0.23238E-02 ; attractive + P2 SQd 1 0.21558E-00 0.23238E-02 ; attractive + P2 Qa 1 0.21558E-00 0.23238E-02 ; attractive + P2 SQa 1 0.21558E-00 0.23238E-02 ; attractive + P2 Q0 1 0.19402E-00 0.20914E-02 ; almost attractive + P2 SQ0 1 0.19402E-00 0.20914E-02 ; almost attractive + SP2 P1 1 0.19402E-00 0.20914E-02 ; almost attractive + SP2 SP1 1 0.85338E-01 0.53946E-03 ; 75almost attractive, s=0.43 + SP2 Nda 1 0.19402E-00 0.20914E-02 ; almost attractive + SP2 SNda 1 0.85338E-01 0.53946E-03 ; 75almost attractive, s=0.43 + SP2 Nd 1 0.19402E-00 
0.20914E-02 ; almost attractive + SP2 SNd 1 0.85338E-01 0.53946E-03 ; 75almost attractive, s=0.43 + SP2 Na 1 0.19402E-00 0.20914E-02 ; almost attractive + SP2 SNa 1 0.85338E-01 0.53946E-03 ; 75almost attractive, s=0.43 + SP2 N0 1 0.17246E-00 0.18590E-02 ; semi attractive + SP2 SN0 1 0.75856E-01 0.47952E-03 ; 75semi attractive, s=0.43 + SP2 C5 1 0.15091E-00 0.16267E-02 ; intermediate + SP2 SC5 1 0.66375E-01 0.41957E-03 ; 75intermediate, s=0.43 + SP2 C4 1 0.15091E-00 0.16267E-02 ; intermediate + SP2 SC4 1 0.66375E-01 0.41957E-03 ; 75intermediate, s=0.43 + SP2 C3 1 0.13366E-00 0.14408E-02 ; almost intermediate + SP2 SC3 1 0.58789E-01 0.37162E-03 ; 75almost intermediate, s=0.43 + SP2 C2 1 0.11642E-00 0.12549E-02 ; semi repulsive + SP2 AC2 1 0.11642E-00 0.12549E-02 ; semi repulsive + SP2 SC2 1 0.51203E-01 0.32367E-03 ; 75semi repulsive s=0.43 + SP2 C1 1 0.99167E-01 0.10690E-02 ; almost repulsive + SP2 AC1 1 0.99167E-01 0.10690E-02 ; almost repulsive + SP2 SC1 1 0.43617E-01 0.27572E-03 ; 75almost repulsive, s=0.43 + SP2 Qda 1 0.21558E-00 0.23238E-02 ; attractive + SP2 SQda 1 0.94820E-01 0.59939E-03 ; 75attractive, s=0.43 + SP2 Qd 1 0.21558E-00 0.23238E-02 ; attractive + SP2 SQd 1 0.94820E-01 0.59939E-03 ; 75attractive, s=0.43 + SP2 Qa 1 0.21558E-00 0.23238E-02 ; attractive + SP2 SQa 1 0.94820E-01 0.59939E-03 ; 75attractive, s=0.43 + SP2 Q0 1 0.19402E-00 0.20914E-02 ; almost attractive + SP2 SQ0 1 0.85338E-01 0.53946E-03 ; 75almost attractive, s=0.43 + P1 SP1 1 0.19402E-00 0.20914E-02 ; almost attractive + P1 Nda 1 0.19402E-00 0.20914E-02 ; almost attractive + P1 SNda 1 0.19402E-00 0.20914E-02 ; almost attractive + P1 Nd 1 0.19402E-00 0.20914E-02 ; almost attractive + P1 SNd 1 0.19402E-00 0.20914E-02 ; almost attractive + P1 Na 1 0.19402E-00 0.20914E-02 ; almost attractive + P1 SNa 1 0.19402E-00 0.20914E-02 ; almost attractive + P1 N0 1 0.17246E-00 0.18590E-02 ; semi attractive + P1 SN0 1 0.17246E-00 0.18590E-02 ; semi attractive + P1 C5 1 0.15091E-00 0.16267E-02 ; 
intermediate + P1 SC5 1 0.15091E-00 0.16267E-02 ; intermediate + P1 C4 1 0.15091E-00 0.16267E-02 ; intermediate + P1 SC4 1 0.15091E-00 0.16267E-02 ; intermediate + P1 C3 1 0.15091E-00 0.16267E-02 ; intermediate + P1 SC3 1 0.15091E-00 0.16267E-02 ; intermediate + P1 C2 1 0.13366E-00 0.14408E-02 ; almost intermediate + P1 AC2 1 0.13366E-00 0.14408E-02 ; almost intermediate + P1 SC2 1 0.13366E-00 0.14408E-02 ; almost intermediate + P1 C1 1 0.11642E-00 0.12549E-02 ; semi repulsive + P1 AC1 1 0.11642E-00 0.12549E-02 ; semi repulsive + P1 SC1 1 0.11642E-00 0.12549E-02 ; semi repulsive + P1 Qda 1 0.21558E-00 0.23238E-02 ; attractive + P1 SQda 1 0.21558E-00 0.23238E-02 ; attractive + P1 Qd 1 0.21558E-00 0.23238E-02 ; attractive + P1 SQd 1 0.21558E-00 0.23238E-02 ; attractive + P1 Qa 1 0.21558E-00 0.23238E-02 ; attractive + P1 SQa 1 0.21558E-00 0.23238E-02 ; attractive + P1 Q0 1 0.17246E-00 0.18590E-02 ; semi attractive + P1 SQ0 1 0.17246E-00 0.18590E-02 ; semi attractive + SP1 Nda 1 0.19402E-00 0.20914E-02 ; almost attractive + SP1 SNda 1 0.85338E-01 0.53946E-03 ; 75almost attractive, s=0.43 + SP1 Nd 1 0.19402E-00 0.20914E-02 ; almost attractive + SP1 SNd 1 0.85338E-01 0.53946E-03 ; 75almost attractive, s=0.43 + SP1 Na 1 0.19402E-00 0.20914E-02 ; almost attractive + SP1 SNa 1 0.85338E-01 0.53946E-03 ; 75almost attractive, s=0.43 + SP1 N0 1 0.17246E-00 0.18590E-02 ; semi attractive + SP1 SN0 1 0.75856E-01 0.47952E-03 ; 75semi attractive, s=0.43 + SP1 C5 1 0.15091E-00 0.16267E-02 ; intermediate + SP1 SC5 1 0.66375E-01 0.41957E-03 ; 75intermediate, s=0.43 + SP1 C4 1 0.15091E-00 0.16267E-02 ; intermediate + SP1 SC4 1 0.66375E-01 0.41957E-03 ; 75intermediate, s=0.43 + SP1 C3 1 0.15091E-00 0.16267E-02 ; intermediate + SP1 SC3 1 0.66375E-01 0.41957E-03 ; 75intermediate, s=0.43 + SP1 C2 1 0.13366E-00 0.14408E-02 ; almost intermediate + SP1 AC2 1 0.13366E-00 0.14408E-02 ; almost intermediate + SP1 SC2 1 0.58789E-01 0.37162E-03 ; 75almost intermediate, s=0.43 + SP1 C1 1 0.11642E-00 
0.12549E-02 ; semi repulsive + SP1 AC1 1 0.11642E-00 0.12549E-02 ; semi repulsive + SP1 SC1 1 0.51203E-01 0.32367E-03 ; 75semi repulsive s=0.43 + SP1 Qda 1 0.21558E-00 0.23238E-02 ; attractive + SP1 SQda 1 0.94820E-01 0.59939E-03 ; 75attractive, s=0.43 + SP1 Qd 1 0.21558E-00 0.23238E-02 ; attractive + SP1 SQd 1 0.94820E-01 0.59939E-03 ; 75attractive, s=0.43 + SP1 Qa 1 0.21558E-00 0.23238E-02 ; attractive + SP1 SQa 1 0.94820E-01 0.59939E-03 ; 75attractive, s=0.43 + SP1 Q0 1 0.17246E-00 0.18590E-02 ; semi attractive + SP1 SQ0 1 0.75856E-01 0.47952E-03 ; 75semi attractive, s=0.43 + Nda SNda 1 0.19402E-00 0.20914E-02 ; almost attractive + Nda Nd 1 0.19402E-00 0.20914E-02 ; almost attractive + Nda SNd 1 0.19402E-00 0.20914E-02 ; almost attractive + Nda Na 1 0.19402E-00 0.20914E-02 ; almost attractive + Nda SNa 1 0.19402E-00 0.20914E-02 ; almost attractive + Nda N0 1 0.15091E-00 0.16267E-02 ; intermediate + Nda SN0 1 0.15091E-00 0.16267E-02 ; intermediate + Nda C5 1 0.15091E-00 0.16267E-02 ; intermediate + Nda SC5 1 0.15091E-00 0.16267E-02 ; intermediate + Nda C4 1 0.13366E-00 0.14408E-02 ; almost intermediate + Nda SC4 1 0.13366E-00 0.14408E-02 ; almost intermediate + Nda C3 1 0.11642E-00 0.12549E-02 ; semi repulsive + Nda SC3 1 0.11642E-00 0.12549E-02 ; semi repulsive + Nda C2 1 0.11642E-00 0.12549E-02 ; semi repulsive + Nda AC2 1 0.11642E-00 0.12549E-02 ; semi repulsive + Nda SC2 1 0.11642E-00 0.12549E-02 ; semi repulsive + Nda C1 1 0.11642E-00 0.12549E-02 ; semi repulsive + Nda AC1 1 0.11642E-00 0.12549E-02 ; semi repulsive + Nda SC1 1 0.11642E-00 0.12549E-02 ; semi repulsive + Nda Qda 1 0.21558E-00 0.23238E-02 ; attractive + Nda SQda 1 0.21558E-00 0.23238E-02 ; attractive + Nda Qd 1 0.21558E-00 0.23238E-02 ; attractive + Nda SQd 1 0.21558E-00 0.23238E-02 ; attractive + Nda Qa 1 0.21558E-00 0.23238E-02 ; attractive + Nda SQa 1 0.21558E-00 0.23238E-02 ; attractive + Nda Q0 1 0.17246E-00 0.18590E-02 ; semi attractive + Nda SQ0 1 0.17246E-00 0.18590E-02 ; semi 
attractive + SNda Nd 1 0.19402E-00 0.20914E-02 ; almost attractive + SNda SNd 1 0.85338E-01 0.53946E-03 ; 75almost attractive, s=0.43 + SNda Na 1 0.19402E-00 0.20914E-02 ; almost attractive + SNda SNa 1 0.85338E-01 0.53946E-03 ; 75almost attractive, s=0.43 + SNda N0 1 0.15091E-00 0.16267E-02 ; intermediate + SNda SN0 1 0.66375E-01 0.41957E-03 ; 75intermediate, s=0.43 + SNda C5 1 0.15091E-00 0.16267E-02 ; intermediate + SNda SC5 1 0.66375E-01 0.41957E-03 ; 75intermediate, s=0.43 + SNda C4 1 0.13366E-00 0.14408E-02 ; almost intermediate + SNda SC4 1 0.58789E-01 0.37162E-03 ; 75almost intermediate, s=0.43 + SNda C3 1 0.11642E-00 0.12549E-02 ; semi repulsive + SNda SC3 1 0.51203E-01 0.32367E-03 ; 75semi repulsive s=0.43 + SNda C2 1 0.11642E-00 0.12549E-02 ; semi repulsive + SNda AC2 1 0.11642E-00 0.12549E-02 ; semi repulsive + SNda SC2 1 0.51203E-01 0.32367E-03 ; 75semi repulsive s=0.43 + SNda C1 1 0.11642E-00 0.12549E-02 ; semi repulsive + SNda AC1 1 0.11642E-00 0.12549E-02 ; semi repulsive + SNda SC1 1 0.51203E-01 0.32367E-03 ; 75semi repulsive s=0.43 + SNda Qda 1 0.21558E-00 0.23238E-02 ; attractive + SNda SQda 1 0.94820E-01 0.59939E-03 ; 75attractive, s=0.43 + SNda Qd 1 0.21558E-00 0.23238E-02 ; attractive + SNda SQd 1 0.94820E-01 0.59939E-03 ; 75attractive, s=0.43 + SNda Qa 1 0.21558E-00 0.23238E-02 ; attractive + SNda SQa 1 0.94820E-01 0.59939E-03 ; 75attractive, s=0.43 + SNda Q0 1 0.17246E-00 0.18590E-02 ; semi attractive + SNda SQ0 1 0.75856E-01 0.47952E-03 ; 75semi attractive, s=0.43 + Nd SNd 1 0.17246E-00 0.18590E-02 ; semi attractive + Nd Na 1 0.19402E-00 0.20914E-02 ; almost attractive + Nd SNa 1 0.19402E-00 0.20914E-02 ; almost attractive + Nd N0 1 0.15091E-00 0.16267E-02 ; intermediate + Nd SN0 1 0.15091E-00 0.16267E-02 ; intermediate + Nd C5 1 0.15091E-00 0.16267E-02 ; intermediate + Nd SC5 1 0.15091E-00 0.16267E-02 ; intermediate + Nd C4 1 0.13366E-00 0.14408E-02 ; almost intermediate + Nd SC4 1 0.13366E-00 0.14408E-02 ; almost intermediate + Nd C3 1 
0.11642E-00 0.12549E-02 ; semi repulsive + Nd SC3 1 0.11642E-00 0.12549E-02 ; semi repulsive + Nd C2 1 0.11642E-00 0.12549E-02 ; semi repulsive + Nd AC2 1 0.11642E-00 0.12549E-02 ; semi repulsive + Nd SC2 1 0.11642E-00 0.12549E-02 ; semi repulsive + Nd C1 1 0.11642E-00 0.12549E-02 ; semi repulsive + Nd AC1 1 0.11642E-00 0.12549E-02 ; semi repulsive + Nd SC1 1 0.11642E-00 0.12549E-02 ; semi repulsive + Nd Qda 1 0.21558E-00 0.23238E-02 ; attractive + Nd SQda 1 0.21558E-00 0.23238E-02 ; attractive + Nd Qd 1 0.17246E-00 0.18590E-02 ; semi attractive + Nd SQd 1 0.17246E-00 0.18590E-02 ; semi attractive + Nd Qa 1 0.21558E-00 0.23238E-02 ; attractive + Nd SQa 1 0.21558E-00 0.23238E-02 ; attractive + Nd Q0 1 0.17246E-00 0.18590E-02 ; semi attractive + Nd SQ0 1 0.17246E-00 0.18590E-02 ; semi attractive + SNd Na 1 0.19402E-00 0.20914E-02 ; almost attractive + SNd SNa 1 0.85338E-01 0.53946E-03 ; 75almost attractive, s=0.43 + SNd N0 1 0.15091E-00 0.16267E-02 ; intermediate + SNd SN0 1 0.66375E-01 0.41957E-03 ; 75intermediate, s=0.43 + SNd C5 1 0.15091E-00 0.16267E-02 ; intermediate + SNd SC5 1 0.66375E-01 0.41957E-03 ; 75intermediate, s=0.43 + SNd C4 1 0.13366E-00 0.14408E-02 ; almost intermediate + SNd SC4 1 0.58789E-01 0.37162E-03 ; 75almost intermediate, s=0.43 + SNd C3 1 0.11642E-00 0.12549E-02 ; semi repulsive + SNd SC3 1 0.51203E-01 0.32367E-03 ; 75semi repulsive s=0.43 + SNd C2 1 0.11642E-00 0.12549E-02 ; semi repulsive + SNd AC2 1 0.11642E-00 0.12549E-02 ; semi repulsive + SNd SC2 1 0.51203E-01 0.32367E-03 ; 75semi repulsive s=0.43 + SNd C1 1 0.11642E-00 0.12549E-02 ; semi repulsive + SNd AC1 1 0.11642E-00 0.12549E-02 ; semi repulsive + SNd SC1 1 0.51203E-01 0.32367E-03 ; 75semi repulsive s=0.43 + SNd Qda 1 0.21558E-00 0.23238E-02 ; attractive + SNd SQda 1 0.94820E-01 0.59939E-03 ; 75attractive, s=0.43 + SNd Qd 1 0.17246E-00 0.18590E-02 ; semi attractive + SNd SQd 1 0.75856E-01 0.47952E-03 ; 75semi attractive, s=0.43 + SNd Qa 1 0.21558E-00 0.23238E-02 ; attractive + 
SNd SQa 1 0.94820E-01 0.59939E-03 ; 75attractive, s=0.43 + SNd Q0 1 0.17246E-00 0.18590E-02 ; semi attractive + SNd SQ0 1 0.75856E-01 0.47952E-03 ; 75semi attractive, s=0.43 + Na SNa 1 0.17246E-00 0.18590E-02 ; semi attractive + Na N0 1 0.15091E-00 0.16267E-02 ; intermediate + Na SN0 1 0.15091E-00 0.16267E-02 ; intermediate + Na C5 1 0.15091E-00 0.16267E-02 ; intermediate + Na SC5 1 0.15091E-00 0.16267E-02 ; intermediate + Na C4 1 0.13366E-00 0.14408E-02 ; almost intermediate + Na SC4 1 0.13366E-00 0.14408E-02 ; almost intermediate + Na C3 1 0.11642E-00 0.12549E-02 ; semi repulsive + Na SC3 1 0.11642E-00 0.12549E-02 ; semi repulsive + Na C2 1 0.11642E-00 0.12549E-02 ; semi repulsive + Na AC2 1 0.11642E-00 0.12549E-02 ; semi repulsive + Na SC2 1 0.11642E-00 0.12549E-02 ; semi repulsive + Na C1 1 0.11642E-00 0.12549E-02 ; semi repulsive + Na AC1 1 0.11642E-00 0.12549E-02 ; semi repulsive + Na SC1 1 0.11642E-00 0.12549E-02 ; semi repulsive + Na Qda 1 0.21558E-00 0.23238E-02 ; attractive + Na SQda 1 0.21558E-00 0.23238E-02 ; attractive + Na Qd 1 0.21558E-00 0.23238E-02 ; attractive + Na SQd 1 0.21558E-00 0.23238E-02 ; attractive + Na Qa 1 0.17246E-00 0.18590E-02 ; semi attractive + Na SQa 1 0.17246E-00 0.18590E-02 ; semi attractive + Na Q0 1 0.17246E-00 0.18590E-02 ; semi attractive + Na SQ0 1 0.17246E-00 0.18590E-02 ; semi attractive + SNa N0 1 0.15091E-00 0.16267E-02 ; intermediate + SNa SN0 1 0.66375E-01 0.41957E-03 ; 75intermediate, s=0.43 + SNa C5 1 0.15091E-00 0.16267E-02 ; intermediate + SNa SC5 1 0.66375E-01 0.41957E-03 ; 75intermediate, s=0.43 + SNa C4 1 0.13366E-00 0.14408E-02 ; almost intermediate + SNa SC4 1 0.58789E-01 0.37162E-03 ; 75almost intermediate, s=0.43 + SNa C3 1 0.11642E-00 0.12549E-02 ; semi repulsive + SNa SC3 1 0.51203E-01 0.32367E-03 ; 75semi repulsive s=0.43 + SNa C2 1 0.11642E-00 0.12549E-02 ; semi repulsive + SNa AC2 1 0.11642E-00 0.12549E-02 ; semi repulsive + SNa SC2 1 0.51203E-01 0.32367E-03 ; 75semi repulsive s=0.43 + SNa C1 1 
0.11642E-00 0.12549E-02 ; semi repulsive + SNa AC1 1 0.11642E-00 0.12549E-02 ; semi repulsive + SNa SC1 1 0.51203E-01 0.32367E-03 ; 75semi repulsive s=0.43 + SNa Qda 1 0.21558E-00 0.23238E-02 ; attractive + SNa SQda 1 0.94820E-01 0.59939E-03 ; 75attractive, s=0.43 + SNa Qd 1 0.21558E-00 0.23238E-02 ; attractive + SNa SQd 1 0.94820E-01 0.59939E-03 ; 75attractive, s=0.43 + SNa Qa 1 0.17246E-00 0.18590E-02 ; semi attractive + SNa SQa 1 0.75856E-01 0.47952E-03 ; 75semi attractive, s=0.43 + SNa Q0 1 0.17246E-00 0.18590E-02 ; semi attractive + SNa SQ0 1 0.75856E-01 0.47952E-03 ; 75semi attractive, s=0.43 + N0 SN0 1 0.15091E-00 0.16267E-02 ; intermediate + N0 C5 1 0.15091E-00 0.16267E-02 ; intermediate + N0 SC5 1 0.15091E-00 0.16267E-02 ; intermediate + N0 C4 1 0.15091E-00 0.16267E-02 ; intermediate + N0 SC4 1 0.15091E-00 0.16267E-02 ; intermediate + N0 C3 1 0.15091E-00 0.16267E-02 ; intermediate + N0 SC3 1 0.15091E-00 0.16267E-02 ; intermediate + N0 C2 1 0.13366E-00 0.14408E-02 ; almost intermediate + N0 AC2 1 0.13366E-00 0.14408E-02 ; almost intermediate + N0 SC2 1 0.13366E-00 0.14408E-02 ; almost intermediate + N0 C1 1 0.11642E-00 0.12549E-02 ; semi repulsive + N0 AC1 1 0.11642E-00 0.12549E-02 ; semi repulsive + N0 SC1 1 0.11642E-00 0.12549E-02 ; semi repulsive + N0 Qda 1 0.15091E-00 0.16267E-02 ; intermediate + N0 SQda 1 0.15091E-00 0.16267E-02 ; intermediate + N0 Qd 1 0.15091E-00 0.16267E-02 ; intermediate + N0 SQd 1 0.15091E-00 0.16267E-02 ; intermediate + N0 Qa 1 0.15091E-00 0.16267E-02 ; intermediate + N0 SQa 1 0.15091E-00 0.16267E-02 ; intermediate + N0 Q0 1 0.15091E-00 0.16267E-02 ; intermediate + N0 SQ0 1 0.15091E-00 0.16267E-02 ; intermediate + SN0 C5 1 0.15091E-00 0.16267E-02 ; intermediate + SN0 SC5 1 0.66375E-01 0.41957E-03 ; 75intermediate, s=0.43 + SN0 C4 1 0.15091E-00 0.16267E-02 ; intermediate + SN0 SC4 1 0.66375E-01 0.41957E-03 ; 75intermediate, s=0.43 + SN0 C3 1 0.15091E-00 0.16267E-02 ; intermediate + SN0 SC3 1 0.66375E-01 0.41957E-03 ; 
75intermediate, s=0.43 + SN0 C2 1 0.13366E-00 0.14408E-02 ; almost intermediate + SN0 AC2 1 0.13366E-00 0.14408E-02 ; almost intermediate + SN0 SC2 1 0.58789E-01 0.37162E-03 ; 75almost intermediate, s=0.43 + SN0 C1 1 0.11642E-00 0.12549E-02 ; semi repulsive + SN0 AC1 1 0.11642E-00 0.12549E-02 ; semi repulsive + SN0 SC1 1 0.51203E-01 0.32367E-03 ; 75semi repulsive s=0.43 + SN0 Qda 1 0.15091E-00 0.16267E-02 ; intermediate + SN0 SQda 1 0.66375E-01 0.41957E-03 ; 75intermediate, s=0.43 + SN0 Qd 1 0.15091E-00 0.16267E-02 ; intermediate + SN0 SQd 1 0.66375E-01 0.41957E-03 ; 75intermediate, s=0.43 + SN0 Qa 1 0.15091E-00 0.16267E-02 ; intermediate + SN0 SQa 1 0.66375E-01 0.41957E-03 ; 75intermediate, s=0.43 + SN0 Q0 1 0.15091E-00 0.16267E-02 ; intermediate + SN0 SQ0 1 0.66375E-01 0.41957E-03 ; 75intermediate, s=0.43 + C5 SC5 1 0.15091E-00 0.16267E-02 ; intermediate + C5 C4 1 0.15091E-00 0.16267E-02 ; intermediate + C5 SC4 1 0.15091E-00 0.16267E-02 ; intermediate + C5 C3 1 0.15091E-00 0.16267E-02 ; intermediate + C5 SC3 1 0.15091E-00 0.16267E-02 ; intermediate + C5 C2 1 0.13366E-00 0.14408E-02 ; almost intermediate + C5 AC2 1 0.13366E-00 0.14408E-02 ; almost intermediate + C5 SC2 1 0.13366E-00 0.14408E-02 ; almost intermediate + C5 C1 1 0.13366E-00 0.14408E-02 ; almost intermediate + C5 AC1 1 0.13366E-00 0.14408E-02 ; almost intermediate + C5 SC1 1 0.13366E-00 0.14408E-02 ; almost intermediate + C5 Qda 1 0.13366E-00 0.14408E-02 ; almost intermediate + C5 SQda 1 0.13366E-00 0.14408E-02 ; almost intermediate + C5 Qd 1 0.13366E-00 0.14408E-02 ; almost intermediate + C5 SQd 1 0.13366E-00 0.14408E-02 ; almost intermediate + C5 Qa 1 0.13366E-00 0.14408E-02 ; almost intermediate + C5 SQa 1 0.13366E-00 0.14408E-02 ; almost intermediate + C5 Q0 1 0.13366E-00 0.14408E-02 ; almost intermediate + C5 SQ0 1 0.13366E-00 0.14408E-02 ; almost intermediate + SC5 C4 1 0.15091E-00 0.16267E-02 ; intermediate + SC5 SC4 1 0.66375E-01 0.41957E-03 ; 75intermediate, s=0.43 + SC5 C3 1 0.15091E-00 
0.16267E-02 ; intermediate + SC5 SC3 1 0.66375E-01 0.41957E-03 ; 75intermediate, s=0.43 + SC5 C2 1 0.13366E-00 0.14408E-02 ; almost intermediate + SC5 AC2 1 0.13366E-00 0.14408E-02 ; almost intermediate + SC5 SC2 1 0.58789E-01 0.37162E-03 ; 75almost intermediate, s=0.43 + SC5 C1 1 0.13366E-00 0.14408E-02 ; almost intermediate + SC5 AC1 1 0.13366E-00 0.14408E-02 ; almost intermediate + SC5 SC1 1 0.58789E-01 0.37162E-03 ; 75almost intermediate, s=0.43 + SC5 Qda 1 0.13366E-00 0.14408E-02 ; almost intermediate + SC5 SQda 1 0.58789E-01 0.37162E-03 ; 75almost intermediate, s=0.43 + SC5 Qd 1 0.13366E-00 0.14408E-02 ; almost intermediate + SC5 SQd 1 0.58789E-01 0.37162E-03 ; 75almost intermediate, s=0.43 + SC5 Qa 1 0.13366E-00 0.14408E-02 ; almost intermediate + SC5 SQa 1 0.58789E-01 0.37162E-03 ; 75almost intermediate, s=0.43 + SC5 Q0 1 0.13366E-00 0.14408E-02 ; almost intermediate + SC5 SQ0 1 0.58789E-01 0.37162E-03 ; 75almost intermediate, s=0.43 + C4 SC4 1 0.15091E-00 0.16267E-02 ; intermediate + C4 C3 1 0.15091E-00 0.16267E-02 ; intermediate + C4 SC3 1 0.15091E-00 0.16267E-02 ; intermediate + C4 C2 1 0.13366E-00 0.14408E-02 ; almost intermediate + C4 AC2 1 0.13366E-00 0.14408E-02 ; almost intermediate + C4 SC2 1 0.13366E-00 0.14408E-02 ; almost intermediate + C4 C1 1 0.13366E-00 0.14408E-02 ; almost intermediate + C4 AC1 1 0.13366E-00 0.14408E-02 ; almost intermediate + C4 SC1 1 0.13366E-00 0.14408E-02 ; almost intermediate + C4 Qda 1 0.11642E-00 0.12549E-02 ; semi repulsive + C4 SQda 1 0.11642E-00 0.12549E-02 ; semi repulsive + C4 Qd 1 0.11642E-00 0.12549E-02 ; semi repulsive + C4 SQd 1 0.11642E-00 0.12549E-02 ; semi repulsive + C4 Qa 1 0.11642E-00 0.12549E-02 ; semi repulsive + C4 SQa 1 0.11642E-00 0.12549E-02 ; semi repulsive + C4 Q0 1 0.11642E-00 0.12549E-02 ; semi repulsive + C4 SQ0 1 0.11642E-00 0.12549E-02 ; semi repulsive + SC4 C3 1 0.15091E-00 0.16267E-02 ; intermediate + SC4 SC3 1 0.66375E-01 0.41957E-03 ; 75intermediate, s=0.43 + SC4 C2 1 0.13366E-00 
0.14408E-02 ; almost intermediate + SC4 AC2 1 0.13366E-00 0.14408E-02 ; almost intermediate + SC4 SC2 1 0.58789E-01 0.37162E-03 ; 75almost intermediate, s=0.43 + SC4 C1 1 0.13366E-00 0.14408E-02 ; almost intermediate + SC4 AC1 1 0.13366E-00 0.14408E-02 ; almost intermediate + SC4 SC1 1 0.58789E-01 0.37162E-03 ; 75almost intermediate, s=0.43 + SC4 Qda 1 0.11642E-00 0.12549E-02 ; semi repulsive + SC4 SQda 1 0.51203E-01 0.32367E-03 ; 75semi repulsive s=0.43 + SC4 Qd 1 0.11642E-00 0.12549E-02 ; semi repulsive + SC4 SQd 1 0.51203E-01 0.32367E-03 ; 75semi repulsive s=0.43 + SC4 Qa 1 0.11642E-00 0.12549E-02 ; semi repulsive + SC4 SQa 1 0.51203E-01 0.32367E-03 ; 75semi repulsive s=0.43 + SC4 Q0 1 0.11642E-00 0.12549E-02 ; semi repulsive + SC4 SQ0 1 0.51203E-01 0.32367E-03 ; 75semi repulsive s=0.43 + C3 SC3 1 0.15091E-00 0.16267E-02 ; intermediate + C3 C2 1 0.15091E-00 0.16267E-02 ; intermediate + C3 AC2 1 0.15091E-00 0.16267E-02 ; intermediate + C3 SC2 1 0.15091E-00 0.16267E-02 ; intermediate + C3 C1 1 0.15091E-00 0.16267E-02 ; intermediate + C3 AC1 1 0.15091E-00 0.16267E-02 ; intermediate + C3 SC1 1 0.15091E-00 0.16267E-02 ; intermediate + C3 Qda 1 0.99167E-01 0.10690E-02 ; almost repulsive + C3 SQda 1 0.99167E-01 0.10690E-02 ; almost repulsive + C3 Qd 1 0.99167E-01 0.10690E-02 ; almost repulsive + C3 SQd 1 0.99167E-01 0.10690E-02 ; almost repulsive + C3 Qa 1 0.99167E-01 0.10690E-02 ; almost repulsive + C3 SQa 1 0.99167E-01 0.10690E-02 ; almost repulsive + C3 Q0 1 0.99167E-01 0.10690E-02 ; almost repulsive + C3 SQ0 1 0.99167E-01 0.10690E-02 ; almost repulsive + SC3 C2 1 0.15091E-00 0.16267E-02 ; intermediate + SC3 AC2 1 0.15091E-00 0.16267E-02 ; intermediate + SC3 SC2 1 0.66375E-01 0.41957E-03 ; 75intermediate, s=0.43 + SC3 C1 1 0.15091E-00 0.16267E-02 ; intermediate + SC3 AC1 1 0.15091E-00 0.16267E-02 ; intermediate + SC3 SC1 1 0.66375E-01 0.41957E-03 ; 75intermediate, s=0.43 + SC3 Qda 1 0.99167E-01 0.10690E-02 ; almost repulsive + SC3 SQda 1 0.43617E-01 0.27572E-03 ; 
75almost repulsive, s=0.43 + SC3 Qd 1 0.99167E-01 0.10690E-02 ; almost repulsive + SC3 SQd 1 0.43617E-01 0.27572E-03 ; 75almost repulsive, s=0.43 + SC3 Qa 1 0.99167E-01 0.10690E-02 ; almost repulsive + SC3 SQa 1 0.43617E-01 0.27572E-03 ; 75almost repulsive, s=0.43 + SC3 Q0 1 0.99167E-01 0.10690E-02 ; almost repulsive + SC3 SQ0 1 0.43617E-01 0.27572E-03 ; 75almost repulsive, s=0.43 + C2 AC2 1 0.15091E-00 0.16267E-02 ; intermediate + C2 SC2 1 0.15091E-00 0.16267E-02 ; intermediate + C2 C1 1 0.15091E-00 0.16267E-02 ; intermediate + C2 AC1 1 0.15091E-00 0.16267E-02 ; intermediate + C2 SC1 1 0.15091E-00 0.16267E-02 ; intermediate + C2 Qda 1 0.45440E-00 0.25810E-01 ; super repulsive, s=0.62 + C2 SQda 1 0.45440E-00 0.25810E-01 ; super repulsive, s=0.62 + C2 Qd 1 0.45440E-00 0.25810E-01 ; super repulsive, s=0.62 + C2 SQd 1 0.45440E-00 0.25810E-01 ; super repulsive, s=0.62 + C2 Qa 1 0.45440E-00 0.25810E-01 ; super repulsive, s=0.62 + C2 SQa 1 0.45440E-00 0.25810E-01 ; super repulsive, s=0.62 + C2 Q0 1 0.45440E-00 0.25810E-01 ; super repulsive, s=0.62 + C2 SQ0 1 0.45440E-00 0.25810E-01 ; super repulsive, s=0.62 + AC2 SC2 1 0.15091E-00 0.16267E-02 ; intermediate + AC2 C1 1 0.15091E-00 0.16267E-02 ; intermediate + AC2 AC1 1 0.15091E-00 0.16267E-02 ; intermediate + AC2 SC1 1 0.15091E-00 0.16267E-02 ; intermediate + AC2 Qda 1 0.86233E-01 0.92953E-03 ; repulsive + AC2 SQda 1 0.86233E-01 0.92953E-03 ; repulsive + AC2 Qd 1 0.86233E-01 0.92953E-03 ; repulsive + AC2 SQd 1 0.86233E-01 0.92953E-03 ; repulsive + AC2 Qa 1 0.86233E-01 0.92953E-03 ; repulsive + AC2 SQa 1 0.86233E-01 0.92953E-03 ; repulsive + AC2 Q0 1 0.86233E-01 0.92953E-03 ; repulsive + AC2 SQ0 1 0.86233E-01 0.92953E-03 ; repulsive + SC2 C1 1 0.15091E-00 0.16267E-02 ; intermediate + SC2 AC1 1 0.15091E-00 0.16267E-02 ; intermediate + SC2 SC1 1 0.66375E-01 0.41957E-03 ; 75intermediate, s=0.43 + SC2 Qda 1 0.45440E-00 0.25810E-01 ; super repulsive, s=0.62 + SC2 SQda 1 0.45440E-00 0.25810E-01 ; super repulsive, s=0.62 + SC2 Qd 
1 0.45440E-00 0.25810E-01 ; super repulsive, s=0.62 + SC2 SQd 1 0.45440E-00 0.25810E-01 ; super repulsive, s=0.62 + SC2 Qa 1 0.45440E-00 0.25810E-01 ; super repulsive, s=0.62 + SC2 SQa 1 0.45440E-00 0.25810E-01 ; super repulsive, s=0.62 + SC2 Q0 1 0.45440E-00 0.25810E-01 ; super repulsive, s=0.62 + SC2 SQ0 1 0.45440E-00 0.25810E-01 ; super repulsive, s=0.62 + C1 AC1 1 0.15091E-00 0.16267E-02 ; intermediate + C1 SC1 1 0.15091E-00 0.16267E-02 ; intermediate + C1 Qda 1 0.45440E-00 0.25810E-01 ; super repulsive, s=0.62 + C1 SQda 1 0.45440E-00 0.25810E-01 ; super repulsive, s=0.62 + C1 Qd 1 0.45440E-00 0.25810E-01 ; super repulsive, s=0.62 + C1 SQd 1 0.45440E-00 0.25810E-01 ; super repulsive, s=0.62 + C1 Qa 1 0.45440E-00 0.25810E-01 ; super repulsive, s=0.62 + C1 SQa 1 0.45440E-00 0.25810E-01 ; super repulsive, s=0.62 + C1 Q0 1 0.45440E-00 0.25810E-01 ; super repulsive, s=0.62 + C1 SQ0 1 0.45440E-00 0.25810E-01 ; super repulsive, s=0.62 + AC1 SC1 1 0.15091E-00 0.16267E-02 ; intermediate + AC1 Qda 1 0.86233E-01 0.92953E-03 ; repulsive + AC1 SQda 1 0.86233E-01 0.92953E-03 ; repulsive + AC1 Qd 1 0.86233E-01 0.92953E-03 ; repulsive + AC1 SQd 1 0.86233E-01 0.92953E-03 ; repulsive + AC1 Qa 1 0.86233E-01 0.92953E-03 ; repulsive + AC1 SQa 1 0.86233E-01 0.92953E-03 ; repulsive + AC1 Q0 1 0.86233E-01 0.92953E-03 ; repulsive + AC1 SQ0 1 0.86233E-01 0.92953E-03 ; repulsive + SC1 Qda 1 0.45440E-00 0.25810E-01 ; super repulsive, s=0.62 + SC1 SQda 1 0.45440E-00 0.25810E-01 ; super repulsive, s=0.62 + SC1 Qd 1 0.45440E-00 0.25810E-01 ; super repulsive, s=0.62 + SC1 SQd 1 0.45440E-00 0.25810E-01 ; super repulsive, s=0.62 + SC1 Qa 1 0.45440E-00 0.25810E-01 ; super repulsive, s=0.62 + SC1 SQa 1 0.45440E-00 0.25810E-01 ; super repulsive, s=0.62 + SC1 Q0 1 0.45440E-00 0.25810E-01 ; super repulsive, s=0.62 + SC1 SQ0 1 0.45440E-00 0.25810E-01 ; super repulsive, s=0.62 + Qda SQda 1 0.24145E-00 0.26027E-02 ; supra attractive + Qda Qd 1 0.24145E-00 0.26027E-02 ; supra attractive + Qda SQd 1 
0.24145E-00 0.26027E-02 ; supra attractive + Qda Qa 1 0.24145E-00 0.26027E-02 ; supra attractive + Qda SQa 1 0.24145E-00 0.26027E-02 ; supra attractive + Qda Q0 1 0.19402E-00 0.20914E-02 ; almost attractive + Qda SQ0 1 0.19402E-00 0.20914E-02 ; almost attractive + SQda Qd 1 0.24145E-00 0.26027E-02 ; supra attractive + SQda SQd 1 0.10620E-00 0.67132E-03 ; 75supra attractive, s=0.43 + SQda Qa 1 0.24145E-00 0.26027E-02 ; supra attractive + SQda SQa 1 0.10620E-00 0.67132E-03 ; 75supra attractive, s=0.43 + SQda Q0 1 0.19402E-00 0.20914E-02 ; almost attractive + SQda SQ0 1 0.85338E-01 0.53946E-03 ; 75almost attractive, s=0.43 + Qd SQd 1 0.21558E-00 0.23238E-02 ; attractive + Qd Qa 1 0.24145E-00 0.26027E-02 ; supra attractive + Qd SQa 1 0.24145E-00 0.26027E-02 ; supra attractive + Qd Q0 1 0.19402E-00 0.20914E-02 ; almost attractive + Qd SQ0 1 0.19402E-00 0.20914E-02 ; almost attractive + SQd Qa 1 0.24145E-00 0.26027E-02 ; supra attractive + SQd SQa 1 0.10620E-00 0.67132E-03 ; 75supra attractive, s=0.43 + SQd Q0 1 0.19402E-00 0.20914E-02 ; almost attractive + SQd SQ0 1 0.85338E-01 0.53946E-03 ; 75almost attractive, s=0.43 + Qa SQa 1 0.21558E-00 0.23238E-02 ; attractive + Qa Q0 1 0.19402E-00 0.20914E-02 ; almost attractive + Qa SQ0 1 0.19402E-00 0.20914E-02 ; almost attractive + SQa Q0 1 0.19402E-00 0.20914E-02 ; almost attractive + SQa SQ0 1 0.85338E-01 0.53946E-03 ; 75almost attractive, s=0.43 + Q0 SQ0 1 0.15091E-00 0.16267E-02 ; intermediate + +; +; CARBON NANOPARTICLES +; LM, 12/2010 +; revised by LM, 10/2012 +; +; please cite: +; J. Wong-ekkabut, S. Baoukina, W. Triampo, I-M.Tang, D. P. Tieleman, L. Monticelli. +; Computer simulation study of fullerene translocation through lipid membranes +; Nature Nanotechnology (2008), 3, 363-368. +; +; L Monticelli +; On atomistic and coarse-grained models of C60 fullerene +; J Chem Theory Comput (2012), 8, 1370-1378. 
+; + CNP CNP 1 6.8271E-02 4.3156E-04 ; e 3.6 ; small ; +; + P5 CNP 1 0.11642E-00 0.12549E-02 ; e 2.7 ; big ; (same as for benzene) + SP5 CNP 1 5.1203E-02 3.2367E-04 ; e 2.7 ; small ; (same as for benzene) + P4 CNP 1 1.1642E-01 1.2549E-03 ; e 2.7 ; big ; ********************* water + BP4 CNP 1 1.1642E-01 1.2549E-03 ; e 2.7 ; big ; (same as for benzene) + SP4 CNP 1 5.1203E-02 3.2367E-04 ; e 2.7 ; small ; (same as for benzene) + P3 CNP 1 1.3366E-01 1.4408E-03 ; e 3.1 ; big ; (same as for benzene) + SP3 CNP 1 5.8789E-02 3.7162E-04 ; e 3.1 ; small ; (same as for benzene) + P2 CNP 1 1.4013E-01 1.5105E-03 ; e 3.25 ; big ; ********************* ethanol (benzene has e=3.5) + SP2 CNP 1 6.1633E-02 3.8961E-04 ; e 3.25 ; small ; (benzene has e=3.5) + P1 CNP 1 1.4013E-01 1.5105E-03 ; e 3.25 ; big ; (benzene has e=3.5) + SP1 CNP 1 6.1633E-02 3.8961E-04 ; e 3.25 ; small ; (benzene has e=3.5) + Nda CNP 1 1.4229E-01 1.5337E-03 ; e 3.3 ; big ; ********************* butanol (benzene has e=3.1) + SNda CNP 1 6.2581E-02 3.9560E-04 ; e 3.3 ; small ; (benzene has e=3.1) + Nd CNP 1 1.4013E-01 1.5105E-03 ; e 3.25 ; big ; (benzene has e=3.1) + SNd CNP 1 6.1633E-02 3.8961E-04 ; e 3.25 ; small ; (benzene has e=3.1) + Na CNP 1 1.4013E-01 1.5105E-03 ; e 3.25 ; big ; ********************* acetone (benzene has e=3.1) + SNa CNP 1 6.1633E-02 3.8961E-04 ; e 3.25 ; small ; (benzene has e=3.1) + N0 CNP 1 1.4660E-01 1.5802E-03 ; e 3.4 ; big ; (benzene has e=3.5) + SN0 CNP 1 6.4478E-02 4.0759E-04 ; e 3.4 ; small ; (benzene has e=3.5) + C5 CNP 1 1.4660E-01 1.5802E-03 ; e 3.4 ; big ; (benzene has e=3.5) + SC5 CNP 1 6.6374E-02 4.1958E-04 ; e 3.5 ; small ; (same as for benzene) + C4 CNP 1 1.5091E-01 1.6267E-03 ; e 3.5 ; big ; (same as for benzene) + SC4 CNP 1 6.8271E-02 4.3156E-04 ; e 3.6 ; small ; ******************** benzene (benzene has e=3.5) + C3 CNP 1 1.5091E-01 1.6267E-03 ; e 3.5 ; big ; (same as for benzene) + SC3 CNP 1 6.6374E-02 4.1958E-04 ; e 3.5 ; small ; (same as for benzene) + C2 CNP 1 
1.4229E-01 1.5337E-03 ; e 3.3 ; big ; (benzene has e=3.1) + AC2 CNP 1 1.4229E-01 1.5337E-03 ; e 3.3 ; big ; (benzene has e=3.1) + SC2 CNP 1 6.2581E-02 3.9560E-04 ; e 3.3 ; small ; (benzene has e=3.1) + C1 CNP 1 1.3582E-01 1.4640E-03 ; e 3.15 ; big ; ********************* octane (benzene has e=3.1) + AC1 CNP 1 1.3366E-01 1.4408E-03 ; e 3.1 ; big ; (same as for benzene) + SC1 CNP 1 6.6374E-02 4.1958E-04 ; e 3.5 ; small ; ********************* cyclohexane (benzene has e=3.1) + Qda CNP 1 0.11642E-00 0.12549E-02 ; e 2.7 ; big ; (same as for benzene) + Qd CNP 1 0.11642E-00 0.12549E-02 ; e 2.7 ; big ; (same as for benzene) + Qa CNP 1 0.11642E-00 0.12549E-02 ; e 2.7 ; big ; (same as for benzene) + Q0 CNP 1 0.11642E-00 0.12549E-02 ; e 2.7 ; big ; (same as for benzene) + SQda CNP 1 5.1203E-02 3.2367E-04 ; e 2.7 ; small ; (same as for benzene) + SQd CNP 1 5.1203E-02 3.2367E-04 ; e 2.7 ; small ; (same as for benzene) + SQa CNP 1 5.1203E-02 3.2367E-04 ; e 2.7 ; small ; (same as for benzene) + SQ0 CNP 1 5.1203E-02 3.2367E-04 ; e 2.7 ; small ; (same as for benzene) +; +; polystyrene +; GR and LM, 10/2012 +; please notice that not all interactions have been validated +; +; please cite: +; G. Rossi, L. Monticelli, S. R. Puisto, I. Vattulainen and T. 
Ala-Nissila +; Coarse-graining polymers with the MARTINI force-field: polystyrene as a benchmark case +; Soft Matter (2011), 7, 698 +; + STY STY 1 0.45601E-01 0.21661E-03 ; epsi=75%(3.2), s=0.41 + SCY SCY 1 0.66375E-01 0.41957E-03 ; epsi=75%(3.5), s=0.43 + STY SCY 1 0.58789E-01 0.37162E-03 ; 75almost intermediate, s=0.43 +; + P5 STY 1 0.11642E-00 0.12549E-02 ; semi repulsive + SP5 STY 1 5.1203E-02 3.2367E-04 ; 75semi repulsive, s=0.43 + P4 STY 1 0.11642E-00 0.12549E-02 ; semi repulsive, used for PS-water interactions + BP4 STY 1 0.11642E-00 0.12549E-02 ; semi repulsive, used for PS-water interactions + SP4 STY 1 0.51203E-01 0.32367E-03 ; 75semi repulsive, s=0.43 + P3 STY 1 0.13366E-00 0.14408E-02 ; almost intermediate + SP3 STY 1 5.8789E-02 3.7162E-04 ; 75almost intermediate, s=0.43 + P2 STY 1 0.15091E-00 0.16267E-02 ; intermediate + SP2 STY 1 6.6374E-02 4.1958E-04 ; 75intermediate, s=0.43 + P1 STY 1 0.15091E-00 0.16267E-02 ; intermediate + SP1 STY 1 0.66375E-01 0.41957E-03 ; 75intermediate, s=0.43 + Nda STY 1 0.13366E-00 0.14408E-02 ; almost intermediate + SNda STY 1 5.8789E-02 3.7162E-04 ; 75almost intermediate, s=0.43 + Nd STY 1 0.13366E-00 0.14408E-02 ; almost intermediate + SNd STY 1 0.58789E-01 0.37162E-03 ; 75almost intermediate, s=0.43 + Na STY 1 0.13366E-00 0.14408E-02 ; almost intermediate + SNa STY 1 0.58789E-01 0.37162E-03 ; 75almost intermediate, s=0.43 + N0 STY 1 0.15091E-00 0.16267E-02 ; intermediate + SN0 STY 1 0.66375E-01 0.41957E-03 ; 75intermediate, s=0.43 + C5 STY 1 0.15091E-00 0.16267E-02 ; intermediate + SC5 STY 1 0.66375E-01 0.41957E-03 ; 75intermediate, s=0.43 + C4 STY 1 0.15091E-00 0.16267E-02 ; intermediate + SC4 STY 1 0.66375E-01 0.41957E-03 ; 75intermediate, s=0.43, used for PS-benzene interactions + STY C3 1 0.15091E-00 0.16267E-02 ; intermediate + STY SC3 1 0.66375E-01 0.41957E-03 ; 75intermediate, s=0.43 + STY C2 1 0.13366E-00 0.14408E-02 ; almost intermediate + STY AC2 1 0.13366E-00 0.14408E-02 ; almost intermediate + STY SC2 1 
0.58789E-01 0.37162E-03 ; 75almost intermediate, s=0.43 + STY C1 1 0.13366E-00 0.14408E-02 ; almost intermediate + STY AC1 1 0.13366E-00 0.14408E-02 ; almost intermediate + STY SC1 1 0.62581E-01 0.39560E-03 ; epsi=75%(3.3), s=0.43, used for PS-cyclohexane interactions + STY Qda 1 0.11642E-00 0.12549E-02 ; semi repulsive + STY Qd 1 0.11642E-00 0.12549E-02 ; semi repulsive + STY Qa 1 0.11642E-00 0.12549E-02 ; semi repulsive + STY Q0 1 0.11642E-00 0.12549E-02 ; semi repulsive + STY SQda 1 0.51203E-01 0.32367E-03 ; 75semi repulsive, s=0.43 + STY SQd 1 0.51203E-01 0.32367E-03 ; 75semi repulsive, s=0.43 + STY SQa 1 0.51203E-01 0.32367E-03 ; 75semi repulsive, s=0.43 + STY SQ0 1 0.51203E-01 0.32367E-03 ; 75semi repulsive, s=0.43 +; + P5 SCY 1 0.86233E-01 0.92953E-03 ; repulsive + SP5 SCY 1 3.7928E-02 2.3976E-04 ; 75repulsive, s=0.43 + P4 SCY 1 0.86233E-01 0.92953E-03 ; repulsive, used for PS-water interactions + BP4 SCY 1 0.86233E-01 0.92953E-03 ; repulsive, used for PS-water interactions + SP4 SCY 1 0.37928E-01 0.23976E-03 ; 75repulsive, s=0.43 + P3 SCY 1 0.99167E-01 0.10690E-02 ; almost repulsive + SP3 SCY 1 4.3617E-02 2.7572E-04 ; 75almost repulsive, s=0.43 + P2 SCY 1 0.99167E-01 0.10690E-02 ; almost repulsive + SP2 SCY 1 4.3617E-02 2.7572E-04 ; 75almost repulsive, s=0.43 + P1 SCY 1 0.11642E-00 0.12549E-02 ; semi repulsive + SP1 SCY 1 0.51203E-01 0.32367E-03 ; 75semi repulsive s=0.43 + Nda SCY 1 0.11642E-00 0.12549E-02 ; semi repulsive + SNda SCY 1 0.51203E-01 0.32367E-03 ; 75semi repulsive s=0.43 + Nd SCY 1 0.11642E-00 0.12549E-02 ; semi repulsive + SNd SCY 1 0.51203E-01 0.32367E-03 ; 75semi repulsive s=0.43 + Na SCY 1 0.11642E-00 0.12549E-02 ; semi repulsive + SNa SCY 1 0.51203E-01 0.32367E-03 ; 75semi repulsive s=0.43 + N0 SCY 1 0.11642E-00 0.12549E-02 ; semi repulsive + SN0 SCY 1 0.51203E-01 0.32367E-03 ; 75semi repulsive s=0.43 + C5 SCY 1 0.13366E-00 0.14408E-02 ; almost intermediate + SC5 SCY 1 0.58789E-01 0.37162E-03 ; 75almost intermediate, s=0.43 + C4 SCY 1 
0.13366E-00 0.14408E-02 ; almost intermediate + SC4 SCY 1 0.58789E-01 0.37162E-03 ; 75almost intermediate, s=0.43, used for PS-benzene interactions + C3 SCY 1 0.13366E-00 0.14408E-02 ; almost intermediate + SC3 SCY 1 0.58789E-01 0.37162E-03 ; 75almost intermediate, s=0.43 + C2 SCY 1 1.5091E-01 1.6267E-03 ; intermediate + AC2 SCY 1 1.5091E-01 1.6267E-03 ; intermediate + SC2 SCY 1 6.6374E-02 4.1958E-04 ; 75intermediate, s=0.43 + C1 SCY 1 1.5091E-01 1.6267E-03 ; intermediate + AC1 SCY 1 1.5091E-01 1.6267E-03 ; intermediate + SCY SC1 1 0.66375E-01 0.41957E-03 ; 75intermediate, s=0.43 + SCY Qda 1 0.45440E-00 0.25810E-01 ; super repulsive, s=0.62 + SCY Qd 1 0.45440E-00 0.25810E-01 ; super repulsive, s=0.62 + SCY Qa 1 0.45440E-00 0.25810E-01 ; super repulsive, s=0.62 + SCY Q0 1 0.45440E-00 0.25810E-01 ; super repulsive, s=0.62 + SQda SCY 1 0.86233E-01 0.92953E-03 ; repulsive + SQd SCY 1 0.86233E-01 0.92953E-03 ; repulsive + SQa SCY 1 0.86233E-01 0.92953E-03 ; repulsive + SQ0 SCY 1 0.86233E-01 0.92953E-03 ; repulsive +; +; +; PEO +; FG and LM, 12/2017 +; please notice that not all interactions have been validated +; +; please cite: +; On MARTINI models for polyethylene oxide +; F. Grunewald, A.H. de Vries, S.J. Marrink, L. 
Monticelli +; J Phys Chem B (2018), in preparation +; +EO EO 1 6.44779031E-02 4.07588234E-04 ; eps=3.4 +; +EO P5 1 1.50909015E-01 1.62668076E-03 ; eps=3.5 +EO SP5 1 6.63743120E-02 4.19576123E-04 ; EPS=3.5 +EO BP4 1 7.96491744E-02 5.03491348E-04 ; EPS=3.5 +EO P4 1 1.50909015E-01 1.62668076E-03 ; ** eps=3.5 +EO SP4 1 6.63743120E-02 4.19576123E-04 ; EPS=3.5 +EO P3 1 1.50909015E-01 1.62668076E-03 ; EPS=3.5 +EO SP3 1 6.63743120E-02 4.19576123E-04 ; EPS=3.5 +EO P2 1 1.33662270E-01 1.44077439E-03 ; eps=3.1 +EO SP2 1 5.87886764E-02 3.71624566E-04 ; eps=3.1 +EO P1 1 1.33662270E-01 1.44077439E-03 ; ** eps=3.1 +EO SP1 1 5.87886764E-02 3.71624566E-04 ; eps=3.1 +EO Nda 1 1.33662270E-01 1.44077439E-03 ; eps=3.1 +EO SNda 1 5.87886764E-02 3.71624566E-04 ; eps=3.1 +EO Nd 1 1.33662270E-01 1.44077439E-03 ; eps=3.1 +EO SNd 1 5.87886764E-02 3.71624566E-04 ; eps=3.1 +EO Na 1 1.33662270E-01 1.44077439E-03 ; ** eps=3.1 +EO SNa 1 5.87886764E-02 3.71624566E-04 ; eps=3.1 +EO N0 1 1.33662270E-01 1.44077439E-03 ; eps=3.1 +EO SN0 1 5.87886764E-02 3.71624566E-04 ; eps=3.1 +EO C5 1 1.27194741E-01 1.37105950E-03 ; eps=2.95 +EO SC5 1 6.71328756E-02 4.24371279E-04 ; ** eps=2.95 * 0.9 +EO C4 1 1.27194741E-01 1.37105950E-03 ; eps=2.95 +EO SC4 1 5.59440630E-02 3.53642733E-04 ; eps=2.95 +EO C3 1 1.27194741E-01 1.37105950E-03 ; eps=2.95 +EO SC3 1 5.59440630E-02 3.53642733E-04 ; eps=2.95 +EO C2 1 1.16415526E-01 1.25486802E-03 ; eps=2.7 +EO SC2 1 5.12030407E-02 3.23673009E-04 ; eps=2.7 +EO C1 1 1.09085659E-01 1.17585781E-03 ; eps=2.53 +EO SC1 1 5.75749747E-02 3.63952317E-04 ; ** eps=2.53 * 0.9 +EO AC1 1 1.09085659E-01 1.17585781E-03 ; eps=2.53 +EO AC2 1 1.16415526E-01 1.25486802E-03 ; eps=2.7 +EO Qa 1 1.50909015E-01 1.62668076E-03 ; EPS=3.5 +EO Qda 1 1.50909015E-01 1.62668076E-03 ; EPS=3.5 +EO Qd 1 1.50909015E-01 1.62668076E-03 ; EPS=3.5 +EO Q0 1 1.50909015E-01 1.62668076E-03 ; EPS=3.5 +EO SQda 1 6.63743120E-02 4.19576123E-04 ; EPS=3.5 +EO SQd 1 6.63743120E-02 4.19576123E-04 ; EPS=3.5 +EO SQa 1 
6.63743120E-02 4.19576123E-04 ; EPS=3.5 +EO SQ0 1 6.63743120E-02 4.19576123E-04 ; EPS=3.5 + +; cross-interactions CNP-PEO, PS-PEO, and CNP-PS (not validated) +; + EO CNP 1 6.71328756E-02 4.24371279E-04 + EO STY 1 6.71328756E-02 4.24371279E-04 + EO SCY 1 5.75749747E-02 3.63952317E-04 + CNP STY 1 6.8271E-02 4.3156E-04 ; e 3.6 ; small + CNP SCY 1 6.6374E-02 4.1958E-04 ; e 3.5 ; small + +;;;;;; WATER (representing 4 H2O molecules) + +[ moleculetype ] +; molname nrexcl + W 1 + +[ atoms ] +;id type resnr residu atom cgnr charge + 1 P4 1 W W 1 0 + +;;;;;; ANTIFREEZE (prevents freezing of water) + +[ moleculetype ] +; molname nrexcl + WF 1 + +[ atoms ] +;id type resnr residu atom cgnr charge + 1 BP4 1 WF WF 1 0 + + + + diff --git a/tests/data/martini_v2.0_ions.itp b/tests/data/martini_v2.0_ions.itp new file mode 100644 index 0000000..73de72f --- /dev/null +++ b/tests/data/martini_v2.0_ions.itp @@ -0,0 +1,58 @@ +;;; IONS +; +; note, ions are not to be taken too seriously using a CG presentation. +; long range electrostatic interactions are absent, and for small ions +; the first hydration shell is considered an implicit part of the CG ion. + +;;;;;; HYDRATED SODIUM ION +; +; same particle type would represent HYDRATED POTASSIUM + +[moleculetype] +; molname nrexcl + NA+ 1 + +[atoms] +;id type resnr residu atom cgnr charge + 1 Qd 1 ION NA+ 1 1.0 + + +;;;;;; HYDRATED CHLORIDE ION + +[moleculetype] +; molname nrexcl + CL- 1 + +[atoms] +;id type resnr residu atom cgnr charge + 1 Qa 1 ION CL- 1 -1.0 + + +;;;;;; CHOLINE ION + +[moleculetype] +; molname nrexcl + NC3+ 1 + +[atoms] +;id type resnr residu atom cgnr charge + 1 Q0 1 ION NC3 1 1.0 + + +;;;;;; HYDRATED CALCIUM ION +; +; Warning: this is untested, and likely not very realistic description of +; a calcium ion. For a proper description of calcium, the solvation free +; energy should be increased. This could be achieved in an ad-hoc manner +; through increasing of the LJ well depth for water-calcium interactions. 
+
+[moleculetype]
+; molname nrexcl
+ CA+ 1
+
+[atoms]
+;id type resnr residu atom cgnr charge
+ 1 Qd 1 ION CA+ 1 2.0
+
+
+
diff --git a/tests/data/system.top b/tests/data/system.top
new file mode 100644
index 0000000..e20f2d7
--- /dev/null
+++ b/tests/data/system.top
@@ -0,0 +1,14 @@
+; Include forcefield parameters
+#include "martini_v2.0_PEO_PS_CNP.itp"
+#include "martini_v2.0_ions.itp"
+#include "cg_model.itp"
+
+[ system ]
+; Name
+G1 (PAMAM) in water
+
+[ molecules ]
+; Compound #mols
+G1 1
+W 1238
+CL- 8
\ No newline at end of file
diff --git a/tests/shared/test_utils.py b/tests/shared/test_utils.py
new file mode 100644
index 0000000..a57f4d5
--- /dev/null
+++ b/tests/shared/test_utils.py
@@ -0,0 +1,57 @@
+import pytest
+
+import numpy as np
+from numpy.testing import assert_almost_equal
+
+from swarmcg.shared import utils
+from swarmcg.shared.exceptions import OptimisationResultsError
+
+
+# TODO: add test on failure but needs dedicated exceptions rather than sys.exit
+def test_forward_fill():
+    # given: a list whose valid value 4 is followed by a run of two None entries
+    x = [1, 2, 10, 4, None, None, 10, 10]
+
+    # when: forward_fill is called with None as cond_value
+    cond_value = None
+    result = utils.forward_fill(x, cond_value)
+
+    # then: each None is expected to be replaced by the last preceding value (4)
+    expected = [1, 2, 10, 4, 4, 4, 10, 10]
+    assert result == expected
+
+
+def test_forward_fill_fail():
+    # given: a list that contains only None entries
+    x = [None, None]
+
+    # when / then: forward_fill is expected to raise OptimisationResultsError
+    cond_value = None
+    with pytest.raises(OptimisationResultsError):
+        _ = utils.forward_fill(x, cond_value)
+
+
+def test_sma():
+    # given: the integers 0..9
+    x = np.arange(10)
+
+    # when: sma is called with a window size of 5
+    window_size = 5
+    result = utils.sma(x, window_size)
+
+    # then: the values 2..9 followed by two zeros are expected (same length as x)
+    expected = np.array([2, 3, 4, 5, 6, 7, 8, 9, 0, 0], dtype=float)
+    assert_almost_equal(result, expected)
+
+def test_ema():
+    # given: the integers 0..9
+    x = np.arange(10)
+
+    # when: ewma is called with 1 and a window size of 5 (NOTE(review): role of the 1 is not visible here - confirm)
+    window_size = 5
+    result = utils.ewma(x, 1, window_size)
+
+    # then: the values 2..9 followed by two zeros are expected (same length as x)
+    expected = np.array([2, 3, 4, 5, 6, 7, 8, 9, 0, 0], dtype=float)
+    assert_almost_equal(result, expected)
+