From e2306887e7579aa3fabae4211c03cce42e1337d2 Mon Sep 17 00:00:00 2001 From: Juanvi Alegre-Requena Date: Tue, 14 Feb 2023 13:07:48 +0100 Subject: [PATCH 1/6] 1. Suffix and prefix to csearch, cmin and qprep --- aqme/aqme.py | 4 ++++ aqme/cmin.py | 20 ++++++++++++++++---- aqme/csearch/crest.py | 10 ++++++++-- aqme/csearch/utils.py | 43 ++++++++++++++++++++++-------------------- aqme/qprep.py | 29 +++++++++++----------------- aqme/utils.py | 8 ++++++++ docs/Misc/versions.rst | 4 ++++ 7 files changed, 74 insertions(+), 44 deletions(-) diff --git a/aqme/aqme.py b/aqme/aqme.py index aaf8c059..9972ded7 100644 --- a/aqme/aqme.py +++ b/aqme/aqme.py @@ -100,6 +100,7 @@ def main(): constraints_angle=args.constraints_angle, constraints_dihedral=args.constraints_dihedral, prefix=args.prefix, + suffix=args.suffix, stacksize=args.stacksize, ewin_fullmonte=args.ewin_fullmonte, ewin_sample_fullmonte=args.ewin_sample_fullmonte, @@ -143,6 +144,8 @@ def main(): opt_fmax=args.opt_fmax, ani_method=args.ani_method, stacksize=args.stacksize, + prefix=args.prefix, + suffix=args.suffix, ) # QPREP @@ -161,6 +164,7 @@ def main(): charge=args.charge, mult=args.mult, suffix=args.suffix, + prefix=args.prefix, chk=args.chk, mem=args.mem, nprocs=args.nprocs, diff --git a/aqme/cmin.py b/aqme/cmin.py index e6e1b668..86812fb3 100644 --- a/aqme/cmin.py +++ b/aqme/cmin.py @@ -52,6 +52,10 @@ stacksize : str, default='1G' Controls the stack size used (especially relevant for xTB/CREST calculations of large systems, where high stack sizes are needed) + prefix : str, default='' + Prefix added to all the names + suffix : str, default='' + Suffix added to all the names xTB only ++++++++ @@ -103,7 +107,13 @@ from rdkit.Geometry import Point3D import pandas as pd import time -from aqme.utils import load_variables, rules_get_charge, substituted_mol, mol_from_sdf_or_mol_or_mol2 +from aqme.utils import ( + load_variables, + rules_get_charge, + substituted_mol, + mol_from_sdf_or_mol_or_mol2, + add_prefix_suffix +) from aqme.filter import ewin_filter, pre_E_filter, RMSD_and_E_filter from aqme.cmin_utils import creation_of_dup_csv_cmin from aqme.csearch.crest import xtb_opt_main @@ -190,6 +200,8 @@ def __init__(self, **kwargs): for file in files_cmin: # load jobs for cmin minimization self.mols, self.name = self.load_jobs(file) + self.name = add_prefix_suffix(self.name, self.args) + self.args.log.write(f"\n\n ----- {self.name} -----") if self.args.destination is None: @@ -243,9 +255,7 @@ def __init__(self, **kwargs): def load_jobs(self, file): - self.args.log.write(f"\n\no Multiple minimization of {file} with {self.args.program}") - - # read SDF files from RDKit optimization + # read SDF files try: inmols = mol_from_sdf_or_mol_or_mol2(file, 'cmin') except OSError: @@ -448,6 +458,8 @@ def ani_optimize(self, mol, charge, mult): self.args.log.finalize() sys.exit() + self.args.log.write(f"\no Starting ANI optimization") + os.environ["KMP_DUPLICATE_LIB_OK"] = "True" DEVICE = torch.device("cpu") diff --git a/aqme/csearch/crest.py b/aqme/csearch/crest.py index d7be586d..3b06e6b4 100644 --- a/aqme/csearch/crest.py +++ b/aqme/csearch/crest.py @@ -366,9 +366,15 @@ def xtb_opt_main( if file == 'crest_clustered.xyz': os.rename('crest_clustered.xyz', f"{dat_dir}/{name_no_path}_clustered.xyz") else: - os.remove(file) + try: + os.remove(file) + except OSError: # this avoids problems when running AQME in HPCs + pass except IsADirectoryError: - shutil.rmtree(file) + try: + shutil.rmtree(file) + except OSError: # this avoids problems when running AQME in HPCs + pass os.chdir(self.args.w_dir_main) diff --git a/aqme/csearch/utils.py b/aqme/csearch/utils.py index 134b041d..1d0e9685 100644 --- a/aqme/csearch/utils.py +++ b/aqme/csearch/utils.py @@ -13,6 +13,7 @@ get_info_input, mol_from_sdf_or_mol_or_mol2, read_xyz_charge_mult, + add_prefix_suffix ) from aqme.csearch.crest import nci_ts_mol @@ -106,15 +107,15 @@ def constaint_2_list(contraints): def prepare_direct_smi(args): job_inputs = [] - if args.prefix == "": - if args.name is not None: - name = args.name - else: - args.log.write(f"\nx Specify a name ('name' option) when using the 'smi' option!") - args.log.finalize() - sys.exit() + + if args.name is not None: + name = args.name + name = add_prefix_suffix(name, args) else: - name = f"{args.prefix}_{args.name}" + args.log.write(f"\nx Specify a name ('name' option) when using the 'smi' option!") + args.log.finalize() + sys.exit() + obj = ( args.smi, name, @@ -134,11 +135,11 @@ def prepare_smiles_files(args, csearch_file): with open(csearch_file) as smifile: lines = [line for line in smifile if line.strip()] job_inputs = [] - for i, line in enumerate(lines): + for _, line in enumerate(lines): ( smi, name, - ) = prepare_smiles_from_line(line, i, args) + ) = prepare_smiles_from_line(line, args) obj = ( smi, name, @@ -154,15 +155,13 @@ def prepare_smiles_files(args, csearch_file): return job_inputs -def prepare_smiles_from_line(line, i, args): +def prepare_smiles_from_line(line, args): toks = line.split() # editing part smiles = toks[0] - if args.prefix == "": - name = "".join(toks[1]) - else: - name = f"{args.prefix}_{i}_{''.join(toks[1])}" + name = toks[1] + name = add_prefix_suffix(name, args) return smiles, name @@ -189,10 +188,9 @@ def generate_mol_from_csv(args, csv_smiles, index): sys.exit() try: - if args.prefix == "": - name = csv_smiles.loc[index, "code_name"] - else: - name = f'{args.prefix}_{csv_smiles.loc[index, "code_name"]}' + name = csv_smiles.loc[index, "code_name"] + name = add_prefix_suffix(name, args) + except KeyError: args.log.write("\nx Make sure the CSV file contains a column called 'code_name' with the names of the molecules!") args.log.finalize() @@ -264,6 +262,8 @@ def prepare_cdx_files(args, csearch_file): job_inputs = [] for i, (smiles, _) in enumerate(molecules): name = f"{csearch_file.split('.')[0]}_{str(i)}" + name = add_prefix_suffix(name, args) + obj = ( smiles, name, @@ -302,7 +302,9 @@ def prepare_com_files(args, csearch_file): charge, mult = read_xyz_charge_mult(xyz_file) xyz_2_sdf(xyz_file) name = os.path.splitext(csearch_file)[0] - sdffile = f"{name}.sdf" + name = add_prefix_suffix(name, args) + + sdffile = f"{os.path.splitext(csearch_file)[0]}.sdf" suppl, _, _, _ = mol_from_sdf_or_mol_or_mol2(sdffile, "csearch") obj = ( @@ -339,6 +341,7 @@ def prepare_sdf_files(args, csearch_file): job_inputs = [] for mol, charge, mult, name in zip(suppl, charges, mults, IDs): + name = add_prefix_suffix(name, args) obj = ( mol, name, diff --git a/aqme/qprep.py b/aqme/qprep.py index 61061175..9ac3f653 100644 --- a/aqme/qprep.py +++ b/aqme/qprep.py @@ -27,6 +27,8 @@ Multiplicity of the calculations used in the following input files. If mult isn't defined, it defaults to 1 suffix : str, default='' Suffix for the new input files (i.e. FILENAME_SUFFIX.com for FILENAME.log) + prefix : str, default='' + Prefix added to all the names chk : bool, default=False Include the chk input line in new input files for Gaussian calculations mem : str, default='4GB' @@ -58,6 +60,7 @@ load_variables, read_xyz_charge_mult, mol_from_sdf_or_mol_or_mol2, + add_prefix_suffix, ) from aqme.csearch.crest import xyzall_2_xyz from pathlib import Path @@ -246,28 +249,19 @@ def get_header(self, qprep_data): """ txt = "" + name_file = add_prefix_suffix(qprep_data["name"], self.args) if self.args.program.lower() == "gaussian": if self.args.chk: - if self.args.suffix != "": - txt += f'%chk={qprep_data["name"]}_{self.args.suffix}.chk\n' - else: - txt += f'%chk={qprep_data["name"]}.chk\n' + txt += f'%chk={name_file}.chk\n' txt += f"%nprocshared={self.args.nprocs}\n" txt += f"%mem={self.args.mem}\n" - txt += f"# {self.args.qm_input}" - txt += "\n\n" - if self.args.suffix != "": - txt += f'{qprep_data["name"]}_{self.args.suffix}\n\n' - else: - txt += f'{qprep_data["name"]}\n\n' + txt += f"# {self.args.qm_input}\n\n" + txt += f'{name_file}\n\n' txt += f'{qprep_data["charge"]} {qprep_data["mult"]}\n' elif self.args.program.lower() == "orca": - if self.args.suffix != "": - txt += f'# {qprep_data["name"]}_{self.args.suffix}\n' - else: - txt += f'# {qprep_data["name"]}\n' + txt += f'# {name_file}\n' if self.args.mem.find("GB"): mem_orca = int(self.args.mem.split("GB")[0]) * 1000 elif self.args.mem.find("MB"): @@ -332,10 +326,9 @@ def write(self, qprep_data): extension = "com" elif self.args.program.lower() == "orca": extension = "inp" - if self.args.suffix != "": - comfile = f'{qprep_data["name"]}_{self.args.suffix}.{extension}' - else: - comfile = f'{qprep_data["name"]}.{extension}' + + name_file = add_prefix_suffix(qprep_data["name"], self.args) + comfile = f'{name_file}.{extension}' if os.path.exists(self.args.w_dir_main / comfile): os.remove(self.args.w_dir_main / comfile) diff --git a/aqme/utils.py b/aqme/utils.py index 6e04b1cb..c4ac0ec2 100644 --- a/aqme/utils.py +++ b/aqme/utils.py @@ -810,3 +810,11 @@ def mol_from_sdf_or_mol_or_mol2(input_file, module): mults.append(mult) return suppl, charges, mults, IDs + +def add_prefix_suffix(name, args): + if args.prefix != "": + name = f"{args.prefix}_{name}" + if args.suffix != "": + name += f'_{args.suffix}' + + return name diff --git a/docs/Misc/versions.rst b/docs/Misc/versions.rst index afe8bcaa..3aaaaac1 100644 --- a/docs/Misc/versions.rst +++ b/docs/Misc/versions.rst @@ -4,11 +4,15 @@ Versions ======== +Version 1.4.5 [`url `__] + - Suffix/prefix options work in CSEARCH, CMIN and QPREP + Version 1.4.4 [`url `__] - When using a CSV as input, the user can specify charge and mult for each species by using the charge/mult columns - QCORR now detects duplicates including the successful calculations from previous runs - Fixed an error in full_check from QCORR when using genecp + - Admits lists in command lines specified as ["X"], "[X]" and '["X"]' Version 1.4.3 [`url `__] - Return metal into RDKit mol object when using the metal_atoms option with CSEARCH-CREST From 7ec6235a7a86cfb7cf2a4c2dcf7a0fad9831fdd1 Mon Sep 17 00:00:00 2001 From: Turki Alturaifi Date: Tue, 21 Feb 2023 12:11:36 -0500 Subject: [PATCH 2/6] added auto metal detection --- aqme/aqme.py | 1 + aqme/argument_parser.py | 1 + aqme/csearch/base.py | 43 ++++++++++++++++++++++++++++------------- 3 files changed, 32 insertions(+), 13 deletions(-) diff --git a/aqme/aqme.py b/aqme/aqme.py index 9972ded7..6120b7fa 100644 --- a/aqme/aqme.py +++ b/aqme/aqme.py @@ -74,6 +74,7 @@ def main(): sample=args.sample, max_workers=args.max_workers, metal_atoms=args.metal_atoms, + auto_metal_atoms=args.auto_metal_atoms, metal_oxi=args.metal_oxi, metal_idx=args.metal_idx, complex_coord=args.complex_coord, diff --git a/aqme/argument_parser.py b/aqme/argument_parser.py index 422f0f17..3ec40437 100644 --- a/aqme/argument_parser.py +++ b/aqme/argument_parser.py @@ -18,6 +18,7 @@ "qcorr": False, "smi": None, "metal_atoms": [], + "auto_metal_atoms": True, "charge": None, "mult": None, "complex_coord": [], diff --git a/aqme/csearch/base.py b/aqme/csearch/base.py index 536d2c2d..9998d4ec 100644 --- a/aqme/csearch/base.py +++ b/aqme/csearch/base.py @@ -238,6 +238,9 @@ def __init__(self, **kwargs): self.args.log.finalize() sys.exit() + if str(self.args.auto_metal_atoms) == "False": + self.args.auto_metal_atoms = False + if self.args.program.lower() == "crest": try: subprocess.run( @@ -485,6 +488,22 @@ def compute_confs( shutil.copy(f"{name}.xyz", f"{name}_{self.args.program.lower()}.xyz") template_opt = False + + # check if metals and oxidation states are both used + if self.args.metal_atoms != []: + if self.args.metal_oxi == []: + self.args.log.write(f"\nx Metal atoms ({self.args.metal_atoms}) were specified without their corresponding oxidation state (metal_oxi option)") + self.args.log.finalize() + sys.exit() + + if self.args.metal_oxi != []: + if self.args.metal_atoms == []: + self.args.log.write(f"\nx Metal oxidation states ({self.args.metal_oxi}) were specified without their corresponding metal atoms (metal_atoms option)") + self.args.log.finalize() + sys.exit() + + if self.args.auto_metal_atoms: + _ = self.find_metal_atom(mol) # replaces the metal for an I atom if len(self.args.metal_atoms) >= 1: ( @@ -567,6 +586,17 @@ def compute_confs( frames = [self.final_dup_data, total_data] self.final_dup_data = pd.concat(frames, ignore_index=True, sort=True) + # automatic detection of metal atoms + def find_metal_atom(self,mol): + transition_metals = ['Sc', 'Ti', 'V', 'Cr', 'Mn', 'Fe', 'Co', 'Ni', 'Cu', 'Zn', 'Y', 'Zr', 'Nb', 'Mo', + 'Tc', 'Ru', 'Rh', 'Pd', 'Ag', 'Cd', 'Hf', 'Ta', 'W', 'Re', 'Os', 'Ir', 'Pt', 'Au', + 'Hg', 'Rf', 'Db', 'Sg', 'Bh', 'Hs', 'Mt', 'Ds', 'Rg', 'Cn', 'Nh', 'Fl', 'Mc', 'Lv', 'Ts', 'Og'] + for atom in mol.GetAtoms(): + if atom.GetSymbol() in transition_metals: + self.args.metal_atoms.append(atom.GetSymbol()) + if len(self.args.metal_atoms) > 0: + self.args.log.write(f"\no AQME recognized the following metal atoms: {self.args.metal_atoms}") + def conformer_generation( self, mol, @@ -593,19 +623,6 @@ def conformer_generation( start_time = time.time() status = None - # check if metals and oxidation states are both used - if self.args.metal_atoms != []: - if self.args.metal_oxi == []: - self.args.log.write(f"\nx Metal atoms ({self.args.metal_atoms}) were specified without their corresponding oxidation state (metal_oxi option)") - self.args.log.finalize() - sys.exit() - - if self.args.metal_oxi != []: - if self.args.metal_atoms == []: - self.args.log.write(f"\nx Metal oxidation states ({self.args.metal_oxi}) were specified without their corresponding metal atoms (metal_atoms option)") - self.args.log.finalize() - sys.exit() - # Set charge and multiplicity metal_found = False # user can overwrite charge and mult with the corresponding arguments From 73fdb7adba7efba08cea031a4e532252deb797ac Mon Sep 17 00:00:00 2001 From: Juanvi Alegre-Requena Date: Fri, 24 Feb 2023 17:16:51 +0100 Subject: [PATCH 3/6] 1. Migrating Readme to ReadtheDocs --- Installation instructions.txt | 12 - README.md | 386 +------------------------------ aqme/csearch/base.py | 6 +- docs/Misc/license.rst | 2 +- docs/Quickstart/basic.rst | 2 +- docs/Quickstart/development.rst | 2 +- docs/Quickstart/requirements.rst | 2 +- docs/Quickstart/setup.rst | 6 +- README.rst => docs/README.rst | 6 +- docs/index.rst | 8 +- meta.yaml | 55 ----- setup.py | 6 +- 12 files changed, 26 insertions(+), 467 deletions(-) delete mode 100644 Installation instructions.txt rename README.rst => docs/README.rst (97%) delete mode 100644 meta.yaml diff --git a/Installation instructions.txt b/Installation instructions.txt deleted file mode 100644 index 7311f9fc..00000000 --- a/Installation instructions.txt +++ /dev/null @@ -1,12 +0,0 @@ -## Installation -Check our AQME installation in 2 mins video (https://www.youtube.com/watch?v=VeaBzqIZHbo) for a quick installation guide using pip. In a nutshell, AQME and its dependencies are installed as follows: -1. Using conda-forge: "conda install -c conda-forge aqme" -2. Using the code in GitHub: "pip install ." with setup.py (most updated version, recommended) -3. Using pip: "pip install aqme" - -Requirements: - * When AQME is installed from pip, RDKit and Openbabelare required: "conda install -c conda-forge rdkit openbabel" - * If CMIN is used with ANI, torch-related modules are needed: "pip install torch torchvision torchani" - -Known incompatibilities: - * RDKit cannot be installed through "pip install rdkit" in Windows when Anaconda prompts are used diff --git a/README.md b/README.md index 71200b34..5dc3796a 100644 --- a/README.md +++ b/README.md @@ -4,392 +4,20 @@ [![Downloads](https://img.shields.io/conda/dn/conda-forge/aqme?label=Downloads&logo=Anaconda)](https://anaconda.org/conda-forge/aqme) [![Read the Docs](https://img.shields.io/readthedocs/aqme?label=Read%20the%20Docs&logo=readthedocs)](https://aqme.readthedocs.io/) -___ -#

Automated Quantum Mechanical Environments (AQME)

-##

-- Table of contents --

- -###

[What is AQME?](https://github.com/jvalegre/aqme#what-is-aqme)     [Installation](https://github.com/jvalegre/aqme#installation)     [Requirements](https://github.com/jvalegre/aqme#requirements)

-###

[Example workflows and running tests](https://github.com/jvalegre/aqme#example-workflows-and-running-tests-1)

-###

[Features and modules](https://github.com/jvalegre/aqme#features-and-modules)     [Quickstart](https://github.com/jvalegre/aqme#quickstart)     [Extended documentation](https://github.com/jvalegre/aqme#extended-documentation-installation-use-examples-etc)

-###

[Developers and help desk](https://github.com/jvalegre/aqme#developers-and-help-desk)     [License](https://github.com/jvalegre/aqme#license)     [Reference](https://github.com/jvalegre/aqme#reference)

-___ -## What is AQME? -The code is an ensemble of automated QM workflows that can be run through jupyter notebooks, command lines and yaml files. Some of the most popular workflows include: - - [ ] RDKit- and CREST-based conformer generator leading to ready-to-submit QM input files starting from individual files or SMILES databases - - [ ] Post-processing of QM output files to fix convergence errors, extra imaginary frequencies, unfinished jobs, duplicates and error terminations, as well as to detect spin contamination, isomerization issues, and more optimization problems - - [ ] Analysis of homogeneity of QM calculations (same level of theory, grid size, program and version, solvation models, etc) - - [ ] Generation of xTB, DFT and RDKit descriptors in json and csv files that are ready to use in machine-learning models or used to predict NMR spectra - - [ ] Other useful workflows +## Documentation +Full documentation with installation instructions, technical details and examples can be found in [Read the Docs](https://aqme.readthedocs.io). Don't miss out the latest hands-on tutorials from our [YouTube channel](https://www.youtube.com/channel/UCHRqI8N61bYxWV9BjbUI4Xw)! -## Installation -Check our [AQME installation in 2 mins](https://youtu.be/VeaBzqIZHbo) video for a quick installation guide. In a nutshell, AQME and its dependencies are installed as follows: +## Recommended installation and update guide +In a nutshell, AQME and its dependencies are installed/updated as follows: 1. Install using conda-forge: `conda install -c conda-forge aqme` -2. Install using pip: `pip install aqme` -3. Update to the latest version: `pip install aqme --upgrade` - -Extra requirements if xTB or CREST are used (MacOS and Linux only): - * xTB: `conda install -y -c conda-forge xtb` - * CREST: `conda install -y -c conda-forge crest` - -Extra requirements if CMIN is used with ANI models: - * torch-related modules: `pip install torch torchvision torchani` - -Known incompatibilities: - * RDKit cannot be installed through `pip install rdkit` in Windows when Anaconda prompts are used - -## Requirements -* Python 3 -* Any of the AQME installation options as detailed in the installation section -* Torch-related modules if CMIN is used (shown in the installation section) - -## Example workflows and running tests -- The inputs to run pre-defined AQME end-to-end workflows are available in the "/Example_workflows/End-to-end_Workflows" folder. Choose the workflow and run the inputs. -- Automated protocols for individual modules and tasks are provided in the /Example_workflows/ folder inside subfolders with the corresponding module names. -- To run the tests, run pytest in a terminal as follows `pytest --v` from the main AQME folder or `pytest --v PATH` using the PATH where AQME is installed. - -## Features and modules -### CSEARCH -Module on charge of conformational sampling starting from multiple input types (SMILES, csv, sdf, xyz, etc). Options: -#### RDKit-based conformational sampling -Faster sampling, suitable especially for unimolecular systems. Options: - - [ ] RDKit standard sampling - - [ ] Systematic Unbounded Multiple Minimum search (SUMM) - - [ ] FullMonte sampling -#### CREST-based conformational sampling -Slower sampling, suitable for all types of systems (including noncovalent complexes and constrained systems such as transition states) - -### CMIN -Module used to refine conformers generated in CSEARCH through new geometry optimizations. Options: - - [ ] xTB (GFN0-xTB, GFN1-xTB, GFN2-xTB, GFN-FF) - - [ ] ANI (ANI-1x, ANI-1ccx, ANI-2x) - -### QPREP -Generator of input files for QM calculations. Options: - - [ ] Gaussian - - [ ] ORCA - - [ ] pySCF (loading parameters in jupyter notebook) - -### QCORR -cclib-based analyzer of output files from multiple QM programs. This module: - - [ ] Separates normally terminated files with no errors, extra imaginary frequencies, duplicates, isomerization to other systems and spin contamination - - [ ] Automatically generates new com files to "fix" the different issues of the calculations with strategies that are optimal for each type of issue (Gaussian and ORCA) - - [ ] Checks that all the calculations are homogeneous (i.e. using the same level of theory, same grid size, same program and version, solvation model, etc) - -### QDESC -Descriptor generator from multiple input types such as SMILES, log files, xyz, etc. Descriptors generated with: - - [ ] RDKit descriptors (i.e. number of polar H, number of aromatic rings, etc) - - [ ] xTB (i.e. atomic charges, molecular dipole, solvation energy, etc) - - [ ] QM programs (i.e. descriptors from cclib) - -## Quickstart -### Using AQME in Jupyter Notebooks -There are multiple ready-to-use workflows presented as jupyter notebooks in the 'Example workflows' folder. Some examples are: - * CSEARCH_CMIN_conformer_generation folder --> CSEARCH/CMIN conformational sampling from SMILES and creation of QM input files - - * QCORR_processing_QM_outputs --> QCORR analysis of Gaussian output files, generation of JSON files with all the information and creation of new QM input files - - * QPREP_generating_input_files --> QPREP preparation of input files for Gaussian, ORCA and PySCF from LOG/OUT, SDF and JSON files - -### Using AQME through command lines in terminals -AQME can also be run through command lines. Some examples are: - * CSEARCH for conformer generation with one SMILES and name using RDKit or CREST (use rdkit or crest in --program): - ``` - python -m aqme --csearch --program rdkit --smi "CCC" --name proprane - ``` - * CSEARCH for conformer generation with multiple SMILES and names (i.e. from a database in CSV format): - ``` - python -m aqme --csearch --program rdkit --input FILENAME.csv - ``` - ** The csv file must contain the list of SMILES in a column called "SMILES" and the corresponding names in a column called "code_name" (see Example_workflows for more information) - - * CMIN geometry optimization with xTB or ANI (use xtb or ani in --program; use sdf, xyz, com/gjf or pdb in --files): - ``` - python -m aqme --cmin --program xtb --files "*.sdf" - ``` - - * QPREP input file generation from SDF, JSON and LOG/OUT files (replace "*.sdf" for the corresponding format): - ``` - python -m aqme --qprep --program gaussian --qm_input "M062x def2tzvp opt freq" --files "*.sdf" - ``` - - * QCORR analysis of Gaussian output files and json file generation: - ``` - python -m aqme --qcorr --program gaussian --freq_conv "opt=(calcfc,maxstep=5)" --files "*.log" - ``` - -## Extended documentation (installation, use, examples, etc) -** ReadTheDocs page in process ** -- [ ] CSEARCH arguments: - **input : str, default=''** - (If smi is None) Optionally, file containing the SMILES strings and names of the molecules. Current file extensions: .smi, .sdf, .cdx, .csv, .com, .gjf, .mol, .mol2, .xyz, .txt, .yaml, .yml, .rtf - For .csv files (i.e. FILENAME.csv), two columns are required, 'code_name' with the names and 'SMILES' for the SMILES string - **program : str, default=None** - Program required in the conformational sampling. Current options: 'rdkit', 'summ', 'fullmonte', 'crest' - **smi : str, default=None** - Optionally, define a SMILES string as input - **name : str, default=None** - (If smi is defined) optionally, define a name for the system - **w_dir_main : str, default=os.getcwd()** - Working directory - **varfile : str, default=None** - Option to parse the variables using a yaml file (specify the filename) - **max_workers : int, default=4** - Number of simultaneous RDKit jobs run with multiprocessing (WARNING! More than 12 simultaneous jobs might collapse your computer!) - **charge : int, default=None** - Charge of the calculations used in the following input files. If charge isn't defined, it automatically reads the charge of the SMILES string - **mult : int, default=None** - Multiplicity of the calculations used in the following input files. If mult isn't defined, it automatically reads the multiplicity of the mol object created with the SMILES string. Be careful with the automated calculation of mult from mol objects when using metals! - **prefix : str, default=''** - Prefix added to all the names - **suffix : str, default=''** - Suffix added to all the names - **stacksize : str, default='1G'** - Controls the stack size used (especially relevant for xTB/CREST calculations of large systems, where high stack sizes are needed) - - *-- Options for RDKit-based methods (RDKit, SUMM and Fullmonte), organic and organometallic molecules --* - **sample : int, default='auto'** - Number of conformers used initially in the RDKit sampling. If this option isn't specified, AQME automatically calculates (previously benchmarked) an approximate number based on number of rotatable bonds, XH (i.e. OH) groups, saturated cycles, etc (see the auto_sampling() function in csearch.py for more information) - **auto_sample : int, default=20** - Base multiplicator number used in the sample option - **ff : str, default='MMFF'** - Force field used in RDKit optimizations and energy calculations. Current options: MMFF and UFF (if MMFF fails, AQME tries to use UFF automatically) - **ewin_csearch : float, default=5.0** - Energy window in kcal/mol to discard conformers (i.e. if a conformer is more than the E window compared to the most stable conformer) - **initial_energy_threshold : float, default=0.0001** - Energy difference in kcal/mol between unique conformers for the first filter of only E - **energy_threshold : float, default=0.25** - Energy difference in kcal/mol between unique conformers for the second filter of E + RMS - **rms_threshold : float, default=0.25** - RMS difference between unique conformers for the second filter of E + RMS - **opt_steps_rdkit : int, default=1000** - Max cycles used in RDKit optimizations - **heavyonly : bool, default=True** - Only consider heavy atoms during RMS calculations for filtering (in the Chem.rdMolAlign.GetBestRMS() RDKit function) - **max_matches_rmsd : int, default=1000** - Max matches during RMS calculations for filtering (maxMatches option in the Chem.rdMolAlign.GetBestRMS() RDKit function) - **max_mol_wt : int, default=0** - Discard systems with molecular weights higher than this parameter (in g/mol). If 0 is set, this filter is off - **max_torsions : int, default=0** - Discard systems with more than this many torsions (relevant to avoid molecules with many rotatable bonds). If 0 is set, this filter is off - **seed : int, default=62609** - Random seed used during RDKit embedding (in the Chem.rdDistGeom.EmbedMultipleConfs() RDKit function) - - *-- Options for RDKit-based methods (RDKit, SUMM and Fullmonte), organometallic molecules only --* - **metal_atoms : list of str, default=[]** - Specify metal atom(s) of the system as [ATOM_TYPE]. Multiple metals can be used simultaneously (i.e. ['Pd','Ir']). This option is important to calculate the charge of metal complexes based on SMILES strings. Requires the use of metal_oxi. - **metal_oxi : list of int, default=[]** - Specify metal oxidation state as [NUMBER]. Multiple metals can be used simultaneously (i.e. [2,3]). - **complex_type : str, default=''** - Forces the metal complexes to adopt a predefined geometry. This option is especially relevant when RDKit predicts wrong complex geometries or gives a mixture of geometries. Current options: squareplanar, squarepyramidal, linear, trigonalplanar - - *-- Options for the SUMM method only --* - **degree : float, default=120.0** - Interval of degrees to rotate dihedral angles during SUMM sampling (i.e. 120.0 would create 3 conformers for each dihedral, at 0, 120 and 240 degrees) - - *-- Options for the Fullmonte method only --* - **ewin_fullmonte : float, default=5.0** - Energy window in kcal/mol to discard conformers (i.e. if a conformer is more than the E window compared to the most stable conformer) - **ewin_sample_fullmonte : float, default=2.0** - Energy window in kcal/mol to use conformers during the Fullmonte sampling (i.e. conformers inside the E window compared to the most stable conformer are considered as unique in each step of the sampling) - **nsteps_fullmonte : int, default=100** - Number of steps (or conformer batches) to carry during the Fullmonte sampling - **nrot_fullmonte : int, default=3** - Number of dihedrals to rotate simultaneously (picked at random) during each step of the Fullmonte sampling - **ang_fullmonte : float, default=30** - Available angle interval to use in the Fullmonte sampling. For example, if the angle is 120.0, the program chooses randomly between 120 and 240 degrees (picked at random) during each step of the sampling - - *-- Options for CREST --* - **nprocs : int, default=2** - Number of processors used in CREST optimizations - **constraints_atoms : list, default=[]** - Specify constrained atoms as [AT1,AT2,AT3]. An example of multiple constraints (atoms 1, 2 and 5 are frozen: [1,2,5] - **constraints_dist : list of lists, default=[]** - Specify distance constraints as [AT1,AT2,DIST]. An example of multiple constraints (atoms 1 and 2 with distance 1.8 Å, and atoms 4 and 5 with distance 2.0 Å): [[1,2,1.8],[4,5,2.0]] - **constraints_angle : list of lists, default=[]** - Specify angle constraints as [AT1,AT2,AT3,ANGLE]. An example of multiple constraints (atoms 1, 2 and 3 with an angle of 180 degrees, and atoms 4, 5 and 6 with an angle of 120): [[1,2,3,180],[4,5,6,120]] - **constraints_dihedral : list of lists, default=[]** - Specify dihedral constraints as [AT1,AT2,AT3,AT4,DIHEDRAL]. An example of multiple constraints (atoms 1, 2, 3 and 4 with a dihedral angle of 180 degrees, and atoms 4, 5, 6 and 7 with a dihedral angle of 120): [[1,2,3,4,180],[4,5,6,7,120]] - **crest_force : float, default=0.5** - Force constant for constraints in the .xcontrol.sample file for CREST jobs - **crest_keywords : str, default=None** - Define additional keywords to use in CREST that are not included in --chrg, --uhf, -T and -cinp. For example: '--alpb ch2cl2 --nci --cbonds 0.5' - **cregen : bool, default=False** - If True, perform a CREGEN analysis after CREST (filtering options below) - **cregen_keywords : str, default=None** - Additional keywords for CREGEN (i.e. cregen_keywords='--ethr 0.02') - **xtb_keywords : str, default=None** - Define additional keywords to use in the xTB pre-optimization that are not included in -c, --uhf, -P and --input. For example: '--alpb ch2cl2 --gfn 1' - -- [ ] CMIN arguments: - **files : str or list of str, default=None** - Input files. Formats accepted: XYZ, SDF, GJF, COM and PDB. Also, lists can be used (i.e. [FILE1.sdf, FILE2.sdf] or \*.FORMAT such as \*.sdf). - **program : str, default=None** - Program required in the conformational refining. Current options: 'xtb', 'ani' - **w_dir_main : str, default=os.getcwd()** - Working directory - **destination : str, default=None,** - Directory to create the output file(s) - **varfile : str, default=None** - Option to parse the variables using a yaml file (specify the filename) - **nprocs : int, default=2** - Number of processors used in the xTB optimizations - **charge : int, default=None** - Charge of the calculations used in the xTB calculations. If charge isn't defined, it automatically reads the charge from the input SDF files (if the files come from CSEARCH, which adds the property "Real charge") or calculates it from the generated mol object - **mult : int, default=None** - Multiplicity of the calculations used in the xTB calculations. If charge isn't defined, it automatically reads the charge from the input SDF files (if the files come from CSEARCH, which adds the property "Mult") or calculates it from the generated mol object. Be careful with the automated calculation of mult from mol objects when using metals! - **metal_atoms : list of str, default=[]** - Specify metal atom(s) of the system as [ATOM_TYPE]. Multiple metals can be used simultaneously (i.e. ['Pd','Ir']). This option is useful to calculate molecular charges automatically (i.e. from metal databases). Requires the use of metal_oxi. - **metal_oxi : list of int, default=[]** - Specify metal oxidation state as [NUMBER]. Multiple metals can be used simultaneously (i.e. [2,3]). - **ewin_cmin : float, default=5.0** - Energy window in kcal/mol to discard conformers (i.e. if a conformer is more than the E window compared to the most stable conformer) - **initial_energy_threshold : float, default=0.0001** - Energy difference in kcal/mol between unique conformers for the first filter of only E - **energy_threshold : float, default=0.25** - Energy difference in kcal/mol between unique conformers for the second filter of E + RMS - **rms_threshold : float, default=0.25** - RMS difference between unique conformers for the second filter of E + RMS - **stacksize : str, default='1G'** - Controls the stack size used (especially relevant for xTB/CREST calculations of large systems, where high stack sizes are needed) - - *-- Options for xTB --* - **xtb_keywords : str, default=None** - Define additional keywords to use in xTB that are not included in -c, --uhf, -P and --input. For example: '--alpb ch2cl2 --gfn 1' - **constraints_atoms : list, default=[]** - Specify constrained atoms as [AT1,AT2,AT3]. An example of multiple constraints (atoms 1, 2 and 5 are frozen: [1,2,5] - **constraints_dist : list of lists, default=[]** - Specify distance constraints as [AT1,AT2,DIST]. An example of multiple constraints (atoms 1 and 2 with distance 1.8 Å, and atoms 4 and 5 with distance 2.0 Å): [[1,2,1.8],[4,5,2.0]] - **constraints_angle : list of lists, default=[]** - Specify angle constraints as [AT1,AT2,AT3,ANGLE]. An example of multiple constraints (atoms 1, 2 and 3 with an angle of 180 degrees, and atoms 4, 5 and 6 with an angle of 120): [[1,2,3,180],[4,5,6,120]] - **constraints_dihedral : list of lists, default=[]** - Specify dihedral constraints as [AT1,AT2,AT3,AT4,DIHEDRAL]. An example of multiple constraints (atoms 1, 2, 3 and 4 with a dihedral angle of 180 degrees, and atoms 4, 5, 6 and 7 with a dihedral angle of 120): [[1,2,3,4,180],[4,5,6,7,120]] - - *-- Options for ANI --* - **opt_steps : int, default=1000** - Maximum number of steps used in the ase.optimize.BFGS optimizer. - **opt_fmax : float, default=0.05** - Maximum force value to determine convergence in the ase.optimize.BFGS optimizer. - **ani_method : str, default='ANI2x'** - ANI model used in the ase.optimize.BFGS optimizer. - -- [ ] QPREP arguments: - **files : mol object, str or list of str, default=None** - Files used to prepare input QM file(s). Formats accepted: mol object(s), Gaussian or ORCA LOG/OUT output files, JSON, XYZ, SDF, PDB. Also, lists can be used (i.e. [FILE1.log, FILE2.log] or \*.FORMAT such as \*.json). - **program : str, default=None** - Program required to create the new input files. Current options: 'gaussian', 'orca' - **atom_types : list of str, default=[]** - (If files is None) List containing the atoms of the system - **cartesians : list of str, default=[]** - (If files is None) Cartesian coordinates used for further processing - **w_dir_main : str, default=os.getcwd()** - Working directory - **destination : str, default=None,** - Directory to create the input file(s) - **varfile : str, default=None** - Option to parse the variables using a yaml file (specify the filename) - **qm_input : str, default=''** - Keywords line for new input files (i.e. 'B3LYP/6-31G opt freq') - **qm_end : str, default=''** - Final line(s) in the new input files - **charge : int, default=None** - Charge of the calculations used in the following input files. If charge isn't defined, it defaults to 0 - **mult : int, default=None** - Multiplicity of the calculations used in the following input files. If mult isn't defined, it defaults to 1 - **suffix : str, default=''** - Suffix for the new input files (i.e. FILENAME_SUFFIX.com for FILENAME.log) - **chk : bool, default=False** - Include the chk input line in new input files for Gaussian calculations - **mem : str, default='4GB'** - Memory for the QM calculations (i) Gaussian: total memory; (ii) ORCA: memory per processor - **nprocs : int, default=2** - Number of processors used in the QM calculations - **gen_atoms : list of str, default=[]** - Atoms included in the gen(ECP) basis set (i.e. ['I','Pd']) - **bs_gen : str, default=''** - Basis set used for gen(ECP) atoms (i.e. 'def2svp') - **bs_nogen : str, default=''** - Basis set used for non gen(ECP) atoms in gen(ECP) calculations (i.e. '6-31G\*') - -- [ ] QCORR arguments: - **files : list of str, default=''** - Filenames of QM output files to analyze. If \*.log (or other strings that are not lists such as \*.out) are specified, the program will look for all the log files in the working directory through glob.glob(\*.log) - **w_dir_main : str, default=os.getcwd()** - Working directory - **fullcheck : bool, default=True** - Perform an analysis to detect whether the calculations were done homogeneously (i.e. same level of theory, solvent, grid size, etc) - **varfile : str, default=None** - Option to parse the variables using a yaml file (specify the filename) - **ifreq_cutoff : float, default=0.0** - Cut off for to consider whether a frequency is imaginary (absolute of the specified value is used) - **amplitude_ifreq : float, default=0.2** - Amplitude used to displace the imaginary frequencies to fix - **freq_conv : str, default=None** - If a string is defined, it will remove calculations that converged during optimization but did not convergence in the subsequent frequency calculation. Options: opt keyword as string (i.e. 'opt=(calcfc,maxstep=5)'). If readfc is specified in the string, the chk option must be included as well. - **s2_threshold : float, default=10.0** - Cut off for spin contamination during analysis in % of the expected value (i.e. multiplicity 3 has an the expected of 2.0, if s2_threshold = 10, the value is allowed to be 2.0 +- 0.2). Set s2_threshold = 0 to deactivate this option. - **dup_threshold : float, default=0.0001** - Energy (in hartree) used as the energy difference in E, H and G to detect duplicates - **isom_type : str, default=None** - Check for isomerization from the initial input file to the resulting output files. It requires the extension of the initial input files (i.e. isom_type='com' or 'gjf') and the folder of the input files must be added in the isom_inputs option - **isom_inputs : str, default=os.getcwd()** - Folder containing the initial input files to check for isomerization - **vdwfrac : float, default=0.50** - Fraction of the summed VDW radii that constitutes a bond between two atoms in the isomerization filter - **covfrac : float, default=1.10** - Fraction of the summed covalent radii that constitutes a bond between two atoms in the isomerization filter - - *-- Options related to file generation to fix issues found by QCORR --* - New input files are generated through the QPREP module and, therefore, all QPREP arguments can be used when calling QCORR and will overwrite default options. For example, if the user specifies qm_input='wb97xd/def2svp', all the new input files generated to fix issues will contain this keywords line. See examples in the 'Example_workflows' folder for more information. - -- [ ] QDESCP arguments: - **program : str, default=None** - Program required to create the new descriptors. Current options: 'xtb', 'nmr' - **w_dir_main : str, default=os.getcwd()** - Working directory - **destination : str, default=None,** - Directory to create the JSON file(s) - - *-- Options for xTB descriptor generation (program='xtb') --* - **files : list of str, default=''** - Filenames of SDF/PDB/XYZ files to calculate xTB descriptors. If \*.sdf (or other strings that are not lists such as \*.pdb) are specified, the program will look for all the SDF files in the working directory through glob.glob(\*.sdf) - **charge : int, default=None** - Charge of the calculations used in the following input files. If charge isn't defined, it defaults to 0 - **mult : int, default=None** - Multiplicity of the calculations used in the following input files. If mult isn't defined, it defaults to 1 - **qdescp_solvent : str, default=None** - Solvent used in the xTB property calculations (ALPB model) - **qdescp_temp : float, default=300** - Temperature required for the xTB property calculations - **qdescp_acc : float, default=0.2** - Accuracy required for the xTB property calculations - **boltz : bool, default=True** - Calculate Boltzmann averaged xTB properties and include RDKit molecular features - - *-- Options for NMR spectra simulation (program='nmr') --* - **files : list of str, default=''** - Filenames of LOG files to retrieve NMR shifts from Gaussian calculations - **boltz : bool, default=True** - Calculate Boltzmann averaged NMR shifts - **nmr_atoms : list of str, default=[6, 1]** - List containing the atom types to consider. For example, if the user wants to retrieve NMR shifts from C and H atoms nmr_atoms=[6, 1] - **nmr_slope : list of float, default=[-1.0537, -1.0784]** - List containing the slope to apply for the raw NMR shifts calculated with Gaussian. A slope needs to be provided for each atom type in the analysis (i.e., for C and H atoms, the nmr_slope=[-1.0537, -1.0784]). These values can be adjusted using the CHESHIRE repository. - **nmr_intercept : list of float, default=[181.7815, 31.8723]** - List containing the intercept to apply for the raw NMR shifts calculated with Gaussian. An intercept needs to be provided for each atom type in the analysis (i.e., for C and H atoms, the nmr_slope=[-1.0537, -1.0784]). These values can be adjusted using the CHESHIRE repository. - **nmr_experim : str, default=None** - Filename of a CSV containing the experimental shifts. Two columnds are needed: A) 'atom_idx' should contain the indexes of the atoms to study as seen in GaussView or other molecular visualizers (i.e., the first atom of the coordinates has index 1); B) 'experimental_ppm' should contain the experimental NMR shifts in ppm observed for the atoms. - -- [ ] VISMOL arguments: - **files : list of str, default=''** - Filenames of SDF/PDB/XYZ to visualize conformers. If \*.sdf (or other strings that are not lists such as \*.pdb) are specified, the program will look for all the SDF files in the working directory through glob.glob(\*.sdf). Internal options of "line", "stick", "sphere" incorporated. Code reference from: [https://iwatobipen.wordpress.com] - +2. Update to the latest version: `pip install aqme --upgrade` ## Developers and help desk List of main developers and contact emails: - [ ] [Juan V. Alegre-Requena](https://orcid.org/0000-0002-0769-7168), main developer of the CSEARCH, QCORR, QPREP and QDESCP modules. Contact: [jv.alegre@csic.es](mailto:jv.alegre@csic.es) - - [ ] [Shree Sowndarya S. V.](https://orcid.org/0000-0002-4568-5854), main developer of the CSEARCH, CMIN, QDESCP and VIZMOL modules. Contact: [svss@colostate.edu](mailto:svss@colostate.edu) + - [ ] [Shree Sowndarya S. V.](https://orcid.org/0000-0002-4568-5854), main developer of the CSEARCH, CMIN, QDESCP and VISMOL modules. Contact: [svss@colostate.edu](mailto:svss@colostate.edu) - [ ] [Turki Alturaifi](https://www.chem.pitt.edu/person/turki-alturaifi), worked in benchmarking the parameters for RDKit-based conformer generation. Contact: [tma53@pitt.edu](mailto:tma53@pitt.edu) - [ ] [Raúl Pérez-Soto](https://orcid.org/0000-0002-6237-2155), worked in refactoring the code and creating the documentation. Contact: [Raul.Perez_Soto@colostate.edu](mailto:Raul.Perez_Soto@colostate.edu) - [ ] [Robert S. Paton](https://orcid.org/0000-0002-0104-4166), research group supervisor and code advisor. Contact: [robert.paton@colostate.edu](mailto:robert.paton@colostate.edu) @@ -401,7 +29,7 @@ AQME is freely available under an [MIT](https://opensource.org/licenses/MIT) Lic ## Reference If you use any of the AQME modules, please include this citation: -AQME v1.4, Alegre-Requena, J. V.; Sowndarya, S.; Alturaifi, T.; Pérez-Soto, R.; Paton, R. ChemRxiv 2022, DOI: 10.26434/chemrxiv-2022-dnc48. +AQME v1.4, Alegre-Requena, J. V.; Sowndarya, S.; Pérez-Soto, R.; Alturaifi, T.; Paton, R. AQME: Automated Quantum Mechanical Environments for Researchers and Educators. Wiley Interdiscip. Rev. Comput. Mol. Sci. 2023, DOI: 10.1002/wcms.1663. Additionally, please include the corresponding references for the following programs: * If you used CSEARCH with RDKit methods or from SMILES: [RDKit](https://www.rdkit.org) diff --git a/aqme/csearch/base.py b/aqme/csearch/base.py index 536d2c2d..1f5f2e55 100644 --- a/aqme/csearch/base.py +++ b/aqme/csearch/base.py @@ -19,7 +19,9 @@ name : str, default=None (If smi is defined) optionally, define a name for the system w_dir_main : str, default=os.getcwd() - Working directory + Working directory + destination : str, default=None, + Directory to create the output file(s) varfile : str, default=None Option to parse the variables using a yaml file (specify the filename) max_workers : int, default=4 @@ -1202,7 +1204,7 @@ def rdkit_to_sdf( Chem.SanitizeMol(mol) mol = Chem.AddHs(mol) except Chem.AtomValenceException: # this happens sometimes with complex metals when substituting the metal with an I atom - self.args.log.write(f'\nx The species provided could not be converted into a mol object wth RDKit. It normally happens with tricky metal complexes and might be fixed with different structures (i.e., changing a single bond + positive charge with a double bond).') + self.args.log.write(f'\nx The species provided could not be converted into a mol object wth RDKit. It normally happens with tricky metal complexes and might be fixed with a couple tricks (i.e., using the metal_atoms="[\'M\']" option, changing a single bond + positive charge with a double bond).') return -1, None, None, None mol.SetProp("_Name", name) diff --git a/docs/Misc/license.rst b/docs/Misc/license.rst index 34cc2708..91e605ba 100644 --- a/docs/Misc/license.rst +++ b/docs/Misc/license.rst @@ -2,7 +2,7 @@ License ======= -.. include:: ../../README.rst +.. include:: ../README.rst :start-after: license-start :end-before: license-end diff --git a/docs/Quickstart/basic.rst b/docs/Quickstart/basic.rst index eb258b23..fb482511 100644 --- a/docs/Quickstart/basic.rst +++ b/docs/Quickstart/basic.rst @@ -2,7 +2,7 @@ Usage ===== -.. include:: ../../README.rst +.. include:: ../README.rst :start-after: quickstart-start :end-before: quickstart-end diff --git a/docs/Quickstart/development.rst b/docs/Quickstart/development.rst index ba93209b..8a0da4f7 100644 --- a/docs/Quickstart/development.rst +++ b/docs/Quickstart/development.rst @@ -2,7 +2,7 @@ Development ----------- -.. include:: ../../README.rst +.. include:: ../README.rst :start-after: developers-start :end-before: developers-end diff --git a/docs/Quickstart/requirements.rst b/docs/Quickstart/requirements.rst index 061e7640..c25553ac 100644 --- a/docs/Quickstart/requirements.rst +++ b/docs/Quickstart/requirements.rst @@ -1,4 +1,4 @@ -.. include:: ../../README.rst +.. include:: ../README.rst :start-after: requirements-start :end-before: requirements-end diff --git a/docs/Quickstart/setup.rst b/docs/Quickstart/setup.rst index 1616f40c..097771e2 100644 --- a/docs/Quickstart/setup.rst +++ b/docs/Quickstart/setup.rst @@ -1,13 +1,13 @@ -.. include:: ../../README.rst +.. include:: ../README.rst :start-after: installation-start :end-before: installation-end -.. include:: ../../README.rst +.. include:: ../README.rst :start-after: tests-start :end-before: tests-end -.. include:: ../../README.rst +.. include:: ../README.rst :start-after: features-modules-start :end-before: features-modules-end diff --git a/README.rst b/docs/README.rst similarity index 97% rename from README.rst rename to docs/README.rst index 918b85d4..482a1e35 100644 --- a/README.rst +++ b/docs/README.rst @@ -408,9 +408,9 @@ Reference .. reference-start If you use any of the AQME modules, please include this citation: -AQME v1.4, Alegre-Requena, J. V.; Sowndarya, S.; Alturaifi, T.; Pérez-Soto, R.; -Paton, R. ChemRxiv 2022, -DOI: `10.26434/chemrxiv-2022-dnc48 `__ . +AQME v1.4, Alegre-Requena, J. V.; Sowndarya, S.; Pérez-Soto, R.; Alturaifi, T.; +Paton, R. AQME: Automated Quantum Mechanical Environments for Researchers and Educators. +Wiley Interdiscip. Rev. Comput. Mol. Sci. 2023, DOI: 10.1002/wcms.1663. Additionally, please include the corresponding references for the following programs: * If you used CSEARCH with RDKit methods: `RDKit `__ diff --git a/docs/index.rst b/docs/index.rst index 462a4e13..53c3ac44 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -2,13 +2,13 @@ |aqme_banner| -.. include:: ../README.rst +.. include:: README.rst :start-after: badges-start :end-before: badges-end .. _github: https://github.com/jvalegre/aqme -.. include:: ../README.rst +.. include:: README.rst :start-after: checkboxes-start :end-before: checkboxes-end @@ -16,11 +16,11 @@ Welcome to AQME's documentation! ===================================== -.. include:: ../README.rst +.. include:: README.rst :start-after: introduction-start :end-before: introduction-end -.. include:: ../README.rst +.. include:: README.rst :start-after: reference-start :end-before: reference-end diff --git a/meta.yaml b/meta.yaml deleted file mode 100644 index b69e867b..00000000 --- a/meta.yaml +++ /dev/null @@ -1,55 +0,0 @@ -{% set name = "aqme" %} -{% set version = "1.4.4" %} - -package: - name: {{ name|lower }} - version: {{ version }} - -source: - url: https://pypi.io/packages/source/{{ name[0] }}/{{ name }}/aqme-{{ version }}.tar.gz - sha256: 8fdbf1c33d4571073127ddd18f9b06cbbfe70ea628dd41522426e1b6514a3621 - -build: - noarch: python - script: {{ PYTHON }} -m pip install . -vv - number: 0 - -requirements: - host: - - pip - - python >=3.6 - run: - - ase - - cclib - - cffi - - matplotlib-base - - numpy - - pandas - - progress - - python >=3.6 - - pyyaml - - seaborn - - openbabel - - rdkit - - goodvibes - - py3Dmol - - ipywidgets - -test: - imports: - - aqme - commands: - - pip check - requires: - - pip - -about: - home: https://github.com/jvalegre/aqme - summary: Automated Quantum Mechanical Environments - license: MIT - license_file: LICENSE - -extra: - recipe-maintainers: - - jvalegre - - shreesowndarya diff --git a/setup.py b/setup.py index f8f0278b..779f03b6 100644 --- a/setup.py +++ b/setup.py @@ -4,10 +4,6 @@ # read the contents of your README file from os import path -this_directory = path.abspath(path.dirname(__file__)) -with io.open(path.join(this_directory, "README.md"), encoding="utf-8") as f: - long_description = f.read() - setup( name="aqme", packages=find_packages(exclude=["tests"]), @@ -15,7 +11,7 @@ version="1.4.4", license="MIT", description="Automated Quantum Mechanical Environments", - long_description="Automated Quantum Mechanical Environments", + long_description="Documentation in Read The Docs: https://aqme.readthedocs.io", long_description_content_type="text/markdown", author="Shree Sowndarya S. V., Juan V. Alegre Requena", author_email="svss@colostate.edu, jvalegre@unizar.es", From 7cd36e8fe6fcfd52cb78f938263a37ddfbfb6888 Mon Sep 17 00:00:00 2001 From: Juanvi Alegre-Requena Date: Fri, 24 Feb 2023 18:28:18 +0100 Subject: [PATCH 4/6] 1. Adapting auto_metal_atoms option --- .../CSEARCH_RDKit_organics.ipynb | 44 -------- .../CSEARCH_RDKit_organometallic.ipynb | 51 +-------- .../CSEARCH_fullmonte_organics.ipynb | 45 -------- ...RR_Analysis_with_isomerization_check.ipynb | 18 ++-- ..._Simple_analysis_for_json_generation.ipynb | 64 ----------- ...ORR_Standard_Analysis_and_full_check.ipynb | 22 ---- aqme/aqme.py | 2 - aqme/argument_parser.py | 1 - aqme/cmin.py | 68 +++--------- aqme/csearch/base.py | 55 +++------- aqme/utils.py | 101 +----------------- .../conformer_search/metal_complex.rst | 7 +- .../conformer_search/metal_complex.rst | 3 +- docs/Misc/versions.rst | 4 +- tests/test_cmin.py | 8 +- tests/test_csearch.py | 61 +---------- 16 files changed, 52 insertions(+), 502 deletions(-) delete mode 100644 Example_workflows/QCORR_processing_QM_outputs/QCORR_Simple_analysis_for_json_generation.ipynb diff --git a/Example_workflows/CSEARCH_CMIN_conformer_generation/CSEARCH_RDKit_organics.ipynb b/Example_workflows/CSEARCH_CMIN_conformer_generation/CSEARCH_RDKit_organics.ipynb index f1a6a223..45f9739a 100644 --- a/Example_workflows/CSEARCH_CMIN_conformer_generation/CSEARCH_RDKit_organics.ipynb +++ b/Example_workflows/CSEARCH_CMIN_conformer_generation/CSEARCH_RDKit_organics.ipynb @@ -128,50 +128,6 @@ "qprep(destination=com_path,files=sdf_rdkit_files,program='gaussian',\n", " qm_input='wb97xd/6-31+G* opt freq',mem='24GB',nprocs=8)" ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "###### Bonus 3: If you want to use the same functions using a YAML file that stores all the variables" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# to load the variables from a YAML file, use the varfile option\n", - "csearch(varfile='FILENAME.yaml')\n", - "\n", - "# for each option, specify it in the YAML file as follows:\n", - "# program='rdkit' --> program: 'rdkit'\n", - "# name='quinine' --> name: 'quinine'\n", - "# etc" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "###### Bonus 4: If you want to use the same functions through command lines" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "csearch(destination=sdf_path,smi=smi,name='quinine',program='rdkit')\n", - "\n", - "# for each option, specify it in the command line as follows:\n", - "# program='rdkit' --> --program 'rdkit'\n", - "# name='quinine' --> --name quinine\n", - "# etc\n", - "# for example: python -m aqme --program rdkit --smi COC1=CC2=C(C=CN=C2C=C1)[C@H]([C@@H]3C[C@@H]4CCN3C[C@@H]4C=C)O --name quinine" - ] } ], "metadata": { diff --git a/Example_workflows/CSEARCH_CMIN_conformer_generation/CSEARCH_RDKit_organometallic.ipynb b/Example_workflows/CSEARCH_CMIN_conformer_generation/CSEARCH_RDKit_organometallic.ipynb index 05e527ef..2d3390ab 100644 --- a/Example_workflows/CSEARCH_CMIN_conformer_generation/CSEARCH_RDKit_organometallic.ipynb +++ b/Example_workflows/CSEARCH_CMIN_conformer_generation/CSEARCH_RDKit_organometallic.ipynb @@ -35,11 +35,11 @@ "# 2) Simple RDKit sampling (program='rdkit')\n", "# 3) SMILES string (smi=smi_metal)\n", "# 4) Name for the output SDF files (name='Pd_complex')\n", - "# 5) The metal is Pd (metal=['Pd'])\n", - "# 6) Oxidation number +2 (metal_oxi=[2])\n", + "# 5) Charge is -1 (charge=-1)\n", + "# 6) Multiplicity is 1 (mult=1)\n", "# 7) The complex is squareplanar (complex_type='squareplanar')\n", "csearch(destination=sdf_path,program='rdkit',smi=smi_metal,name='Pd_complex',\n", - " metal_atoms=['Pd'],metal_oxi=[2],mult=1,complex_type='squareplanar')" + " charge=-1,mult=1,complex_type='squareplanar')" ] }, { @@ -73,51 +73,6 @@ " bs_gen='def2svp',bs_nogen='6-31G*',gen_atoms=['Pd'],mem='24GB',nprocs=8)\n", " " ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "###### Bonus 1: If you want to use the same functions using a YAML file that stores all the variables" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# to load the variables from a YAML file, use the varfile option\n", - "csearch(varfile='FILENAME.yaml')\n", - "\n", - "# for each option, specify it in the YAML file as follows:\n", - "# program='rdkit' --> program: 'rdkit'\n", - "# name='Pd_complex' --> name: 'Pd_complex'\n", - "# etc" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "###### Bonus 2: If you want to use the same functions through command lines" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "csearch(w_dir_main=w_dir_main,destination=sdf_path,program='rdkit',smi=smi_metal,name='Pd_complex',\n", - " metal=['Pd'],metal_oxi=[2],mult=1,complex_type='squareplanar')\n", - "\n", - "# for each option, specify it in the command line as follows:\n", - "# program='rdkit' --> --program 'rdkit'\n", - "# name='Pd_complex' --> --name Pd_complex\n", - "# etc\n", - "# for example: python -m aqme --program rdkit --smi I[Pd](Cl)([PH3+])[N+]1=CC=CC=C1 --name Pd_complex" - ] } ], "metadata": { diff --git a/Example_workflows/CSEARCH_CMIN_conformer_generation/CSEARCH_fullmonte_organics.ipynb b/Example_workflows/CSEARCH_CMIN_conformer_generation/CSEARCH_fullmonte_organics.ipynb index 073c986f..be6b1708 100644 --- a/Example_workflows/CSEARCH_CMIN_conformer_generation/CSEARCH_fullmonte_organics.ipynb +++ b/Example_workflows/CSEARCH_CMIN_conformer_generation/CSEARCH_fullmonte_organics.ipynb @@ -67,51 +67,6 @@ " qm_input='wb97xd/6-31+G* opt freq',mem='24GB',nprocs=8)\n", " " ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "###### Bonus 1: If you want to use the same functions using a YAML file that stores all the variables" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# to load the variables from a YAML file, use the varfile option\n", - "csearch(varfile='FILENAME.yaml')\n", - "\n", - "# for each option, specify it in the YAML file as follows:\n", - "# program='fullmonte' --> program: 'fullmonte'\n", - "# name='quinine' --> name: 'quinine'\n", - "# etc" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "###### Bonus 2: If you want to use the same functions through command lines" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "csearch(destination=sdf_path,\n", - " smi=smi,name='quinine',program='fullmonte')\n", - "\n", - "# for each option, specify it in the command line as follows:\n", - "# program='fullmonte' --> --program 'fullmonte'\n", - "# name='quinine' --> --name quinine\n", - "# etc\n", - "# for example: python -m aqme --program fullmonte --smi COC1=CC2=C(C=CN=C2C=C1)[C@H]([C@@H]3C[C@@H]4CCN3C[C@@H]4C=C)O --name quinine" - ] } ], "metadata": { diff --git a/Example_workflows/QCORR_processing_QM_outputs/QCORR_Analysis_with_isomerization_check.ipynb b/Example_workflows/QCORR_processing_QM_outputs/QCORR_Analysis_with_isomerization_check.ipynb index 2ccda5b3..8ab3870d 100644 --- a/Example_workflows/QCORR_processing_QM_outputs/QCORR_Analysis_with_isomerization_check.ipynb +++ b/Example_workflows/QCORR_processing_QM_outputs/QCORR_Analysis_with_isomerization_check.ipynb @@ -36,20 +36,13 @@ "qcorr(files=files,freq_conv='opt=(calcfc,maxstep=5)',\n", " isom_type='com',isom_inputs=isom_inputs)" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { "kernelspec": { - "display_name": "Python [conda env:svss-bm]", + "display_name": "base", "language": "python", - "name": "conda-env-svss-bm-py" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -61,7 +54,12 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.4" + "version": "3.9.7" + }, + "vscode": { + "interpreter": { + "hash": "6c100345108a7047ea96fae483cb64f49bdc23a8b225db90a5987a96959e820b" + } } }, "nbformat": 4, diff --git a/Example_workflows/QCORR_processing_QM_outputs/QCORR_Simple_analysis_for_json_generation.ipynb b/Example_workflows/QCORR_processing_QM_outputs/QCORR_Simple_analysis_for_json_generation.ipynb deleted file mode 100644 index 2298165b..00000000 --- a/Example_workflows/QCORR_processing_QM_outputs/QCORR_Simple_analysis_for_json_generation.ipynb +++ /dev/null @@ -1,64 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Analysis of QM files" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "##### QCORR analysis of single-point energy calculations (to generate json files)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# QCORR analysis of Gaussian 16 single_point energy calculations with no fullcheck and generation of json files\n", - "import os,glob\n", - "from aqme.qcorr import qcorr\n", - "\n", - "files=glob.glob(os.getcwd()+'/QCORR_3/*.log')\n", - "\n", - "# run the QCORR analyzer, with:\n", - "# 1) Names of the QM output files (files='*.log')\n", - "# 1) Skip the full check analysis (fullcheck=False)\n", - "qcorr(files=files,fullcheck=False)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python [conda env:svss-bm]", - "language": "python", - "name": "conda-env-svss-bm-py" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.4" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/Example_workflows/QCORR_processing_QM_outputs/QCORR_Standard_Analysis_and_full_check.ipynb b/Example_workflows/QCORR_processing_QM_outputs/QCORR_Standard_Analysis_and_full_check.ipynb index 328b182a..ba5d478a 100644 --- a/Example_workflows/QCORR_processing_QM_outputs/QCORR_Standard_Analysis_and_full_check.ipynb +++ b/Example_workflows/QCORR_processing_QM_outputs/QCORR_Standard_Analysis_and_full_check.ipynb @@ -31,28 +31,6 @@ "# 2) Detect and fix calcs that converged during geometry optimization but didn't converge during frequency calcs (freq_conv='opt=(calcfc,maxstep=5)')\n", "qcorr(files=files,freq_conv='opt=(calcfc,maxstep=5)')\n" ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "###### Bonus 1: If you want to use the same functions through command lines" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "qcorr_calcs = qcorr(files='*.log',freq_conv='opt=(calcfc,maxstep=5)')\n", - "\n", - "# for each option, specify it in the command line as follows:\n", - "# files='*.log' --> --files *.log\n", - "# freq_conv='opt=(calcfc,maxstep=5)' --> --freq_conv opt=(calcfc,maxstep=5)\n", - "# etc\n", - "# for example: python -m aqme --files *.log --freq_conv opt=(calcfc,maxstep=5)" - ] } ], "metadata": { diff --git a/aqme/aqme.py b/aqme/aqme.py index 6120b7fa..3b1bdbb3 100644 --- a/aqme/aqme.py +++ b/aqme/aqme.py @@ -75,7 +75,6 @@ def main(): max_workers=args.max_workers, metal_atoms=args.metal_atoms, auto_metal_atoms=args.auto_metal_atoms, - metal_oxi=args.metal_oxi, metal_idx=args.metal_idx, complex_coord=args.complex_coord, metal_sym=args.metal_sym, @@ -131,7 +130,6 @@ def main(): charge=args.charge, mult=args.mult, metal_atoms=args.metal_atoms, - metal_oxi=args.metal_oxi, ewin_cmin=args.ewin_cmin, initial_energy_threshold=args.initial_energy_threshold, energy_threshold=args.energy_threshold, diff --git a/aqme/argument_parser.py b/aqme/argument_parser.py index 3ec40437..50a215a8 100644 --- a/aqme/argument_parser.py +++ b/aqme/argument_parser.py @@ -23,7 +23,6 @@ "mult": None, "complex_coord": [], "complex_type": "", - "metal_oxi": [], "metal_idx": [], "metal_sym": [], "constraints_atoms": [], diff --git a/aqme/cmin.py b/aqme/cmin.py index 86812fb3..663470bd 100644 --- a/aqme/cmin.py +++ b/aqme/cmin.py @@ -30,14 +30,6 @@ (if the files come from CSEARCH, which adds the property "Mult") or calculates it from the generated mol object. Be careful with the automated calculation of mult from mol objects when using metals! - metal_atoms : list of str, default=[] - Specify metal atom(s) of the system as [ATOM_TYPE]. Multiple metals can be - used simultaneously (i.e. ['Pd','Ir']). This option is important to - calculate the charge of metal complexes based on SMILES strings. Requires - the use of metal_oxi. - metal_oxi : list of int, default=[] - Specify metal oxidation state as [NUMBER]. Multiple metals can be used - simultaneously (i.e. [2,3]). ewin_cmin : float, default=5.0 Energy window in kcal/mol to discard conformers (i.e. if a conformer is more than the E window compared to the most stable conformer) @@ -109,7 +101,6 @@ import time from aqme.utils import ( load_variables, - rules_get_charge, substituted_mol, mol_from_sdf_or_mol_or_mol2, add_prefix_suffix @@ -283,7 +274,7 @@ def compute_cmin(self, file): self.args.log.write("\nx Multiplicity is automatically calculated for ANI methods, do not use the mult option!") self.args.log.finalize() sys.exit() - charge,mult,final_mult,dup_data = self.charge_mult_cmin(dup_data, dup_data_idx, 'ani') + charge,mult,final_mult,dup_data = self.charge_mult_cmin(dup_data, dup_data_idx) elif self.args.program.lower() == "xtb": # sets charge and mult @@ -306,15 +297,15 @@ def compute_cmin(self, file): break if self.args.charge is None and charge_input is None: # if no charge/mult was specified or found, the charge is calculated using the mol object - charge,_,final_mult,_ = self.charge_mult_cmin(dup_data, dup_data_idx, 'xtb') + charge = 0 + self.args.log.write(f'nx No charge was assigned! Setting a value of 0, it can be changed with the charge option (or column in CSV inputs).') elif self.args.charge is None: charge = charge_input else: charge = self.args.charge if self.args.mult is None and mult_input is None: - if final_mult is None: - _,_,final_mult,_ = self.charge_mult_cmin(dup_data, dup_data_idx, 'xtb') - mult = final_mult + mult = 1 + self.args.log.write(f'nx No multiplicity was assigned! Setting a value of 1, it can be changed with the mult option (or column in CSV inputs).') elif self.args.mult is None: mult = mult_input else: @@ -562,9 +553,9 @@ def write_confs(self, conformers, selectedcids, log): log.write("x No conformers found!") - def charge_mult_cmin(self, dup_data, dup_data_idx, type_cmin): + def charge_mult_cmin(self, dup_data, dup_data_idx): """ - Retrieves charge and multiplicity arrays (for ANI) and values (for xTB) optimizations. + Retrieves charge and multiplicity arrays for ANI optimizations. Parameters ---------- @@ -587,43 +578,14 @@ def charge_mult_cmin(self, dup_data, dup_data_idx, type_cmin): mol_cmin = self.mols[0] - if type_cmin == 'xtb': - # check if metals and oxidation states are both used - if self.args.metal_atoms != []: - if self.args.metal_oxi == []: - self.args.log.write(f"\nx Metal atoms ({self.args.metal_atoms}) were specified without their corresponding oxidation state (metal_oxi option)") - self.args.log.finalize() - sys.exit() - - if self.args.metal_oxi != []: - if self.args.metal_atoms == []: - self.args.log.write(f"\nx Metal oxidation states ({self.args.metal_oxi}) were specified without their corresponding metal atoms (metal_atoms option)") - self.args.log.finalize() - sys.exit() - - # assigns idx to metal atoms - self.args.metal_idx, self.args.complex_coord, self.args.metal_sym = substituted_mol(self, mol_cmin, "noI") - charge, metal_found = rules_get_charge(mol_cmin, self.args) - mult = None - final_mult = Descriptors.NumRadicalElectrons(mol_cmin) + 1 - if metal_found: - # since RDKit gets the multiplicity of the metal with valence 0, the real multiplicity - # value needs to be adapted with the charge. If multiplicity is different than 1 or 2, - # the user must specify the value with the mult option - if (charge % 2) == 1 and charge != 0: # odd charges (i.e. +1, +3, etc) - if final_mult == 1: - final_mult = final_mult + 1 - if final_mult == 2: - final_mult = final_mult - 1 - - elif type_cmin == 'ani': - charge = [] - mult = [] - for _, atom in enumerate(mol_cmin.GetAtoms()): - charge.append(atom.GetFormalCharge()) - mult.append(atom.GetNumRadicalElectrons()) - TotalElectronicSpin = np.sum(mult) / 2 - final_mult = int((2 * TotalElectronicSpin) + 1) + + charge = [] + mult = [] + for _, atom in enumerate(mol_cmin.GetAtoms()): + charge.append(atom.GetFormalCharge()) + mult.append(atom.GetNumRadicalElectrons()) + TotalElectronicSpin = np.sum(mult) / 2 + final_mult = int((2 * TotalElectronicSpin) + 1) dup_data.at[dup_data_idx, "Overall charge"] = np.sum(charge) dup_data.at[dup_data_idx, "Mult"] = final_mult diff --git a/aqme/csearch/base.py b/aqme/csearch/base.py index 88e98b68..6c80ab3e 100644 --- a/aqme/csearch/base.py +++ b/aqme/csearch/base.py @@ -90,14 +90,10 @@ Only organometallic molecules ............................. - metal_atoms : list of str, default=[] - Specify metal atom(s) of the system as [ATOM_TYPE]. Multiple metals can - be used simultaneously (i.e. ['Pd','Ir']). This option is important to - calculate the charge of metal complexes based on SMILES strings. Requires - the use of metal_oxi. - metal_oxi : list of int, default=[] - Specify metal oxidation state as [NUMBER]. Multiple metals can be used - simultaneously (i.e. [2,3]). + auto_metal_atoms : bool, default=True + Automatically detect metal atoms for the RDKit conformer generation. Charge + and mult should be specified as well since the automatic charge and mult + detection might not be precise. complex_type : str, default='' Forces the metal complexes to adopt a predefined geometry. This option is especially relevant when RDKit predicts wrong complex geometries or gives @@ -208,7 +204,6 @@ from aqme.csearch.templates import template_embed, check_metal_neigh from aqme.csearch.fullmonte import generating_conformations_fullmonte, realign_mol from aqme.utils import ( - rules_get_charge, substituted_mol, load_variables, set_metal_atomic_number @@ -491,21 +486,10 @@ def compute_confs( template_opt = False - # check if metals and oxidation states are both used - if self.args.metal_atoms != []: - if self.args.metal_oxi == []: - self.args.log.write(f"\nx Metal atoms ({self.args.metal_atoms}) were specified without their corresponding oxidation state (metal_oxi option)") - self.args.log.finalize() - sys.exit() - - if self.args.metal_oxi != []: - if self.args.metal_atoms == []: - self.args.log.write(f"\nx Metal oxidation states ({self.args.metal_oxi}) were specified without their corresponding metal atoms (metal_atoms option)") - self.args.log.finalize() - sys.exit() - + # detects metal atoms if self.args.auto_metal_atoms: - _ = self.find_metal_atom(mol) + _ = self.find_metal_atom(mol,charge,mult) + # replaces the metal for an I atom if len(self.args.metal_atoms) >= 1: ( @@ -589,7 +573,8 @@ def compute_confs( self.final_dup_data = pd.concat(frames, ignore_index=True, sort=True) # automatic detection of metal atoms - def find_metal_atom(self,mol): + def find_metal_atom(self,mol,charge,mult): + self.args.metal_atoms = [] # for batch jobs such as CSV inputs with many SMILES transition_metals = ['Sc', 'Ti', 'V', 'Cr', 'Mn', 'Fe', 'Co', 'Ni', 'Cu', 'Zn', 'Y', 'Zr', 'Nb', 'Mo', 'Tc', 'Ru', 'Rh', 'Pd', 'Ag', 'Cd', 'Hf', 'Ta', 'W', 'Re', 'Os', 'Ir', 'Pt', 'Au', 'Hg', 'Rf', 'Db', 'Sg', 'Bh', 'Hs', 'Mt', 'Ds', 'Rg', 'Cn', 'Nh', 'Fl', 'Mc', 'Lv', 'Ts', 'Og'] @@ -597,7 +582,9 @@ def find_metal_atom(self,mol): if atom.GetSymbol() in transition_metals: self.args.metal_atoms.append(atom.GetSymbol()) if len(self.args.metal_atoms) > 0: - self.args.log.write(f"\no AQME recognized the following metal atoms: {self.args.metal_atoms}") + self.args.log.write(f"\no AQME recognized the following metal atoms: {self.args.metal_atoms}") + if charge is None or mult is None: + self.args.log.write(f"\nx The automated charge and multiplicity calculation might not be precise for metal complexes! You should use the charge and mult options (or the charge and mult columns in CSV inputs).") def conformer_generation( self, @@ -626,25 +613,11 @@ def conformer_generation( status = None # Set charge and multiplicity - metal_found = False # user can overwrite charge and mult with the corresponding arguments if charge is None: - if not len(self.args.metal_atoms) >= 1: - charge = Chem.GetFormalCharge(mol) - else: - charge, metal_found = rules_get_charge(mol, self.args) - + charge = Chem.GetFormalCharge(mol) if mult is None: mult = Descriptors.NumRadicalElectrons(mol) + 1 - if metal_found: - # since RDKit gets the multiplicity of the metal with valence 0, the real multiplicity - # value needs to be adapted with the charge. If multiplicity is different than 1 or 2, - # the user must specify the value with the mult option - if (charge % 2) == 1 and charge != 0: # odd charges (i.e. +1, +3, etc) - if mult == 1: - mult = mult + 1 - if mult == 2: - mult = mult - 1 dup_data.at[dup_data_idx, "Real charge"] = charge dup_data.at[dup_data_idx, "Mult"] = mult @@ -1221,7 +1194,7 @@ def rdkit_to_sdf( Chem.SanitizeMol(mol) mol = Chem.AddHs(mol) except Chem.AtomValenceException: # this happens sometimes with complex metals when substituting the metal with an I atom - self.args.log.write(f'\nx The species provided could not be converted into a mol object wth RDKit. It normally happens with tricky metal complexes and might be fixed with a couple tricks (i.e., using the metal_atoms="[\'M\']" option, changing a single bond + positive charge with a double bond).') + self.args.log.write(f'\nx The species provided could not be converted into a mol object wth RDKit. It normally happens with tricky metal complexes and might be fixed with a couple tricks (i.e., changing a single bond + positive charge with a double bond).') return -1, None, None, None mol.SetProp("_Name", name) diff --git a/aqme/utils.py b/aqme/utils.py index c4ac0ec2..aba3746c 100644 --- a/aqme/utils.py +++ b/aqme/utils.py @@ -227,103 +227,6 @@ def get_info_input(file): return atoms_and_coords, charge, mult -def rules_get_charge(mol, args): - """ - Automatically sets the charge for metal complexes - """ - - C_group = ["C", "Si", "Ge", "Sn"] - N_group = ["N", "P", "As", "Sb"] - O_group = ["O", "S", "Se", "Te"] - F_group = ["F", "Cl", "Br", "I"] - - - M_ligands, N_carbenes, bridge_atoms, C_accounted, neighbours = [], [], [], [], [] - charge_rules = np.zeros(len(mol.GetAtoms()), dtype=int) - neighbours, metal_found = [], False - try: - Chem.SanitizeMol(mol) - except Chem.AtomValenceException: # this happens sometimes with complex metals when substituting the metal with an I atom - args.log.write(f'\nx The charge can not be safely calculated for the system provided. If the charge is not right, you can assign it manually with charge=INT.') - for i, atom in enumerate(mol.GetAtoms()): - # get the neighbours of metal atom and calculate the charge of metal center + ligands - if atom.GetIdx() in args.metal_idx: - # a sanitation step is needed to ensure that metals and ligands show correct valences - metal_found = True - charge_idx = args.metal_idx.index(atom.GetIdx()) - neighbours = atom.GetNeighbors() - charge_rules[i] = args.metal_oxi[charge_idx] - for neighbour in neighbours: - double_bond = False - M_ligands.append(neighbour.GetIdx()) - if neighbour.GetTotalValence() == 4: - if neighbour.GetSymbol() in C_group: - # correct for C=C interacting with M with pi interactions - # when drawing these rings in ChemDraw, all the aromatic C atoms will be linked - # to the M atom as part of 3 member rings - atom_rings = mol.GetRingInfo().AtomRings() - for ring in atom_rings: - if neighbour.GetIdx() in list(ring) and args.metal_idx[0] in list(ring): - # for each double bond - if len(ring) == 3: - C_count, C_accounted_indiv = 0,[] - for ring_member in ring: - if mol.GetAtoms()[ring_member].GetSymbol() == "C" and ring_member not in C_accounted: - C_accounted_indiv.append(ring_member) - C_count += 1 - if C_count == 2: - for ele in C_accounted_indiv: - C_accounted.append(ele) - break - # first, detects carbenes to adjust charge - carbene_like = False - bridge_ligand = False - if neighbour.GetIdx() in C_accounted: - double_bond = True - for inside_neighbour in neighbour.GetNeighbors(): - if inside_neighbour.GetSymbol() in N_group: - if inside_neighbour.GetTotalValence() == 4: - for N_neighbour in inside_neighbour.GetNeighbors(): - # this option detects bridge ligands that connect two metals such as M--CN--M - # we use I since the M is still represented as I at this point - if N_neighbour.GetSymbol() == "I": - bridge_ligand = True - bridge_atoms.append(inside_neighbour.GetIdx()) - if not bridge_ligand: - carbene_like = True - N_carbenes.append(inside_neighbour.GetIdx()) - if not carbene_like and not double_bond: - charge_rules[i] = charge_rules[i] - 1 - elif neighbour.GetTotalValence() == 3: - if neighbour.GetSymbol() in N_group and neighbour.GetFormalCharge() == 0: - charge_rules[i] = charge_rules[i] - 1 - elif neighbour.GetTotalValence() == 2: - # radical chalcogen atoms (i.e., Cu-OH(rad)) - if neighbour.GetSymbol() in O_group and neighbour.GetFormalCharge() == 0 and len(neighbour.GetNeighbors()) == 2: - charge_rules[i] = charge_rules[i] - 1 - # double bonded chalcogen atom (i.e., V=O) - elif neighbour.GetSymbol() in O_group and neighbour.GetFormalCharge() == 0 and len(neighbour.GetNeighbors()) == 1: - charge_rules[i] = charge_rules[i] - 2 - elif neighbour.GetTotalValence() == 1: - if neighbour.GetSymbol() in O_group: - charge_rules[i] = charge_rules[i] - 2 - if neighbour.GetSymbol() in F_group: - charge_rules[i] = charge_rules[i] - 1 - - # for charges not in the metal, neighbours or exceptions (i.e., C=N+ from carbenes or CN from bridge atoms) - invalid_charged_atoms = M_ligands + N_carbenes + bridge_atoms + args.metal_idx - for i, atom in enumerate(mol.GetAtoms()): - if atom.GetIdx() not in invalid_charged_atoms: - charge_rules[i] = atom.GetFormalCharge() - - charge = np.sum(charge_rules) - if not metal_found: - # for organic molecules when using a list containing organic and organometallics molecules mixed - charge = Chem.GetFormalCharge(mol) - - return charge, metal_found - - def substituted_mol(self, mol, checkI): """ Returns a molecule object in which all metal atoms specified in args.metal_atoms @@ -469,13 +372,13 @@ def command_line_args(): kwargs[arg_name] = glob.glob(value) else: # this converts the string parameters to lists - if arg_name.lower() in ["files", "metal_oxi", "metal_atoms", "gen_atoms", "constraints_atoms", "constraints_dist", "constraints_angle", "constraints_dihedral", "atom_types", "cartesians", "nmr_atoms", "nmr_slope", "nmr_intercept"]: + if arg_name.lower() in ["files", "gen_atoms", "constraints_atoms", "constraints_dist", "constraints_angle", "constraints_dihedral", "atom_types", "cartesians", "nmr_atoms", "nmr_slope", "nmr_intercept"]: if not isinstance(value, list): try: value = ast.literal_eval(value) except (SyntaxError, ValueError): # this line fixes issues when using "[X]" or ["X"] instead of "['X']" when using lists - if arg_name.lower() in ["files", "metal_oxi", "metal_atoms", "gen_atoms"]: + if arg_name.lower() in ["files", "gen_atoms"]: value = value.replace('[',']').replace(',',']').split(']') while('' in value): value.remove('') diff --git a/docs/Examples/examples_commandline/conformer_search/metal_complex.rst b/docs/Examples/examples_commandline/conformer_search/metal_complex.rst index 48e32066..22c9e93d 100644 --- a/docs/Examples/examples_commandline/conformer_search/metal_complex.rst +++ b/docs/Examples/examples_commandline/conformer_search/metal_complex.rst @@ -19,10 +19,7 @@ Pd complex using RDKit using a template for square planar complexes. | |metal_comp_chemdraw| | |metal_comp_3D| | +--------------------------+--------------------+ -Here we have to specify the metalic centers :code:`--metal_atoms "['Pd']"` as well as -their oxidation state :code:`--metal_oxi "[2]"`. - -We can also specify the multiplicity :code:`--mult 1` +We can also specify multiplicity :code:`--mult 1` and charge :code:`--charge -1` We also need to specify which template geometry to use for the complex :code:`--complex_type squareplanar` @@ -40,5 +37,5 @@ The full command that we will execute will look as follows: .. code:: shell - python -m aqme --csearch --smi "I[Pd]([PH3+])(F)Cl" --destination Pd_sdf_files --name Pd_complex --program rdkit --metal_atoms "['Pd']" --metal_oxi "[2]" --mult 1 --complex_type squareplanar + python -m aqme --csearch --smi "I[Pd]([PH3+])(F)Cl" --destination Pd_sdf_files --name Pd_complex --program rdkit --charge -1 --mult 1 --complex_type squareplanar diff --git a/docs/Examples/examples_python/conformer_search/metal_complex.rst b/docs/Examples/examples_python/conformer_search/metal_complex.rst index 1f68fa33..75f475ea 100644 --- a/docs/Examples/examples_python/conformer_search/metal_complex.rst +++ b/docs/Examples/examples_python/conformer_search/metal_complex.rst @@ -44,7 +44,6 @@ molecule. smi=smiles, name='Pd_complex', program='rdkit', - metal_atoms=['Pd',], # Symbol of transition metal atoms included - metal_oxi=[2,], # Oxidation number per metal_atom + charge=-1, # charge mult=1, # multiplicity complex_type='squareplanar') # Template geometry to use diff --git a/docs/Misc/versions.rst b/docs/Misc/versions.rst index 3aaaaac1..b721ffc1 100644 --- a/docs/Misc/versions.rst +++ b/docs/Misc/versions.rst @@ -5,7 +5,9 @@ Versions ======== Version 1.4.5 [`url `__] - - Suffix/prefix options work in CSEARCH, CMIN and QPREP + - Suffix/prefix options work in CSEARCH, CMIN and QPREP + - Automatic recognition of metals with the auto_metal_atom option + - In QPREP, if qm_input starts with "p ", the Gaussian inputs starts with "#p" Version 1.4.4 [`url `__] - When using a CSV as input, the user can specify charge and mult for each species by diff --git a/tests/test_cmin.py b/tests/test_cmin.py index 7249c325..4a169fdb 100644 --- a/tests/test_cmin.py +++ b/tests/test_cmin.py @@ -93,7 +93,7 @@ def test_cmin_methods( # # tests for parameters of cmin paramters # @pytest.mark.parametrize( -# "program, sdf, metal_complex,metal_atoms,metal_oxi,complex_type, charge, mult, ewin_cmin, initial_energy_threshold, energy_threshold,rms_threshold, output_nummols", +# "program, sdf, metal_complex,complex_type, charge, mult, ewin_cmin, initial_energy_threshold, energy_threshold,rms_threshold, output_nummols", # [ # # tests for conformer generation with RDKit # ( @@ -101,8 +101,6 @@ def test_cmin_methods( # "pentane_rdkit.sdf", # False, # None, -# None, -# None, # 0, # 1, # 5, @@ -115,8 +113,6 @@ def test_cmin_methods( # "xtb", # "Pd_complex_0_rdkit.sdf", # True, -# ["Pd"], -# [2], # "squareplanar", # 0, # 1, @@ -132,8 +128,6 @@ def test_cmin_methods( # program, # sdf, # metal_complex, -# metal_atoms, -# metal_oxi, # complex_type, # charge, # mult, diff --git a/tests/test_csearch.py b/tests/test_csearch.py index a6569df9..0b43f1e1 100644 --- a/tests/test_csearch.py +++ b/tests/test_csearch.py @@ -409,47 +409,6 @@ def test_csearch_fullmonte_parameters( assert mult == int(mols[0].GetProp("Mult")) os.chdir(w_dir_main) -# tests for parameters of metals with double bonds -@pytest.mark.parametrize( - "program, smi, name, charge", - [ - ("rdkit", "N[SiH](N)[Cu]1CC1", "Cu_ethene", 0), - ("rdkit", "N[SiH](N)[Cu]1234C5C1C2C3C54", "Cu_Cp", -1), - ("rdkit", "N[SiH](N)[Cu]12345C6C1C2C3C4C65", "Cu_Ph", 0), - ], -) -def test_double_bond_chrg( - program, - smi, - name, - charge, -): - os.chdir(csearch_methods_dir) - # runs the program with the different tests - csearch( - program=program, - smi=smi, - name=name, - sample=10, - metal_atoms=['Cu'], - metal_oxi=[1] - ) - - # tests here - file = str("CSEARCH/" + name + "_" + program + ".sdf") - mols = rdkit.Chem.SDMolSupplier(file, removeHs=False) - assert charge == int(mols[0].GetProp("Real charge")) - # check that H atoms are included - outfile = open(file,"r") - outlines = outfile.readlines() - outfile.close() - Hatoms_found = False - for line in outlines: - if "H 0" in line: - Hatoms_found = True - assert Hatoms_found - os.chdir(w_dir_main) - # tests for parameters of csearch rdkit @pytest.mark.parametrize( @@ -602,7 +561,7 @@ def test_csearch_rdkit_summ_parameters( # tests for individual organic molecules and metal complexes with different types of csearch methods @pytest.mark.parametrize( - "program, smi, name, complex, metal_complex, metal, metal_oxi, complex_type, constraints_dist, constraints_angle, constraints_dihedral, charge, mult, crest_keywords, destination, output_nummols", + "program, smi, name, complex, metal_complex, complex_type, constraints_dist, constraints_angle, constraints_dihedral, charge, mult, crest_keywords, destination, output_nummols", [ # tests for conformer generation with RDKit, SUMM, FullMonte and CREST ( @@ -666,8 +625,6 @@ def test_csearch_rdkit_summ_parameters( "Pd_metal_only", False, True, - ["Pd"], - [2], None, None, None, @@ -685,8 +642,6 @@ def test_csearch_rdkit_summ_parameters( "Pd_complex", False, True, - ["Pd"], - [2], "squareplanar", None, None, @@ -703,8 +658,6 @@ def test_csearch_rdkit_summ_parameters( "Cu_trigonal", False, True, - ["Cu"], - [1], "trigonalplanar", None, None, @@ -721,8 +674,6 @@ def test_csearch_rdkit_summ_parameters( "V_squarepyramidal", False, True, - ["V"], - [4], "squarepyramidal", None, None, @@ -740,8 +691,6 @@ def test_csearch_rdkit_summ_parameters( "Ag_complex_crest", False, True, - ["Ag"], - [1], "linear", None, None, @@ -833,8 +782,6 @@ def test_csearch_methods( name, complex, metal_complex, - metal, - metal_oxi, complex_type, constraints_dist, constraints_angle, @@ -871,9 +818,8 @@ def test_csearch_methods( program=program, smi=smi, name=name, - metal_atoms=metal, - metal_oxi=metal_oxi, mult=mult, + charge=charge, sample=10 ) else: @@ -882,8 +828,7 @@ def test_csearch_methods( program=program, smi=smi, name=name, - metal_atoms=metal, - metal_oxi=metal_oxi, + charge=charge, complex_type=complex_type, mult=mult, sample=10 From 8f1d0ac335be5ce713efef810e62b9c3c0a6db5d Mon Sep 17 00:00:00 2001 From: Juanvi Alegre-Requena Date: Sat, 25 Feb 2023 10:52:56 +0100 Subject: [PATCH 5/6] 1. Adding #p in qprep-gaussian --- aqme/qprep.py | 5 ++++- aqme/utils.py | 1 + tests/test_csearch.py | 44 +++++++++++++++---------------------------- tests/test_qprep.py | 18 +++++++++++++----- 4 files changed, 33 insertions(+), 35 deletions(-) diff --git a/aqme/qprep.py b/aqme/qprep.py index 9ac3f653..6929eff3 100644 --- a/aqme/qprep.py +++ b/aqme/qprep.py @@ -256,7 +256,10 @@ def get_header(self, qprep_data): txt += f'%chk={name_file}.chk\n' txt += f"%nprocshared={self.args.nprocs}\n" txt += f"%mem={self.args.mem}\n" - txt += f"# {self.args.qm_input}\n\n" + if self.args.qm_input[:2] != 'p ': + txt += f"# {self.args.qm_input}\n\n" + else: + txt += f"#{self.args.qm_input}\n\n" txt += f'{name_file}\n\n' txt += f'{qprep_data["charge"]} {qprep_data["mult"]}\n' diff --git a/aqme/utils.py b/aqme/utils.py index aba3746c..6454c201 100644 --- a/aqme/utils.py +++ b/aqme/utils.py @@ -401,6 +401,7 @@ def load_variables(kwargs, aqme_module, create_dat=True): txt_yaml = "" if self.varfile is not None: self, txt_yaml = load_from_yaml(self) + if aqme_module != "command": self.initial_dir = Path(os.getcwd()) diff --git a/tests/test_csearch.py b/tests/test_csearch.py index 0b43f1e1..c24661d9 100644 --- a/tests/test_csearch.py +++ b/tests/test_csearch.py @@ -571,8 +571,6 @@ def test_csearch_rdkit_summ_parameters( False, False, None, - None, - None, [], [], [], @@ -589,8 +587,6 @@ def test_csearch_rdkit_summ_parameters( # False, # False, # None, - # None, - # None, # [], # [], # [], @@ -607,8 +603,6 @@ def test_csearch_rdkit_summ_parameters( False, False, None, - None, - None, [], [], [], @@ -626,9 +620,9 @@ def test_csearch_rdkit_summ_parameters( False, True, None, - None, - None, - None, + [], + [], + [], -1, 1, None, @@ -643,9 +637,9 @@ def test_csearch_rdkit_summ_parameters( False, True, "squareplanar", - None, - None, - None, + [], + [], + [], -1, 1, None, @@ -659,9 +653,9 @@ def test_csearch_rdkit_summ_parameters( False, True, "trigonalplanar", - None, - None, - None, + [], + [], + [], -2, 1, None, @@ -675,9 +669,9 @@ def test_csearch_rdkit_summ_parameters( False, True, "squarepyramidal", - None, - None, - None, + [], + [], + [], -2, 1, None, @@ -692,9 +686,9 @@ def test_csearch_rdkit_summ_parameters( False, True, "linear", - None, - None, - None, + [], + [], + [], 1, 1, None, @@ -709,8 +703,6 @@ def test_csearch_rdkit_summ_parameters( False, False, None, - None, - None, [], [], [], @@ -727,8 +719,6 @@ def test_csearch_rdkit_summ_parameters( True, False, None, - None, - None, [], [], [], @@ -745,8 +735,6 @@ def test_csearch_rdkit_summ_parameters( True, False, None, - None, - None, [], [], [], @@ -763,8 +751,6 @@ def test_csearch_rdkit_summ_parameters( True, False, None, - None, - None, [[4, 5, 1.8], [5, 9, 1.8]], [[4, 5, 9, 180]], [], diff --git a/tests/test_qprep.py b/tests/test_qprep.py index e3e6f462..26389aa3 100644 --- a/tests/test_qprep.py +++ b/tests/test_qprep.py @@ -38,6 +38,7 @@ "com_files", False, ), # test log inputs with defined charge and mult + ("com_gen", "p_print", "com_files", False), # test #p in qm_input (replaces previous files) ( "charge_mult", "sdf_files", @@ -88,6 +89,15 @@ def test_QPREP_analysis(test_type, init_folder, target_folder, restore_folder): ) # runs the program with the different tests + if init_folder == 'p_print': + qm_input = "p wb97xd/lanl2dz scrf=(smd,solvent=acetonitrile)" + line_2 = "#p wb97xd/lanl2dz scrf=(smd,solvent=acetonitrile)" + init_folder = 'sdf_files' + + else: + qm_input = "wb97xd/lanl2dz scrf=(smd,solvent=acetonitrile)" + line_2 = "# wb97xd/lanl2dz scrf=(smd,solvent=acetonitrile)" + w_dir_main = f"{path_qprep}/{init_folder}" destination = f"{path_qprep}/{init_folder}/{target_folder}" @@ -120,7 +130,7 @@ def test_QPREP_analysis(test_type, init_folder, target_folder, restore_folder): "--program", "gaussian", "--qm_input", - "wb97xd/lanl2dz scrf=(smd,solvent=acetonitrile)", + qm_input, ] subprocess.run(cmd_aqme) @@ -134,9 +144,7 @@ def test_QPREP_analysis(test_type, init_folder, target_folder, restore_folder): outfile = open(f'{destination}/{file.split(".")[0]}.com', "r") outlines = outfile.readlines() outfile.close() - - line_2 = "# wb97xd/lanl2dz scrf=(smd,solvent=acetonitrile)" - + if file.split(".")[0] == "H_freq": line_6 = "0 2" line_7 = "H 0.00000000 0.00000000 0.00000000" @@ -181,7 +189,7 @@ def test_QPREP_analysis(test_type, init_folder, target_folder, restore_folder): assert outlines[8].strip() == line_8 assert outlines[10].strip() == line_10 - + assert outlines[2].strip() == line_2 assert outlines[6].strip() == line_6 From 41f0c35a3487ef95ac8dbfca5693074b8b20cef7 Mon Sep 17 00:00:00 2001 From: Juanvi Alegre-Requena Date: Sat, 25 Feb 2023 14:30:38 +0100 Subject: [PATCH 6/6] 1. Change storing out file from csearch-crest --- aqme/csearch/crest.py | 3 ++- aqme/utils.py | 13 ++++--------- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/aqme/csearch/crest.py b/aqme/csearch/crest.py index 3b06e6b4..7358a304 100644 --- a/aqme/csearch/crest.py +++ b/aqme/csearch/crest.py @@ -251,6 +251,7 @@ def xtb_opt_main( xyzoutall = str(dat_dir) + "/" + name_no_path + "_conformers.xyz" + # CREST sampling if self.args.program.lower() == "crest": self.args.log.write(f"\no Starting CREST sampling") if constrained_opt: @@ -287,12 +288,12 @@ def xtb_opt_main( run_command(command, f"/{dat_dir}/{name_no_path}.out") - # get number of n_atoms try: natoms = open("crest_best.xyz").readlines()[0].strip() except FileNotFoundError: self.args.log.write(f"\nx CREST optimization failed! This might be caused by different reasons. For example, this might happen if you're using metal complexes without specifying any kind of template in the complex_type option (i.e. squareplanar).\n") + # CREGEN sorting if self.args.cregen and int(natoms) != 1: self.args.log.write(f"\no Starting CREGEN sorting") command = ["crest", "crest_best.xyz", "--cregen", "crest_conformers.xyz"] diff --git a/aqme/utils.py b/aqme/utils.py index 6454c201..0d36c155 100644 --- a/aqme/utils.py +++ b/aqme/utils.py @@ -33,17 +33,12 @@ def run_command(command, outfile): """ - Runs the subprocess command and outputs to the necessary output file + Runs the subprocess command and saves the results in an output file (not shown in the terminal) """ - p2 = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL) - txt = [line.decode("utf-8") for line in p2.stdout] - p2.stdout.close() - - with open(outfile, "w") as f: - for line in txt: - f.write(line) - f.close() + output = open(outfile, "w") + subprocess.run(command, stdout=output, stderr=subprocess.DEVNULL) + output.close() def periodic_table():