Skip to content

Commit

Permalink
Merge pull request #139 from jvalegre/jv_branch
Browse files Browse the repository at this point in the history
v1.4.7
  • Loading branch information
jvalegre authored Mar 13, 2023
2 parents 6aafb51 + 5d6c1e9 commit 803302d
Show file tree
Hide file tree
Showing 14 changed files with 1,459 additions and 93 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,10 @@
"# based on the atom numbers above, I choose the constraints for my TS:\n",
"# 1) Bond between atoms 4 and 5 with a distance of 1.8 A\n",
"# 2) Bond between atoms 5 and 9 with a distance of 1.8 A\n",
"constraits_dist = [[3,4,1.8],[4,5,1.8]]\n",
"constraints_dist = [[3,4,1.8],[4,5,1.8]]\n",
"\n",
"# 3) Angle between atoms 4, 5 and 9 of 180 degrees\n",
"constraits_angle = [[3,4,5,180]]"
"constraints_angle = [[3,4,5,180]]"
]
},
{
Expand All @@ -73,13 +73,13 @@
"# 1) Mapped SMILES string (smi=smi_new)\n",
"# 2) CREST sampling (program='crest')\n",
"# 3) Include CREGEN post-analysis (cregen=True)\n",
"# 4) Define distance constraints (constraints_dist=constraits_dist)\n",
"# 5) Define angle constraints (constraints_angle=constraits_angle)\n",
"# 4) Define distance constraints (constraints_dist=constraints_dist)\n",
"# 5) Define angle constraints (constraints_angle=constraints_angle)\n",
"# 6) Add water solvation in CREST (crest_keywords=\"--alpb h2o\")\n",
"# 7) Add water solvation in the xTB pre-optimization (xtb_keywords=\"--alpb h2o\")\n",
"# 8) Number of processors used in CREST (nprocs=12)\n",
"csearch(smi=smi_new,name='TS-example',program='crest',cregen=True,\n",
" constraints_dist=constraits_dist,constraints_angle=constraits_angle,\n",
" constraints_dist=constraints_dist,constraints_angle=constraints_angle,\n",
" crest_keywords=\"--alpb h2o\",xtb_keywords=\"--alpb h2o\",nprocs=12)"
]
},
Expand Down
1,259 changes: 1,259 additions & 0 deletions Example_workflows/QCORR_processing_QM_outputs/QCORR_1/Imag_freq_no_corr.log

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions aqme/aqme.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ def main():
ifreq_cutoff=args.ifreq_cutoff,
amplitude_ifreq=args.amplitude_ifreq,
freq_conv=args.freq_conv,
im_freq_input=args.im_freq_input,
s2_threshold=args.s2_threshold,
dup_threshold=args.dup_threshold,
ro_threshold=args.ro_threshold,
Expand Down
1 change: 1 addition & 0 deletions aqme/argument_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@
"amplitude_ifreq": 0.2,
"ifreq_cutoff": 0.0,
"freq_conv": None,
"im_freq_input": 'opt=(calcfc,maxstep=5)',
"s2_threshold": 10.0,
"isom_type": None,
"isom_inputs": os.getcwd(),
Expand Down
27 changes: 14 additions & 13 deletions aqme/cmin.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,14 +157,15 @@ def __init__(self, **kwargs):
"\no Number of finished jobs from CMIN", max=len(self.args.files)
)

file_format = os.path.splitext(self.args.files[0])[1]
if file_format.lower() in ['.xyz', '.gjf', '.com']:
file_format = os.path.basename(Path(self.args.files[0])).split('.')[1]
file_dir = os.path.dirname(Path(self.args.files[0]))
if file_format.lower() in ['xyz', 'gjf', 'com']:
for file in self.args.files:
prepare_com_files(self.args, file)
if file_format.lower() in ['.gjf', '.com']:
files_temp_extra = glob.glob('*.xyz')
files_cmin = glob.glob('*.sdf')
elif file_format.lower() == '.pdb':
if file_format.lower() in ['gjf', 'com']:
files_temp_extra = glob.glob(f'{file_dir}/*.xyz')
files_cmin = glob.glob(f'{file_dir}/*.sdf')
elif file_format.lower() == 'pdb':
for file in self.args.files:
command_pdb = [
"obabel",
Expand All @@ -178,8 +179,8 @@ def __init__(self, **kwargs):
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
)
files_cmin = glob.glob('*.sdf')
elif file_format.lower() == '.sdf':
files_cmin = glob.glob(f'{file_dir}/*.sdf')
elif file_format.lower() == 'sdf':
files_cmin = self.args.files
else:
self.args.log.write(f"\nx The input format {file_format} is not supported for CMIN refinement! Formats allowed: SDF, XYZ, COM, GJF and PDB")
Expand All @@ -198,7 +199,7 @@ def __init__(self, **kwargs):
f"CMIN"
)
else:
if Path(f"{self.args.destination}").exists():
if self.args.initial_dir.as_posix() in f"{self.args.destination}":
self.cmin_folder = Path(self.args.destination)
else:
self.cmin_folder = Path(self.args.initial_dir).joinpath(
Expand Down Expand Up @@ -233,8 +234,8 @@ def __init__(self, **kwargs):
self.args.log.finalize()

# delete extra temporary files created when using XYZ, GJF, COM and PDB files
if file_format.lower() in ['.xyz', '.gjf', '.com', '.pdb']:
if file_format.lower() in ['.gjf', '.com']:
if file_format.lower() in ['xyz', 'gjf', 'com', 'pdb']:
if file_format.lower() in ['gjf', 'com']:
files_cmin = files_cmin + files_temp_extra
for temp_file in files_cmin:
os.remove(temp_file)
Expand Down Expand Up @@ -278,9 +279,9 @@ def compute_cmin(self, file):
# checks if xTB is installed
_ = self.get_cmin_model()
# sets charge and mult
file_format = os.path.splitext(file)[1]
file_format = os.path.basename(Path(file)).split('.')[1]
charge_input, mult_input, final_mult = None, None, None
if file_format.lower() == '.sdf':
if file_format.lower() == 'sdf':
if self.args.charge is None or self.args.mult is None:
# read charge and mult from SDF if possible (i.e. charge/mult of SDFs created with CSEARCH)
with open(file, "r") as F:
Expand Down
6 changes: 2 additions & 4 deletions aqme/csearch/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -450,12 +450,10 @@ def compute_confs(
f"CSEARCH"
)
else:
if Path(f"{self.args.destination}").exists():
if self.args.initial_dir.as_posix() in f"{self.args.destination}":
self.csearch_folder = Path(self.args.destination)
else:
self.csearch_folder = Path(self.args.initial_dir).joinpath(
self.args.destination
)
self.csearch_folder = Path(self.args.initial_dir).joinpath(self.args.destination)

self.csearch_folder.mkdir(exist_ok=True, parents=True)

Expand Down
17 changes: 12 additions & 5 deletions aqme/csearch/crest.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,25 +85,29 @@ def xtb_opt_main(
"""

name_no_path = name.replace("/", "\\").split("\\")[-1].split(".")[0]
# where RDKit generates the files
# folder to create the files
if self.args.destination is None:
if method_opt == 'crest':
csearch_dir = Path(self.args.w_dir_main) / "CSEARCH"
elif method_opt == 'xtb':
rdmolfiles.MolToXYZFile(mol, name + ".xyz")
csearch_dir = Path(self.args.w_dir_main) / "CMIN"
else:
csearch_dir = Path(self.args.destination)
if self.args.initial_dir.as_posix() in f"{self.args.destination}":
csearch_dir = Path(self.args.destination)
else:
csearch_dir = Path(self.args.initial_dir).joinpath(self.args.destination)

# create the initial xyz input
if method_opt == 'crest':
self.args.log.write(f"\no Starting xTB pre-optimization before CREST sampling")
dat_dir = csearch_dir / "crest_xyz"
xyzin = f"{dat_dir}/{name_no_path}.xyz"
elif method_opt == 'xtb':
rdmolfiles.MolToXYZFile(mol, f"{name}.xyz")
self.args.log.write(f"\no Starting xTB optimization")
dat_dir = csearch_dir / "xtb_xyz"
xyzin = f"{dat_dir}/{name_no_path}_xtb.xyz"
dat_dir.mkdir(exist_ok=True, parents=True)

shutil.move(f"{name}.xyz", xyzin)

os.environ["OMP_STACKSIZE"] = self.args.stacksize
Expand Down Expand Up @@ -360,7 +364,10 @@ def xtb_opt_main(
if os.path.exists(file):
if file.find('.out') == -1:
if self.args.program.lower() == "xtb":
os.remove(file)
try:
os.remove(file)
except OSError: # this avoids problems when running AQME in HPCs
pass
elif self.args.program.lower() == "crest":
if file.find('_xtb2') == -1 and file.find('_xtb1') == -1 and file.find('.out') == -1:
try:
Expand Down
88 changes: 59 additions & 29 deletions aqme/qcorr.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@
optimization but did not converge in the subsequent frequency
calculation. Options: opt keyword as string (i.e. 'opt=(calcfc,maxstep=5)').
If readfc is specified in the string, the chk option must be included as well.
im_freq_input : str, default='opt=(calcfc,maxstep=5)' (Gaussian), '\n%geom\nCalc_Hess true\nMaxStep 0.05\nend' (ORCA)
When extra imaginary frequencies are detected by QCORR, it automatically adds
hessian calcs before starting geometry optimizations. This option can be
disabled using im_freq_input=None.
s2_threshold : float, default=10.0
Cut off for spin contamination during analysis in % of the expected value
(i.e. multiplicity 3 has the expected <S**2> of 2.0,
Expand Down Expand Up @@ -203,8 +207,12 @@ def qcorr_processing(self):
errortype = self.analyze_isom(file, cartesians, atom_types, errortype)

# move initial QM input files (if the files are placed in the same folder as the output files)
if cclib_data["metadata"]["QM program"].lower().find("gaussian") > -1:
input_suffix = "com"
elif cclib_data["metadata"]["QM program"].lower().find("orca") > -1:
input_suffix = "inp"
if (
os.path.exists(f"{self.args.w_dir_main}/{file_name}.com")
os.path.exists(f"{self.args.w_dir_main}/{file_name}.{input_suffix}")
and self.args.round_num == 1
):
move_file(
Expand Down Expand Up @@ -244,7 +252,8 @@ def qcorr_processing(self):
csv_qcorr = self.write_qcorr_csv(file_terms)

# performs a full analysis to ensure that the calcs were run with the same parameters
if self.args.fullcheck == "False":
# currently, this function is not working with ORCA calcs
if self.args.fullcheck == "False" or cclib_data["metadata"]["QM program"].lower().find("orca") > -1:
self.args.fullcheck = False
elif self.args.fullcheck == "True":
self.args.fullcheck = True
Expand Down Expand Up @@ -486,19 +495,31 @@ def analyze_normal(self, duplicate_data, errortype, cclib_data, file_name):
if not opt_found:
cclib_data["metadata"]["keywords line"] += " opt"

if errortype == "freq_no_conv":
# adjust the keywords so only FREQ is calculated
new_keywords_line = ""
for keyword in cclib_data["metadata"]["keywords line"].split():
if keyword.lower().startswith("opt"):
keyword = self.args.freq_conv
if (
cclib_data["metadata"]["ground or transition state"]
== "transition_state"
):
keyword = keyword.replace("=(", "=(ts,noeigen,")
new_keywords_line += keyword
new_keywords_line += " "
# adding the Hessian calculation before OPT increases the rate of success
if errortype in ["freq_no_conv","extra_imag_freq"]:
new_opt = None
if errortype == "freq_no_conv" and self.args.freq_conv not in [None,'None']:
new_opt = self.args.freq_conv
elif errortype == "extra_imag_freq" and self.args.im_freq_input not in [None,'None']:
new_opt = self.args.im_freq_input
if cclib_data["metadata"]["QM program"].lower().find("gaussian") > -1:
new_keywords_line = ""
for keyword in cclib_data["metadata"]["keywords line"].split():
if keyword.lower().startswith("opt"):
if new_opt is not None:
keyword = new_opt
if cclib_data["metadata"]["ground or transition state"] == "transition_state":
keyword = keyword.replace("=(", "=(ts,noeigen,")
new_keywords_line += keyword
new_keywords_line += " "
elif cclib_data["metadata"]["QM program"].lower().find("orca") > -1:
new_keywords_line = cclib_data["metadata"]["keywords line"]
if self.args.im_freq_input not in [None,'None']:
# change the default value
if self.args.im_freq_input == 'opt=(calcfc,maxstep=5)':
self.args.im_freq_input = '\n%geom\nCalc_Hess true\nMaxStep 0.05\nend'
new_keywords_line += self.args.im_freq_input

cclib_data["metadata"]["keywords line"] = new_keywords_line

elif errortype == "linear_mol_wrong":
Expand All @@ -510,6 +531,12 @@ def analyze_abnormal(self, errortype, cclib_data, outlines):
"""
Analyze errors from calculations that did not finish normally
"""

if cclib_data["metadata"]["QM program"].lower().find("gaussian") > -1:
program = 'gaussian'
elif cclib_data["metadata"]["QM program"].lower().find("orca") > -1:
program = "orca"

# for calcs with finished OPT but no freqs, adjust the keywords so only FREQ is calculated
if errortype == "no_freq":
new_keywords_line = ""
Expand All @@ -524,20 +551,23 @@ def analyze_abnormal(self, errortype, cclib_data, outlines):
else:
# help to fix SCF convergence errors
if errortype == "SCFerror":
if (
cclib_data["metadata"]["keywords line"].find(" scf=xqc") > -1
or cclib_data["metadata"]["keywords line"].find(" scf=qc") > -1
):
new_keywords_line = ""
for keyword in cclib_data["metadata"]["keywords line"].split():
if keyword == "scf=xqc":
keyword = "scf=qc"
new_keywords_line += keyword
new_keywords_line += " "
cclib_data["metadata"]["keywords line"] = new_keywords_line

else:
cclib_data["metadata"]["keywords line"] += " scf=xqc"
if program == 'gaussian':
if (
cclib_data["metadata"]["keywords line"].find(" scf=xqc") > -1
or cclib_data["metadata"]["keywords line"].find(" scf=qc") > -1
):
new_keywords_line = ""
for keyword in cclib_data["metadata"]["keywords line"].split():
if keyword == "scf=xqc":
keyword = "scf=qc"
new_keywords_line += keyword
new_keywords_line += " "
cclib_data["metadata"]["keywords line"] = new_keywords_line
else:
cclib_data["metadata"]["keywords line"] += " scf=xqc"
elif program == 'orca':
if 'SlowConv' not in cclib_data["metadata"]["keywords line"]:
cclib_data["metadata"]["keywords line"] = 'SlowConv ' + cclib_data["metadata"]["keywords line"]

if errortype in ["not_specified", "SCFerror"]:
if "geometric values" in cclib_data["optimization"]:
Expand Down
50 changes: 44 additions & 6 deletions aqme/qcorr_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -368,7 +368,6 @@ def get_json_data(self, file, cclib_data):
"""

outlines = read_file(os.getcwd(), self.args.w_dir_main, file)

# initial loop just to detect the QM program
for i, line in enumerate(outlines):
# get program
Expand All @@ -386,9 +385,9 @@ def get_json_data(self, file, cclib_data):

elif "* O R C A *" in line:
for j in range(i, i + 100):
if "Program Version" in line.strip():
if "Program Version" in outlines[j].strip():
cclib_data["metadata"] = {}
version_program = "ORCA version " + line.split()[2]
version_program = "ORCA version " + outlines[j].split()[2]
cclib_data["metadata"]["QM program"] = version_program
break

Expand Down Expand Up @@ -581,9 +580,8 @@ def get_json_data(self, file, cclib_data):
cclib_data["properties"]["NMR"]["NMR eigenvalues"] = nmr_eigen
cclib_data["properties"]["NMR"]["NMR isotopic tensors"] = nmr_iso


elif cclib_data["metadata"]["QM program"].lower().find("orca") > -1:
for i in reversed(range(0, outlines)):
for i in reversed(range(0, len(outlines))):
if outlines[i][:25] == "FINAL SINGLE POINT ENERGY":
# in eV to match the format from cclib
orca_e = float(outlines[i].split()[-1])
Expand All @@ -592,8 +590,48 @@ def get_json_data(self, file, cclib_data):
] = cclib.parser.utils.convertor(orca_e, "hartree", "eV")
break

for i, line in enumerate(outlines):
# Extract number of processors
if "%pal" in line:
pal_line = ''
for j in range(i,i+3):
if outlines[j][0] not in ['%','!'] or "%pal" in outlines[j]:
pal_line += outlines[j].rstrip("\n")[5:]
if 'nprocs' in pal_line:
nprocs = pal_line.strip().split()[2]
cclib_data["metadata"]["processors"] = nprocs

# Extract memory
elif '%maxcore' in line:
mem = int(line.strip().split()[3])
cclib_data["metadata"]["memory"] = f'{mem}MB'

# Extract input line
elif "!" in line:
keywords_line = ""
for j in range(i, i + 100):
if "*" in outlines[j]:
break
else:
keywords_line += outlines[j][6:]
cclib_data["metadata"]["keywords line"] = keywords_line[1:].rstrip("\n")
calc_type = "ground_state"
for keyword in keywords_line.split():
if keyword.lower() in ["OptTS",'NEB-TS']:
calc_type = "transition_state"
break
if keyword.lower()[0:3] == 'pal':
cclib_data["metadata"]["processors"] = keyword[3]
cclib_data["metadata"]["ground or transition state"] = calc_type

elif 'END OF INPUT' in line:
break

if cclib_data != {}:
with open(f'{file.split(".")[0]}.json', "w") as outfile:
# this prevents errors when the names contain "."
name_path = os.path.basename(Path(file))
dir_path = os.path.dirname(Path(file))
with open(f'{dir_path}/{name_path.split(".")[0]}.json', "w") as outfile:
json.dump(cclib_data, outfile, indent=1)

return cclib_data
Expand Down
Loading

0 comments on commit 803302d

Please sign in to comment.