Skip to content

Commit

Permalink
Merge pull request #139 from jvalegre/jv_branch
Browse files Browse the repository at this point in the history
v1.4.7
  • Loading branch information
jvalegre authored Mar 13, 2023
2 parents 6aafb51 + 5d6c1e9 commit 803302d
Show file tree
Hide file tree
Showing 14 changed files with 1,459 additions and 93 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,10 @@
"# based on the atom numbers above, I choose the constraints for my TS:\n",
"# 1) Bond between atoms 4 and 5 with a distance of 1.8 A\n",
"# 2) Bond between atoms 5 and 9 with a distance of 1.8 A\n",
"constraits_dist = [[3,4,1.8],[4,5,1.8]]\n",
"constraints_dist = [[3,4,1.8],[4,5,1.8]]\n",
"\n",
"# 3) Angle between atoms 4, 5 and 9 of 180 degrees\n",
"constraits_angle = [[3,4,5,180]]"
"constraints_angle = [[3,4,5,180]]"
]
},
{
Expand All @@ -73,13 +73,13 @@
"# 1) Mapped SMILES string (smi=smi_new)\n",
"# 2) CREST sampling (program='crest')\n",
"# 3) Include CREGEN post-analysis (cregen=True)\n",
"# 4) Define distance constraints (constraints_dist=constraits_dist)\n",
"# 5) Define angle constraints (constraints_angle=constraits_angle)\n",
"# 4) Define distance constraints (constraints_dist=constraints_dist)\n",
"# 5) Define angle constraints (constraints_angle=constraints_angle)\n",
"# 6) Add water solvation in CREST (crest_keywords=\"--alpb h2o\")\n",
"# 7) Add water solvation in the xTB pre-optimization (xtb_keywords=\"--alpb h2o\")\n",
"# 8) Number of processors used in CREST (nprocs=12)\n",
"csearch(smi=smi_new,name='TS-example',program='crest',cregen=True,\n",
" constraints_dist=constraits_dist,constraints_angle=constraits_angle,\n",
" constraints_dist=constraints_dist,constraints_angle=constraints_angle,\n",
" crest_keywords=\"--alpb h2o\",xtb_keywords=\"--alpb h2o\",nprocs=12)"
]
},
Expand Down
1,259 changes: 1,259 additions & 0 deletions Example_workflows/QCORR_processing_QM_outputs/QCORR_1/Imag_freq_no_corr.log

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions aqme/aqme.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ def main():
ifreq_cutoff=args.ifreq_cutoff,
amplitude_ifreq=args.amplitude_ifreq,
freq_conv=args.freq_conv,
im_freq_input=args.im_freq_input,
s2_threshold=args.s2_threshold,
dup_threshold=args.dup_threshold,
ro_threshold=args.ro_threshold,
Expand Down
1 change: 1 addition & 0 deletions aqme/argument_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@
"amplitude_ifreq": 0.2,
"ifreq_cutoff": 0.0,
"freq_conv": None,
"im_freq_input": 'opt=(calcfc,maxstep=5)',
"s2_threshold": 10.0,
"isom_type": None,
"isom_inputs": os.getcwd(),
Expand Down
27 changes: 14 additions & 13 deletions aqme/cmin.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,14 +157,15 @@ def __init__(self, **kwargs):
"\no Number of finished jobs from CMIN", max=len(self.args.files)
)

file_format = os.path.splitext(self.args.files[0])[1]
if file_format.lower() in ['.xyz', '.gjf', '.com']:
file_format = os.path.basename(Path(self.args.files[0])).split('.')[1]
file_dir = os.path.dirname(Path(self.args.files[0]))
if file_format.lower() in ['xyz', 'gjf', 'com']:
for file in self.args.files:
prepare_com_files(self.args, file)
if file_format.lower() in ['.gjf', '.com']:
files_temp_extra = glob.glob('*.xyz')
files_cmin = glob.glob('*.sdf')
elif file_format.lower() == '.pdb':
if file_format.lower() in ['gjf', 'com']:
files_temp_extra = glob.glob(f'{file_dir}/*.xyz')
files_cmin = glob.glob(f'{file_dir}/*.sdf')
elif file_format.lower() == 'pdb':
for file in self.args.files:
command_pdb = [
"obabel",
Expand All @@ -178,8 +179,8 @@ def __init__(self, **kwargs):
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
)
files_cmin = glob.glob('*.sdf')
elif file_format.lower() == '.sdf':
files_cmin = glob.glob(f'{file_dir}/*.sdf')
elif file_format.lower() == 'sdf':
files_cmin = self.args.files
else:
self.args.log.write(f"\nx The input format {file_format} is not supported for CMIN refinement! Formats allowed: SDF, XYZ, COM, GJF and PDB")
Expand All @@ -198,7 +199,7 @@ def __init__(self, **kwargs):
f"CMIN"
)
else:
if Path(f"{self.args.destination}").exists():
if self.args.initial_dir.as_posix() in f"{self.args.destination}":
self.cmin_folder = Path(self.args.destination)
else:
self.cmin_folder = Path(self.args.initial_dir).joinpath(
Expand Down Expand Up @@ -233,8 +234,8 @@ def __init__(self, **kwargs):
self.args.log.finalize()

# delete extra temporary files created when using XYZ, GJF, COM and PDB files
if file_format.lower() in ['.xyz', '.gjf', '.com', '.pdb']:
if file_format.lower() in ['.gjf', '.com']:
if file_format.lower() in ['xyz', 'gjf', 'com', 'pdb']:
if file_format.lower() in ['gjf', 'com']:
files_cmin = files_cmin + files_temp_extra
for temp_file in files_cmin:
os.remove(temp_file)
Expand Down Expand Up @@ -278,9 +279,9 @@ def compute_cmin(self, file):
# checks if xTB is installed
_ = self.get_cmin_model()
# sets charge and mult
file_format = os.path.splitext(file)[1]
file_format = os.path.basename(Path(file)).split('.')[1]
charge_input, mult_input, final_mult = None, None, None
if file_format.lower() == '.sdf':
if file_format.lower() == 'sdf':
if self.args.charge is None or self.args.mult is None:
# read charge and mult from SDF if possible (i.e. charge/mult of SDFs created with CSEARCH)
with open(file, "r") as F:
Expand Down
6 changes: 2 additions & 4 deletions aqme/csearch/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -450,12 +450,10 @@ def compute_confs(
f"CSEARCH"
)
else:
if Path(f"{self.args.destination}").exists():
if self.args.initial_dir.as_posix() in f"{self.args.destination}":
self.csearch_folder = Path(self.args.destination)
else:
self.csearch_folder = Path(self.args.initial_dir).joinpath(
self.args.destination
)
self.csearch_folder = Path(self.args.initial_dir).joinpath(self.args.destination)

self.csearch_folder.mkdir(exist_ok=True, parents=True)

Expand Down
17 changes: 12 additions & 5 deletions aqme/csearch/crest.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,25 +85,29 @@ def xtb_opt_main(
"""

name_no_path = name.replace("/", "\\").split("\\")[-1].split(".")[0]
# where RDKit generates the files
# folder to create the files
if self.args.destination is None:
if method_opt == 'crest':
csearch_dir = Path(self.args.w_dir_main) / "CSEARCH"
elif method_opt == 'xtb':
rdmolfiles.MolToXYZFile(mol, name + ".xyz")
csearch_dir = Path(self.args.w_dir_main) / "CMIN"
else:
csearch_dir = Path(self.args.destination)
if self.args.initial_dir.as_posix() in f"{self.args.destination}":
csearch_dir = Path(self.args.destination)
else:
csearch_dir = Path(self.args.initial_dir).joinpath(self.args.destination)

# create the initial xyz input
if method_opt == 'crest':
self.args.log.write(f"\no Starting xTB pre-optimization before CREST sampling")
dat_dir = csearch_dir / "crest_xyz"
xyzin = f"{dat_dir}/{name_no_path}.xyz"
elif method_opt == 'xtb':
rdmolfiles.MolToXYZFile(mol, f"{name}.xyz")
self.args.log.write(f"\no Starting xTB optimization")
dat_dir = csearch_dir / "xtb_xyz"
xyzin = f"{dat_dir}/{name_no_path}_xtb.xyz"
dat_dir.mkdir(exist_ok=True, parents=True)

shutil.move(f"{name}.xyz", xyzin)

os.environ["OMP_STACKSIZE"] = self.args.stacksize
Expand Down Expand Up @@ -360,7 +364,10 @@ def xtb_opt_main(
if os.path.exists(file):
if file.find('.out') == -1:
if self.args.program.lower() == "xtb":
os.remove(file)
try:
os.remove(file)
except OSError: # this avoids problems when running AQME in HPCs
pass
elif self.args.program.lower() == "crest":
if file.find('_xtb2') == -1 and file.find('_xtb1') == -1 and file.find('.out') == -1:
try:
Expand Down
88 changes: 59 additions & 29 deletions aqme/qcorr.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@
optimization but did not converge in the subsequent frequency
calculation. Options: opt keyword as string (i.e. 'opt=(calcfc,maxstep=5)').
If readfc is specified in the string, the chk option must be included as well.
im_freq_input : str, default='opt=(calcfc,maxstep=5)' (Gaussian), '\n%geom\nCalc_Hess true\nMaxStep 0.05\nend' (ORCA)
When extra imaginary frequencies are detected by QCORR, it automatically adds
hessian calcs before starting geometry optimizations. This option can be
disabled using im_freq_input=None.
s2_threshold : float, default=10.0
Cut off for spin contamination during analysis in % of the expected value
(i.e. multiplicity 3 has the expected <S**2> of 2.0,
Expand Down Expand Up @@ -203,8 +207,12 @@ def qcorr_processing(self):
errortype = self.analyze_isom(file, cartesians, atom_types, errortype)

# move initial QM input files (if the files are placed in the same folder as the output files)
if cclib_data["metadata"]["QM program"].lower().find("gaussian") > -1:
input_suffix = "com"
elif cclib_data["metadata"]["QM program"].lower().find("orca") > -1:
input_suffix = "inp"
if (
os.path.exists(f"{self.args.w_dir_main}/{file_name}.com")
os.path.exists(f"{self.args.w_dir_main}/{file_name}.{input_suffix}")
and self.args.round_num == 1
):
move_file(
Expand Down Expand Up @@ -244,7 +252,8 @@ def qcorr_processing(self):
csv_qcorr = self.write_qcorr_csv(file_terms)

# performs a full analysis to ensure that the calcs were run with the same parameters
if self.args.fullcheck == "False":
# currently, this function is not working with ORCA calcs
if self.args.fullcheck == "False" or cclib_data["metadata"]["QM program"].lower().find("orca") > -1:
self.args.fullcheck = False
elif self.args.fullcheck == "True":
self.args.fullcheck = True
Expand Down Expand Up @@ -486,19 +495,31 @@ def analyze_normal(self, duplicate_data, errortype, cclib_data, file_name):
if not opt_found:
cclib_data["metadata"]["keywords line"] += " opt"

if errortype == "freq_no_conv":
# adjust the keywords so only FREQ is calculated
new_keywords_line = ""
for keyword in cclib_data["metadata"]["keywords line"].split():
if keyword.lower().startswith("opt"):
keyword = self.args.freq_conv
if (
cclib_data["metadata"]["ground or transition state"]
== "transition_state"
):
keyword = keyword.replace("=(", "=(ts,noeigen,")
new_keywords_line += keyword
new_keywords_line += " "
# adding the Hessian calculation before OPT increases the rate of success
if errortype in ["freq_no_conv","extra_imag_freq"]:
new_opt = None
if errortype == "freq_no_conv" and self.args.freq_conv not in [None,'None']:
new_opt = self.args.freq_conv
elif errortype == "extra_imag_freq" and self.args.im_freq_input not in [None,'None']:
new_opt = self.args.im_freq_input
if cclib_data["metadata"]["QM program"].lower().find("gaussian") > -1:
new_keywords_line = ""
for keyword in cclib_data["metadata"]["keywords line"].split():
if keyword.lower().startswith("opt"):
if new_opt is not None:
keyword = new_opt
if cclib_data["metadata"]["ground or transition state"] == "transition_state":
keyword = keyword.replace("=(", "=(ts,noeigen,")
new_keywords_line += keyword
new_keywords_line += " "
elif cclib_data["metadata"]["QM program"].lower().find("orca") > -1:
new_keywords_line = cclib_data["metadata"]["keywords line"]
if self.args.im_freq_input not in [None,'None']:
# change the default value
if self.args.im_freq_input == 'opt=(calcfc,maxstep=5)':
self.args.im_freq_input = '\n%geom\nCalc_Hess true\nMaxStep 0.05\nend'
new_keywords_line += self.args.im_freq_input

cclib_data["metadata"]["keywords line"] = new_keywords_line

elif errortype == "linear_mol_wrong":
Expand All @@ -510,6 +531,12 @@ def analyze_abnormal(self, errortype, cclib_data, outlines):
"""
Analyze errors from calculations that did not finish normally
"""

if cclib_data["metadata"]["QM program"].lower().find("gaussian") > -1:
program = 'gaussian'
elif cclib_data["metadata"]["QM program"].lower().find("orca") > -1:
program = "orca"

# for calcs with finished OPT but no freqs, adjust the keywords so only FREQ is calculated
if errortype == "no_freq":
new_keywords_line = ""
Expand All @@ -524,20 +551,23 @@ def analyze_abnormal(self, errortype, cclib_data, outlines):
else:
# help to fix SCF convergence errors
if errortype == "SCFerror":
if (
cclib_data["metadata"]["keywords line"].find(" scf=xqc") > -1
or cclib_data["metadata"]["keywords line"].find(" scf=qc") > -1
):
new_keywords_line = ""
for keyword in cclib_data["metadata"]["keywords line"].split():
if keyword == "scf=xqc":
keyword = "scf=qc"
new_keywords_line += keyword
new_keywords_line += " "
cclib_data["metadata"]["keywords line"] = new_keywords_line

else:
cclib_data["metadata"]["keywords line"] += " scf=xqc"
if program == 'gaussian':
if (
cclib_data["metadata"]["keywords line"].find(" scf=xqc") > -1
or cclib_data["metadata"]["keywords line"].find(" scf=qc") > -1
):
new_keywords_line = ""
for keyword in cclib_data["metadata"]["keywords line"].split():
if keyword == "scf=xqc":
keyword = "scf=qc"
new_keywords_line += keyword
new_keywords_line += " "
cclib_data["metadata"]["keywords line"] = new_keywords_line
else:
cclib_data["metadata"]["keywords line"] += " scf=xqc"
elif program == 'orca':
if 'SlowConv' not in cclib_data["metadata"]["keywords line"]:
cclib_data["metadata"]["keywords line"] = 'SlowConv ' + cclib_data["metadata"]["keywords line"]

if errortype in ["not_specified", "SCFerror"]:
if "geometric values" in cclib_data["optimization"]:
Expand Down
50 changes: 44 additions & 6 deletions aqme/qcorr_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -368,7 +368,6 @@ def get_json_data(self, file, cclib_data):
"""

outlines = read_file(os.getcwd(), self.args.w_dir_main, file)

# initial loop just to detect the QM program
for i, line in enumerate(outlines):
# get program
Expand All @@ -386,9 +385,9 @@ def get_json_data(self, file, cclib_data):

elif "* O R C A *" in line:
for j in range(i, i + 100):
if "Program Version" in line.strip():
if "Program Version" in outlines[j].strip():
cclib_data["metadata"] = {}
version_program = "ORCA version " + line.split()[2]
version_program = "ORCA version " + outlines[j].split()[2]
cclib_data["metadata"]["QM program"] = version_program
break

Expand Down Expand Up @@ -581,9 +580,8 @@ def get_json_data(self, file, cclib_data):
cclib_data["properties"]["NMR"]["NMR eigenvalues"] = nmr_eigen
cclib_data["properties"]["NMR"]["NMR isotopic tensors"] = nmr_iso


elif cclib_data["metadata"]["QM program"].lower().find("orca") > -1:
for i in reversed(range(0, outlines)):
for i in reversed(range(0, len(outlines))):
if outlines[i][:25] == "FINAL SINGLE POINT ENERGY":
# in eV to match the format from cclib
orca_e = float(outlines[i].split()[-1])
Expand All @@ -592,8 +590,48 @@ def get_json_data(self, file, cclib_data):
] = cclib.parser.utils.convertor(orca_e, "hartree", "eV")
break

for i, line in enumerate(outlines):
# Extract number of processors
if "%pal" in line:
pal_line = ''
for j in range(i,i+3):
if outlines[j][0] not in ['%','!'] or "%pal" in outlines[j]:
pal_line += outlines[j].rstrip("\n")[5:]
if 'nprocs' in pal_line:
nprocs = pal_line.strip().split()[2]
cclib_data["metadata"]["processors"] = nprocs

# Extract memory
elif '%maxcore' in line:
mem = int(line.strip().split()[3])
cclib_data["metadata"]["memory"] = f'{mem}MB'

# Extract input line
elif "!" in line:
keywords_line = ""
for j in range(i, i + 100):
if "*" in outlines[j]:
break
else:
keywords_line += outlines[j][6:]
cclib_data["metadata"]["keywords line"] = keywords_line[1:].rstrip("\n")
calc_type = "ground_state"
for keyword in keywords_line.split():
if keyword.lower() in ["OptTS",'NEB-TS']:
calc_type = "transition_state"
break
if keyword.lower()[0:3] == 'pal':
cclib_data["metadata"]["processors"] = keyword[3]
cclib_data["metadata"]["ground or transition state"] = calc_type

elif 'END OF INPUT' in line:
break

if cclib_data != {}:
with open(f'{file.split(".")[0]}.json', "w") as outfile:
# this prevents errors when the names contain "."
name_path = os.path.basename(Path(file))
dir_path = os.path.dirname(Path(file))
with open(f'{dir_path}/{name_path.split(".")[0]}.json', "w") as outfile:
json.dump(cclib_data, outfile, indent=1)

return cclib_data
Expand Down
Loading

0 comments on commit 803302d

Please sign in to comment.