TheoChem-VU · YHordijk · Apr 25, 2024 · Feb 7, 2024 · Feb 7, 2024 · Feb 7, 2024
diff --git a/recipes/job/orca_counter_poise.py b/recipes/job/orca_counter_poise.py
@@ -0,0 +1,60 @@
+from tcutility.job import ORCAJob
+from tcutility import molecule
+
+
+# passed on to every ORCAJob that is created below
+test_mode = False
+# slurm settings that are shared by all jobs
+sbatch = dict(p='tc', n=32, mem=224000, t='120:00:00')
+
+mol = molecule.load('RC_C2H4_CH3.xyz')
+frags = molecule.guess_fragments(mol)
+# the spin-polarization of the complex is the sum of the spin-polarizations of its fragments
+total_spinpol = sum(fragmol.flags.get('spinpol', 0) for fragmol in frags.values())
+
+
+def setup_job(job, name, spinpol):
+    '''
+    Apply the settings that are the same for every calculation in this recipe.
+
+    Args:
+        job: the ORCAJob to apply the settings to.
+        name: the name to give to the job.
+        spinpol: the spin-polarization to use for the job.
+    '''
+    job.name = name
+    job.spin_polarization(spinpol)
+    job.sbatch(**sbatch)
+    job.rundir = 'calculations'
+    job.add_preamble('export PATH=/scistor/tc/dra480/bin/ompi411/bin:$PATH')
+    job.add_preamble('export LD_LIBRARY_PATH=/scistor/tc/dra480/bin/ompi411/lib/:$LD_LIBRARY_PATH')
+    job.orca_path = "/scistor/tc/dra480/bin/orca500/orca"
+    job.settings.mdci.UseQROs = 'True'
+    for key in 'CCSD(T) CC-pVTZ TightSCF UNO'.split():
+        job.settings.main.add(key)
+
+
+# for each fragment we run three jobs: a geometry optimization of the fragment, a single point of the fragment
+# in the geometry it has in the complex, and a calculation on the complex with the atoms of this fragment as ghost atoms
+for fragname, fragmol in frags.items():
+    spinpol = fragmol.flags.get('spinpol', 0)
+
+    with ORCAJob(test_mode=test_mode) as job:
+        job.molecule(fragmol)
+        job.optimization()
+        setup_job(job, f'frag_{fragname}_GO', spinpol)
+
+    with ORCAJob(test_mode=test_mode) as job:
+        job.molecule(fragmol)
+        setup_job(job, f'frag_{fragname}_SP', spinpol)
+
+    with ORCAJob(test_mode=test_mode) as job:
+        job.molecule(mol)
+        job.ghost_atoms([mol.atoms.index(atom) + 1 for atom in fragmol])
+        # only the other fragments have real atoms here, so we use the sum of their spin-polarizations
+        setup_job(job, f'complex_frag_{fragname}_BS', total_spinpol - spinpol)
+
+# finally the calculation on the full complex without ghost atoms
+with ORCAJob(test_mode=test_mode) as job:
+    job.molecule(mol)
+    setup_job(job, 'complex', total_spinpol)
diff --git a/src/tcutility/job/generic.py b/src/tcutility/job/generic.py
@@ -69,7 +69,11 @@ def sbatch(self, **kwargs):
         Change slurm settings, for example, to change the partition or change the number of cores to use.
         The arguments are the same as you would use for sbatch (`see sbatch manual <https://slurm.schedmd.com/sbatch.html>`_). E.g. to change the partition to 'tc' call:
 
-        ``job.sbatch(p='tc')`` or ``job.sbatch(partition='tc')``
+        ``job.sbatch(p='tc')`` or ``job.sbatch(partition='tc')``.
+
+        Flags can be set as arguments with a boolean to enable or disable them:
+
+        ``job.sbatch(exclusive=True)`` will set the ``--exclusive`` flag.
 
         .. warning::
 

diff --git a/src/tcutility/job/orca.py b/src/tcutility/job/orca.py
@@ -1,65 +1,137 @@
-from tcutility import log, results
+from tcutility import log, results, ensure_list, spell_check, slurm
 from tcutility.job.generic import Job
 import subprocess as sp
 import os
+from typing import List, Union
+from scm import plams
 
 
 j = os.path.join
 
 
 class ORCAJob(Job):
-    def __init__(self, *args, **kwargs):
+    def __init__(self, *args, use_tmpdir=False, **kwargs):
+        '''
+        Args:
+            use_tmpdir: if enabled and SLURM is available, the run script moves the calculation to ``$TMPDIR`` and copies the results back afterwards. This argument is keyword-only so that positional arguments are still passed on to ``Job`` unchanged.
+        '''
         super().__init__(*args, **kwargs)
-        self.settings.main = {'LARGEPRINT'}
+        self.settings.main = set()
         self._charge = 0
         self._multiplicity = 1
+        self._ghosts = []
+        self._method = None
+        self._reference = None
         self.memory = None
         self.processes = None
         self.orca_path = None
+        self.use_tmpdir = use_tmpdir
 
         self.single_point()
 
-    def __casefold_main(self):
-        self.settings.main = {key.casefold() for key in self.settings.main}
+    def main(self, val: Union[str, List[str]]):
+        '''
+        Add main options for this ORCA calculation, they will be added to the input prepended with exclamation marks.
+
+        Args:
+            val: the main options to add. This can be a string or a list of strings with the main options.
+        '''
+        # we want to split a string such as 'CC-pVTZ Opt CCSD(T)' into loose parts and add them separately
+        # this should always return a list
+        if isinstance(val, str):
+            val = val.split()
+        # settings.main is a set, so options that are already present are not duplicated
+        self.settings.main.update(val)
+
+    def remove_main(self, val: Union[str, List[str]]):
+        '''
+        Remove main options from this ORCA calculation. Options are matched case-insensitively and options that are not present are ignored.
+
+        Args:
+            val: the main options to remove. This can be a string or a list of strings with the main options.
+        '''
+        if isinstance(val, str):
+            val = val.split()
+
+        # map the casefolded spelling onto the spelling that is actually stored in the set
+        lower_main = {key.casefold(): key for key in self.settings.main}
+        for v in val:
+            if v.casefold() in lower_main:
+                self.settings.main.discard(lower_main[v.casefold()])
 
     def __remove_task(self):
-        self.__casefold_main()
-        [self.settings.main.discard(task) for task in ['sp', 'opt', 'tsopt', 'neb-ts']]
+        # drop every task keyword so that the caller can set a new one
+        self.remove_main(['sp', 'opt', 'optts', 'neb-ts'])
+
+    def method(self, method):
+        '''
+        Set the method for this calculation by adding it to the main options.
+
+        Args:
+            method: the name of the method, it is passed to ``spell_check.check`` together with ``MP2``, ``CCSD``, ``CCSD(T)`` and ``CCSDT``.
+        '''
+        spell_check.check(method, ['MP2', 'CCSD', 'CCSD(T)', 'CCSDT'])
+        self.settings.main.add(method)
+        self._method = method
+
+    def reference(self, ref):
+        '''
+        Set the reference for this calculation by adding it to the main options.
+
+        Args:
+            ref: the name of the reference, it is passed to ``spell_check.check`` together with ``UNO``, ``UHF``, ``UKS``, ``RHF``, ``RKS``, ``ROHF`` and ``ROKS``.
+        '''
+        spell_check.check(ref, ['UNO', 'UHF', 'UKS', 'RHF', 'RKS', 'ROHF', 'ROKS'])
+        self.settings.main.add(ref)
+        # stored separately so that it does not overwrite the method stored by ORCAJob.method
+        self._reference = ref
+
+    def QRO(self, enable=True):
+        '''Set the ``UseQROs`` key of the ``MDCI`` settings to ``enable``.'''
+        self.settings.MDCI.UseQROs = enable
+
+    def basis_set(self, value):
+        '''Add the basis set ``value`` to the main options.'''
+        self.settings.main.add(value)
 
     def single_point(self):
         self.__remove_task()
-        self.settings.main.add('sp')
+        self.settings.main.add('SP')
 
     def transition_state(self):
         self.__remove_task()
         self.vibrations()
-        self.settings.main.add('optts')
+        self.settings.main.add('OptTS')
 
     def optimization(self):
         self.__remove_task()
         self.vibrations()
-        self.settings.main.add('opt')
+        self.settings.main.add('Opt')
 
     def vibrations(self, enable=True, numerical=False):
-        self.__casefold_main()
-        self.settings.main.discard('numfreq')
-        self.settings.main.discard('freq')
+        self.remove_main('NumFreq')
+        self.remove_main('Freq')
         if not enable:
             return
         if numerical:
-            self.settings.main.add('numfreq')
+            self.settings.main.add('NumFreq')
         else:
-            self.settings.main.add('freq')
+            self.settings.main.add('Freq')
 
     def charge(self, val):
         self._charge = val
 
     def spin_polarization(self, val):
-        self._multiplicity = 2 * val + 1
+        # val is taken as the number of unpaired electrons, so the multiplicity 2S + 1 with S = val / 2 is val + 1
+        self._multiplicity = val + 1
 
     def multiplicity(self, val):
         self._multiplicity = val
 
+    def ghost_atoms(self, indices: Union[int, List[int]]):
+        '''Mark atoms as ghost atoms. ``indices`` holds the positions of the atoms in the molecule, counting from 1.'''
+        self._ghosts.extend(ensure_list(indices))
+
     def get_memory_usage(self):
         mem = self.memory or self._sbatch.mem or None
 
@@ -74,11 +116,30 @@ def get_memory_usage(self):
 
         return mem, ntasks
 
+    def molecule(self, mol: Union[str, plams.Molecule, plams.Atom, List[plams.Atom]], natoms: Union[int, None] = None):
+        '''
+        Add a molecule to this calculation in various formats.
+
+        Args:
+            mol: the molecule to read, can be a path (str). If the path exists already we read it. If it does not exist yet, it will be read in later. mol can also be a plams.Molecule object or a single or a list of plams.Atom objects.
+            natoms: If the molecule is supplied as a path you should also give the number of atoms.
+        '''
+        super().molecule(mol)
+        self.natoms = natoms
+
     def get_input(self):
         # set the correct memory usage and processes
         mem, ntasks = self.get_memory_usage()
         if ntasks and mem:
-            natoms = len(self._molecule)
+            if self._molecule is not None:
+                # ghost atoms are not counted, the set guards against indices that were added twice
+                natoms = len(self._molecule) - len(set(self._ghosts))
+            else:
+                natoms = getattr(self, 'natoms', None)
+                if natoms is None:
+                    raise ValueError('You set the molecule as a path and did not supply the number of atoms.')
+
-            ntasks = min(ntasks, (natoms - 1) * 3)
+            # never go below one process, a single non-ghost atom would otherwise give nprocs = 0 and a division by zero below
+            ntasks = max(1, min(ntasks, (natoms - 1) * 3))
             self.settings.PAL.nprocs = ntasks
             self.settings.maxcore = int(mem / ntasks * 0.75)
@@ -107,42 +166,72 @@ def get_input(self):
         ret += '\n'
 
         if self._molecule_path:
-            ret += f'* xyz {self._charge} {self._multiplicity} {os.path.abspath(self._molecule_path)}\n'
+            ret += f'* xyzfile {self._charge} {self._multiplicity} {os.path.abspath(self._molecule_path)}\n'
 
         else:
             ret += f'* xyz {self._charge} {self._multiplicity}\n'
-            for atom in self._molecule:
-                ret += f'    {atom.symbol:2} {atom.x: >13f} {atom.y: >13f} {atom.z: >13f}\n'
+            # ghost atoms get a colon after the element symbol, their indices count from 1
+            ghosts = set(self._ghosts)
+            for i, atom in enumerate(self._molecule, start=1):
+                if i in ghosts:
+                    ret += f'    {atom.symbol:2}: {atom.x: >13f} {atom.y: >13f} {atom.z: >13f}\n'
+                else:
+                    ret += f'    {atom.symbol:3} {atom.x: >13f} {atom.y: >13f} {atom.z: >13f}\n'
             ret += '*\n'
 
         return ret
 
     def _setup_job(self):
         try:
-            if self.orca_path is None:
+            if self.orca_path is None and not self.test_mode:
                 self.orca_path = sp.check_output(['which', 'orca']).decode().strip()
         except sp.CalledProcessError:
-            log.error('Could not find the orca path. Please set it manually.')
-            return
+            log.warn(f'Could not find the orca path. Set the {self.__class__.__name__}.orca_path attribute to add it. Now setting it to "$(which orca)", make sure the orca executable is findable.')
+            self.orca_path = '$(which orca)'
 
         if not self._molecule and not self._molecule_path:
-            log.error(f'You did not supply a molecule for this job. Call the {self.__class__}.molecule method to add one.')
+            log.error(f'You did not supply a molecule for this job. Call the {self.__class__.__name__}.molecule method to add one.')
             return
 
         os.makedirs(self.workdir, exist_ok=True)
         with open(self.inputfile_path, 'w+') as inp:
             inp.write(self.get_input())
 
         with open(self.runfile_path, 'w+') as runf:
-            runf.write('#!/bin/sh\n\n')  # the shebang is not written by default by ADF
+            runf.write('#!/bin/sh\n\n')
             runf.write('\n'.join(self._preambles) + '\n\n')
-            runf.write(f'{self.orca_path} {self.inputfile_path}\n')
+
+            # when using temporary directories for SLURM we need to do some extra setup
+            # this is mainly moving the calculation directory to the TMPDIR location
+            # and after the job is finished we copy back the results and remove the TMPDIR
+            if self.use_tmpdir and slurm.has_slurm():
+                runf.write('export TMPDIR="$TMPDIR/$SLURM_JOB_ID"\n')
+                runf.write('mkdir -p "$TMPDIR"\n')
+                runf.write('cd "$TMPDIR"\n')
+                runf.write(f'cp {self.inputfile_path} "$TMPDIR"\n')
+
+                runf.write(f'{self.orca_path} "$TMPDIR"/{self.name}.in\n')
+
+                runf.write(f'cp "$TMPDIR"/* {self.workdir}\n')
+                runf.write('rm -rf "$TMPDIR"\n')
+
+            else:
+                # inputfile_path is the file that was written above, so it is given to ORCA as is
+                runf.write(f'{self.orca_path} {self.inputfile_path}\n')
+
             runf.write('\n'.join(self._postambles))
 
         return True
 
+    @property
+    def output_mol_path(self):
+        '''
+        The default file path for output molecules when running ORCA calculations. It will not be created for singlepoint calculations.
+        '''
+        return j(self.workdir, 'OPT.xyz')
+
 
 if __name__ == '__main__':
     job = ORCAJob()
-    job.molecule('water.xyz')
-    job._setup_job()
+    job.main('OPT cc-pVTZ')
+    job.remove_main('OPT OPTTS NEB')
diff --git a/src/tcutility/results/__init__.py b/src/tcutility/results/__init__.py
@@ -94,8 +94,20 @@ def read(calc_dir: Union[str, pl.Path]) -> Result:
         ret.dftb = dftb.get_calc_settings(ret)
         ret.properties = dftb.get_properties(ret)
     elif ret.engine == "orca":
-        ret.orca = orca.get_calc_settings(ret)
-        ret.properties = orca.get_properties(ret)
+        # report which calculation could not be parsed before the error is passed on to the caller
+        try:
+            ret.orca = orca.get_calc_settings(ret)
+        except Exception:
+            ret.orca = None
+            print('Error reading:', calc_dir)
+            raise
+
+        try:
+            ret.properties = orca.get_properties(ret)
+        except Exception:
+            ret.properties = None
+            print('Error reading:', calc_dir)
+            raise
 
     # unload cached KFReaders associated with this calc_dir
     to_delete = [key for key in cache._cache if key.startswith(os.path.abspath(calc_dir))]