diff --git a/recipes/job/orca_counter_poise.py b/recipes/job/orca_counter_poise.py
new file mode 100644
index 00000000..57e3c8ff
--- /dev/null
+++ b/recipes/job/orca_counter_poise.py
@@ -0,0 +1,66 @@
+from tcutility.job import ORCAJob
+from tcutility import molecule
+
+
+
+test_mode = False
+sbatch = dict(p='tc', n=32, mem=224000, t='120:00:00')
+
+mol = molecule.load('RC_C2H4_CH3.xyz')
+frags = molecule.guess_fragments(mol)
+
+# for each fragment we do an optimization and single point on the complex geometry
+for fragname, fragmol in frags.items():
+    # geometry optimization of the fragment
+    with ORCAJob(test_mode=test_mode) as job:
+        job.molecule(fragmol)
+        job.optimization()
+        job.name = f'frag_{fragname}_GO'
+        job.spin_polarization(fragmol.flags.get('spinpol', 0))
+        job.sbatch(**sbatch)
+        job.rundir = 'calculations'
+        job.add_preamble('export PATH=/scistor/tc/dra480/bin/ompi411/bin:$PATH')
+        job.add_preamble('export LD_LIBRARY_PATH=/scistor/tc/dra480/bin/ompi411/lib/:$LD_LIBRARY_PATH')
+        job.orca_path = "/scistor/tc/dra480/bin/orca500/orca"
+        job.settings.mdci.UseQROs = 'True'
+        job.main('CCSD(T) CC-pVTZ TightSCF UNO')
+
+    # single point on the fragment as it was loaded, without optimization
+    with ORCAJob(test_mode=test_mode) as job:
+        job.molecule(fragmol)
+        job.name = f'frag_{fragname}_SP'
+        job.spin_polarization(fragmol.flags.get('spinpol', 0))
+        job.sbatch(**sbatch)
+        job.rundir = 'calculations'
+        job.add_preamble('export PATH=/scistor/tc/dra480/bin/ompi411/bin:$PATH')
+        job.add_preamble('export LD_LIBRARY_PATH=/scistor/tc/dra480/bin/ompi411/lib/:$LD_LIBRARY_PATH')
+        job.orca_path = "/scistor/tc/dra480/bin/orca500/orca"
+        job.settings.mdci.UseQROs = 'True'
+        job.main('CCSD(T) CC-pVTZ TightSCF UNO')
+
+    # single point on the full complex where the atoms of this fragment are marked as ghost atoms
+    with ORCAJob(test_mode=test_mode) as job:
+        job.molecule(mol)
+        job.name = f'complex_frag_{fragname}_BS'
+        job.spin_polarization(sum(f.flags.get('spinpol', 0) for f in frags.values()) - fragmol.flags.get('spinpol', 0))
+        job.ghost_atoms([mol.atoms.index(atom) + 1 for atom in fragmol])
+        job.sbatch(**sbatch)
+        job.rundir = 'calculations'
+        job.add_preamble('export PATH=/scistor/tc/dra480/bin/ompi411/bin:$PATH')
+        job.add_preamble('export LD_LIBRARY_PATH=/scistor/tc/dra480/bin/ompi411/lib/:$LD_LIBRARY_PATH')
+        job.orca_path = "/scistor/tc/dra480/bin/orca500/orca"
+        job.settings.mdci.UseQROs = 'True'
+        job.main('CCSD(T) CC-pVTZ TightSCF UNO')
+
+# single point on the full complex
+with ORCAJob(test_mode=test_mode) as job:
+    job.molecule(mol)
+    job.name = 'complex'
+    job.spin_polarization(sum(f.flags.get('spinpol', 0) for f in frags.values()))
+    job.sbatch(**sbatch)
+    job.rundir = 'calculations'
+    job.add_preamble('export PATH=/scistor/tc/dra480/bin/ompi411/bin:$PATH')
+    job.add_preamble('export LD_LIBRARY_PATH=/scistor/tc/dra480/bin/ompi411/lib/:$LD_LIBRARY_PATH')
+    job.orca_path = "/scistor/tc/dra480/bin/orca500/orca"
+    job.settings.mdci.UseQROs = 'True'
+    job.main('CCSD(T) CC-pVTZ TightSCF UNO')
diff --git a/src/tcutility/job/generic.py b/src/tcutility/job/generic.py
index 7eb117e7..b5b06e34 100644
--- a/src/tcutility/job/generic.py
+++ b/src/tcutility/job/generic.py
@@ -69,7 +69,11 @@ def sbatch(self, **kwargs):
         Change slurm settings, for example, to change the partition or change the number of cores to use.
         The arguments are the same as you would use for sbatch (`see sbatch manual `_). E.g. to change the partition to 'tc' call:
 
-        ``job.sbatch(p='tc')`` or ``job.sbatch(partition='tc')``
+        ``job.sbatch(p='tc')`` or ``job.sbatch(partition='tc')``.
+
+        Flags can be set as arguments with a boolean to enable or disable them:
+
+        ``job.sbatch(exclusive=True)`` will set the ``--exclusive`` flag.
 
         .. warning::
 
diff --git a/src/tcutility/job/orca.py b/src/tcutility/job/orca.py
index aa99480f..a7d4d857 100644
--- a/src/tcutility/job/orca.py
+++ b/src/tcutility/job/orca.py
@@ -1,65 +1,115 @@
-from tcutility import log, results
+from tcutility import log, results, ensure_list, spell_check, slurm
 from tcutility.job.generic import Job
 import subprocess as sp
 import os
+from typing import List, Union
+from scm import plams
 
 j = os.path.join
 
 
 class ORCAJob(Job):
-    def __init__(self, *args, **kwargs):
+    def __init__(self, use_tmpdir=False, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        self.settings.main = {'LARGEPRINT'}
+        self.settings.main = set()
         self._charge = 0
         self._multiplicity = 1
+        self._ghosts = []
+        self._method = None
+        self._reference = None
         self.memory = None
         self.processes = None
         self.orca_path = None
+        self.use_tmpdir = use_tmpdir
 
         self.single_point()
 
-    def __casefold_main(self):
-        self.settings.main = {key.casefold() for key in self.settings.main}
+    def main(self, val: Union[str, List[str]]):
+        '''
+        Add main options for this ORCA calculation, they will be added to the input prepended with exclamation marks.
+
+        Args:
+            val: the main options to add. This can be a string or a list of strings with the main options.
+        '''
+        # we want to split a string such as 'CC-pVTZ Opt CCSD(T)' into loose parts and add them separately
+        # this should always return a list
+        if isinstance(val, str):
+            val = val.split()
+        # add each option to the set of main options
+        self.settings.main.update(val)
+
+    def remove_main(self, val: Union[str, List[str]]):
+        '''Remove main options, compared case-insensitively. Accepts a space-separated string or a list of strings.'''
+        if isinstance(val, str):
+            val = val.split()
+
+        lower_main = {key.casefold(): key for key in self.settings.main}
+        for v in val:
+            if v.casefold() in lower_main:
+                self.settings.main.discard(lower_main[v.casefold()])
 
     def __remove_task(self):
-        self.__casefold_main()
-        [self.settings.main.discard(task) for task in ['sp', 'opt', 'tsopt', 'neb-ts']]
+        self.remove_main(['sp', 'opt', 'optts', 'neb-ts'])
+
+    def method(self, method):
+        '''Add the method keyword to the main options after passing it through spell_check.'''
+        spell_check.check(method, ['HF', 'MP2', 'CCSD', 'CCSD(T)', 'CCSDT'])
+        self.settings.main.add(method)
+        self._method = method
+
+    def reference(self, ref):
+        '''Add the reference keyword (e.g. UHF, ROHF, UNO) to the main options after passing it through spell_check.'''
+        spell_check.check(ref, ['UNO', 'UHF', 'UKS', 'RHF', 'RKS', 'ROHF', 'ROKS'])
+        self.settings.main.add(ref)
+        self._reference = ref
+
+    def QRO(self, enable=True):
+        '''Set the UseQROs option in the MDCI settings.'''
+        self.settings.MDCI.UseQROs = enable
+
+    def basis_set(self, value):
+        '''Add the basis set keyword to the main options.'''
+        self.settings.main.add(value)
 
     def single_point(self):
         self.__remove_task()
-        self.settings.main.add('sp')
+        self.settings.main.add('SP')
 
     def transition_state(self):
         self.__remove_task()
         self.vibrations()
-        self.settings.main.add('optts')
+        self.settings.main.add('OptTS')
 
     def optimization(self):
         self.__remove_task()
         self.vibrations()
-        self.settings.main.add('opt')
+        self.settings.main.add('Opt')
 
     def vibrations(self, enable=True, numerical=False):
-        self.__casefold_main()
-        self.settings.main.discard('numfreq')
-        self.settings.main.discard('freq')
+        self.remove_main('NumFreq')
+        self.remove_main('Freq')
         if not enable:
             return
 
         if numerical:
-            self.settings.main.add('numfreq')
+            self.settings.main.add('NumFreq')
         else:
-            self.settings.main.add('freq')
+            self.settings.main.add('Freq')
 
     def charge(self, val):
         self._charge = val
 
     def spin_polarization(self, val):
-        self._multiplicity = 2 * val + 1
+        '''Set the multiplicity to val + 1, i.e. val is taken as the number of unpaired electrons.'''
+        self._multiplicity = val + 1
 
     def multiplicity(self, val):
         self._multiplicity = val
 
+    def ghost_atoms(self, indices: Union[int, List[int]]):
+        '''Mark atoms as ghost atoms. The indices are the 1-based positions of the atoms in the molecule.'''
+        self._ghosts.extend(ensure_list(indices))
+
     def get_memory_usage(self):
         mem = self.memory or self._sbatch.mem or None
 
@@ -74,11 +124,29 @@ def get_memory_usage(self):
 
         return mem, ntasks
 
+    def molecule(self, mol: Union[str, plams.Molecule, plams.Atom, List[plams.Atom]], natoms: int = None):
+        '''
+        Add a molecule to this calculation in various formats.
+
+        Args:
+            mol: the molecule to read, can be a path (str). If the path exists already we read it. If it does not exist yet, it will be read in later. mol can also be a plams.Molecule object or a single or a list of plams.Atom objects.
+            natoms: If the molecule is supplied as a path you should also give the number of atoms.
+        '''
+        super().molecule(mol)
+        self.natoms = natoms
+
     def get_input(self):
         # set the correct memory usage and processes
         mem, ntasks = self.get_memory_usage()
         if ntasks and mem:
-            natoms = len(self._molecule)
+            if self._molecule is not None:
+                natoms = len(self._molecule) - len(self._ghosts)
+            else:
+                if not hasattr(self, 'natoms') or self.natoms is None:
+                    raise ValueError('You set the molecule as a path and did not supply the number of atoms.')
+                natoms = self.natoms
+
-            ntasks = min(ntasks, (natoms - 1) * 3)
+            # never request fewer than one process, a single (non-ghost) atom would otherwise give 0
+            ntasks = max(1, min(ntasks, (natoms - 1) * 3))
             self.settings.PAL.nprocs = ntasks
             self.settings.maxcore = int(mem / ntasks * 0.75)
@@ -107,26 +175,29 @@ def get_input(self):
         ret += '\n'
 
         if self._molecule_path:
-            ret += f'* xyz {self._charge} {self._multiplicity} {os.path.abspath(self._molecule_path)}\n'
+            ret += f'* xyzfile {self._charge} {self._multiplicity} {os.path.abspath(self._molecule_path)}\n'
         else:
             ret += f'* xyz {self._charge} {self._multiplicity}\n'
-            for atom in self._molecule:
-                ret += f' {atom.symbol:2} {atom.x: >13f} {atom.y: >13f} {atom.z: >13f}\n'
+            for i, atom in enumerate(self._molecule, start=1):
+                if i in self._ghosts:
+                    ret += f' {atom.symbol:2}: {atom.x: >13f} {atom.y: >13f} {atom.z: >13f}\n'
+                else:
+                    ret += f' {atom.symbol:3} {atom.x: >13f} {atom.y: >13f} {atom.z: >13f}\n'
 
             ret += '*\n'
 
         return ret
 
     def _setup_job(self):
         try:
-            if self.orca_path is None:
+            if self.orca_path is None and not self.test_mode:
                 self.orca_path = sp.check_output(['which', 'orca']).decode().strip()
         except sp.CalledProcessError:
-            log.error('Could not find the orca path. Please set it manually.')
-            return
+            log.warn(f'Could not find the orca path. Set the {self.__class__.__name__}.orca_path attribute to add it. Now setting it to "$(which orca)", make sure the orca executable is findable.')
+            self.orca_path = '$(which orca)'
 
         if not self._molecule and not self._molecule_path:
-            log.error(f'You did not supply a molecule for this job. Call the {self.__class__}.molecule method to add one.')
+            log.error(f'You did not supply a molecule for this job. Call the {self.__class__.__name__}.molecule method to add one.')
             return
 
         os.makedirs(self.workdir, exist_ok=True)
@@ -134,15 +205,41 @@ def _setup_job(self):
             inp.write(self.get_input())
 
         with open(self.runfile_path, 'w+') as runf:
-            runf.write('#!/bin/sh\n\n')  # the shebang is not written by default by ADF
+            runf.write('#!/bin/sh\n\n')
             runf.write('\n'.join(self._preambles) + '\n\n')
-            runf.write(f'{self.orca_path} {self.inputfile_path}\n')
+
+            # when using temporary directories for SLURM we need to do some extra setup
+            # this is mainly moving the calculation directory to the TMPDIR location
+            # and after the job is finished we copy back the results and remove the TMPDIR
+            if self.use_tmpdir and slurm.has_slurm():
+                runf.write('export TMPDIR="$TMPDIR/$SLURM_JOB_ID"\n')
+                runf.write('mkdir -p $TMPDIR\n')
+                runf.write('cd $TMPDIR\n')
+                runf.write(f'cp {self.inputfile_path} $TMPDIR\n')
+
+                runf.write(f'{self.orca_path} $TMPDIR/{self.name}.in\n')
+
+                runf.write(f'cp $TMPDIR/* {self.workdir}\n')
+                runf.write('rm -rf $TMPDIR\n')
+
+            else:
+                runf.write(f'{self.orca_path} {self.inputfile_path}\n')
+
             runf.write('\n'.join(self._postambles))
 
         return True
 
+    @property
+    def output_mol_path(self):
+        '''
+        The default file path for output molecules when running ORCA calculations. It will not be created for singlepoint calculations.
+        '''
+        return j(self.workdir, 'OPT.xyz')
+
+
+
 
 if __name__ == '__main__':
     job = ORCAJob()
-    job.molecule('water.xyz')
-    job._setup_job()
+    job.main('OPT cc-pVTZ')
+    job.remove_main('OPT OPTTS NEB')
diff --git a/src/tcutility/results/__init__.py b/src/tcutility/results/__init__.py
index da7510fc..2c8b119e 100644
--- a/src/tcutility/results/__init__.py
+++ b/src/tcutility/results/__init__.py
@@ -94,8 +94,19 @@ def read(calc_dir: Union[str, pl.Path]) -> Result:
         ret.dftb = dftb.get_calc_settings(ret)
         ret.properties = dftb.get_properties(ret)
     elif ret.engine == "orca":
-        ret.orca = orca.get_calc_settings(ret)
-        ret.properties = orca.get_properties(ret)
+        try:
+            ret.orca = orca.get_calc_settings(ret)
+        except Exception:
+            ret.orca = None
+            print('Error reading:', calc_dir)
+            raise
+
+        try:
+            ret.properties = orca.get_properties(ret)
+        except Exception:
+            ret.properties = None
+            print('Error reading:', calc_dir)
+            raise
 
     # unload cached KFReaders associated with this calc_dir
     to_delete = [key for key in cache._cache if key.startswith(os.path.abspath(calc_dir))]
diff --git a/src/tcutility/results/orca.py b/src/tcutility/results/orca.py
index d353b994..1e32823a 100644
--- a/src/tcutility/results/orca.py
+++ b/src/tcutility/results/orca.py
@@ -1,5 +1,5 @@
 from tcutility.results import Result
-from tcutility import constants
+from tcutility import constants, slurm
 import os
 from scm import plams
 import numpy as np
@@ -147,6 +147,7 @@ def get_input(info: Result) -> Result:
     if coordinates in ["xyz", "int"]:
         ret.system.molecule = plams.Molecule()
         for line in system_lines:
+            line = line.replace(':', '')
             ret.system.molecule.add_atom(plams.Atom(symbol=line.split()[0], coords=[float(x) for x in line.split()[1:4]]))
 
     info.task = "SinglePoint"
@@ -247,12 +248,14 @@ def get_calculation_status(info: Result) -> Result:
     ret.code = None
     ret.reasons = []
 
+    # if we do not have an output file the calculation status is unknown
     if "out" not in info.files.out:
         ret.reasons.append("Calculation status unknown")
         ret.name = "UNKNOWN"
         ret.code = "U"
         return ret
 
+    # try to read if the calculation succeeded
     with open(info.files.out) as out:
         lines = out.readlines()
         if any(["ORCA TERMINATED NORMALLY" in line for line in lines]):
@@ -261,8 +264,29 @@ def get_calculation_status(info: Result) -> Result:
             ret.code = "S"
             return ret
 
+    # if it did not, we default to failed
     ret.name = "FAILED"
     ret.code = "F"
+
+    # otherwise we check if the job is being managed by slurm (queried once, so both uses see the same answer)
+    slurm_info = slurm.workdir_info(os.path.abspath(info.files.root))
+    if not slurm_info:
+        return ret
+
+    # get the statuscode from the workdir
+    state = slurm_info.statuscode
+    state_name = {
+        'CG': 'COMPLETING',
+        'CF': 'CONFIGURING',
+        'PD': 'PENDING',
+        'R': 'RUNNING'
+    }.get(state, 'UNKNOWN')
+
+    ret.fatal = False
+    ret.name = state_name
+    ret.code = state
+    ret.reasons = []
+
     return ret
 
 
@@ -526,7 +550,3 @@ def get_properties(info: Result) -> Result:
 
     return ret
 
-
-if __name__ == "__main__":
-    ret = get_info("/Users/yumanhordijk/Library/CloudStorage/OneDrive-VrijeUniversiteitAmsterdam/RadicalAdditionBenchmark/data/abinitio/P_C2H2_NH2/OPT_pVTZ")
-    print(ret.molecule)
diff --git a/src/tcutility/slurm.py b/src/tcutility/slurm.py
index 0e96ae56..e1eb3d0c 100644
--- a/src/tcutility/slurm.py
+++ b/src/tcutility/slurm.py
@@ -81,10 +81,20 @@ def sbatch(runfile: str, **options: dict) -> results.Result:
     cmd = 'sbatch '
     for key, val in options.items():
         key = key.replace('_', '-')
-        if len(key) > 1:
-            cmd += f'--{key}={val} '
+
+        # booleans denote flags: True adds the bare flag, False leaves the flag out entirely
+        if val is False:
+            continue
+        if val is True:
+            if len(key) > 1:
+                cmd += f'--{key} '
+            else:
+                cmd += f'-{key} '
         else:
-            cmd += f'-{key} {val} '
+            if len(key) > 1:
+                cmd += f'--{key}={val} '
+            else:
+                cmd += f'-{key} {val} '
 
     cmd = cmd + runfile
 