Skip to content

Commit

Permalink
Raise a custom JobFailure error when a job fails (#2410)
Browse files Browse the repository at this point in the history
## Summary of Changes

This PR seeks to close #2407 by raising a custom `JobFailure` exception
when a job fails, which can be used to `try`/`except` calculations more
easily. The `JobFailure` raises the parent ASE error but also has a
`directory` attribute that can be easily accessed by the user without
any need to sift through the log file. The directory info was already
being logged, but now it's also present in the traceback. There is also
a `parent_error` attribute to ensure the original exception is still
accessible by the end user.

Pinging @tomdemeyere for review.

### Requirements

- [X] My PR is focused on a [single feature addition or
bugfix](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/getting-started/best-practices-for-pull-requests#write-small-prs).
- [X] My PR has relevant, comprehensive [unit
tests](https://quantum-accelerators.github.io/quacc/dev/contributing.html#unit-tests).
- [X] My PR is on a [custom
branch](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-and-deleting-branches-within-your-repository)
(i.e. is _not_ named `main`).

Note: If you are an external contributor, you will see a comment from
[@buildbot-princeton](https://github.com/buildbot-princeton). This is
solely for the maintainers.
  • Loading branch information
Andrew-S-Rosen authored Aug 14, 2024
1 parent 43fd54d commit 39cf639
Show file tree
Hide file tree
Showing 7 changed files with 100 additions and 46 deletions.
43 changes: 43 additions & 0 deletions src/quacc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import logging
import threading
from importlib.metadata import version
from pathlib import Path
from typing import TYPE_CHECKING

from ase.atoms import Atoms
Expand Down Expand Up @@ -33,6 +34,7 @@
"Remove",
"get_settings",
"QuaccDefault",
"JobFailure",
]


Expand Down Expand Up @@ -94,6 +96,47 @@ def get_settings() -> QuaccSettings:
# Set logging info
logging.basicConfig(level=logging.DEBUG if _settings.DEBUG else logging.INFO)


# Custom exceptions
class JobFailure(Exception):
"""
A custom exception for handling/catching job failures.
Attributes
----------
directory
The directory where the calculations can be found.
parent_error
The Exception that caused the job to fail.
"""

def __init__(
self,
directory: Path | str,
parent_error: Exception | None = None,
message: str = "Calculation failed!",
) -> None:
"""
Initialize the JobFailure exception.
Parameters
----------
directory
The directory where the calculations can be found.
parent_error
The Exception that caused the job to fail.
message
The message to display when the exception is raised.
Returns
-------
None
"""
self.directory = Path(directory)
self.parent_error = parent_error
super().__init__(message)


# Monkeypatching for Prefect
if _settings.WORKFLOW_ENGINE == "prefect":
from prefect.client.schemas import State
Expand Down
9 changes: 5 additions & 4 deletions src/quacc/runners/prep.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

from monty.shutil import gzip_dir

from quacc import get_settings
from quacc import JobFailure, get_settings
from quacc.utils.files import copy_decompress_files, make_unique_dir

if TYPE_CHECKING:
Expand Down Expand Up @@ -155,8 +155,9 @@ def terminate(tmpdir: Path | str, exception: Exception) -> None:
Raises
-------
Exception
The exception that caused the calculation to fail.
JobFailure
The exception that caused the calculation to fail plus additional
metadata.
"""
tmpdir = Path(tmpdir)
settings = get_settings()
Expand All @@ -172,4 +173,4 @@ def terminate(tmpdir: Path | str, exception: Exception) -> None:
old_symlink_path.unlink(missing_ok=True)
symlink_path.symlink_to(job_failed_dir, target_is_directory=True)

raise exception
raise JobFailure(job_failed_dir, message=msg, parent_error=exception) from exception
18 changes: 13 additions & 5 deletions tests/core/recipes/dftb_recipes/test_dftb_recipes.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import numpy as np
from ase.build import bulk, molecule

from quacc import change_settings
from quacc import JobFailure, change_settings
from quacc.recipes.dftb.core import relax_job, static_job

LOGGER = logging.getLogger(__name__)
Expand Down Expand Up @@ -87,8 +87,10 @@ def test_static_job_cu_kpts(tmp_path, monkeypatch):
def test_static_errors(tmp_path, monkeypatch):
monkeypatch.chdir(tmp_path)
atoms = molecule("H2O")
with pytest.raises(RuntimeError, match="failed with command"):
with pytest.raises(JobFailure, match="Calculation failed!") as err:
static_job(atoms, Hamiltonian_MaxSccIterations=1)
with pytest.raises(RuntimeError, match="failed with command"):
raise err.value.parent_error


def test_relax_job_water(tmp_path, monkeypatch):
Expand Down Expand Up @@ -161,8 +163,10 @@ def test_relax_job_cu_supercell_errors(tmp_path, monkeypatch):
monkeypatch.chdir(tmp_path)
atoms = bulk("Cu") * (2, 1, 1)
atoms[0].position += 0.5
with pytest.raises(RuntimeError, match="failed with command"):
with pytest.raises(JobFailure, match="Calculation failed!") as err:
relax_job(atoms, kpts=(3, 3, 3), MaxSteps=1, Hamiltonian_MaxSccIterations=100)
with pytest.raises(RuntimeError, match="failed with command"):
raise err.value.parent_error


@pytest.mark.skipif(os.name == "nt", reason="symlinking not possible on Windows")
Expand All @@ -173,8 +177,10 @@ def test_child_errors(tmp_path, monkeypatch, caplog):
caplog.at_level(logging.INFO),
change_settings({"SCRATCH_DIR": tmp_path / "scratch"}),
):
with pytest.raises(RuntimeError, match="failed with command"):
with pytest.raises(JobFailure, match="Calculation failed!") as err:
static_job(atoms)
with pytest.raises(RuntimeError, match="failed with command"):
raise err.value.parent_error
assert "Calculation failed" in caplog.text
assert "failed-quacc-" in " ".join(os.listdir(tmp_path / "scratch"))

Expand All @@ -187,7 +193,9 @@ def test_child_errors2(tmp_path, monkeypatch, caplog):
caplog.at_level(logging.INFO),
change_settings({"SCRATCH_DIR": tmp_path / "scratch"}),
):
with pytest.raises(RuntimeError, match="failed with command"):
with pytest.raises(JobFailure, match="Calculation failed!") as err:
relax_job(atoms)
with pytest.raises(RuntimeError, match="failed with command"):
raise err.value.parent_error
assert "Calculation failed" in caplog.text
assert "failed-quacc-" in " ".join(os.listdir(tmp_path / "scratch"))
5 changes: 4 additions & 1 deletion tests/core/recipes/espresso_recipes/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from monty.io import zopen
from numpy.testing import assert_allclose, assert_array_equal

from quacc import JobFailure
from quacc.calculators.espresso.espresso import EspressoTemplate
from quacc.recipes.espresso.core import (
ase_relax_job,
Expand Down Expand Up @@ -220,13 +221,15 @@ def test_static_job_test_run(tmp_path, monkeypatch):

assert Path("test.EXIT").exists()

with pytest.raises(CalledProcessError):
with pytest.raises(JobFailure, match="Calculation failed!") as err:
static_job(
atoms,
pseudopotentials=pseudopotentials,
input_data={"pseudo_dir": tmp_path},
test_run=True,
)
with pytest.raises(CalledProcessError):
raise err.value.parent_error


def test_relax_job(tmp_path, monkeypatch):
Expand Down
58 changes: 26 additions & 32 deletions tests/core/recipes/qchem_recipes/mocked/test_qchem_recipes.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,14 @@
from ase.optimize import FIRE
from pymatgen.io.qchem.inputs import QCInput

from quacc import _internally_set_settings
from quacc import JobFailure, _internally_set_settings
from quacc.atoms.core import check_charge_and_spin
from quacc.calculators.qchem import QChem
from quacc.recipes.qchem.core import freq_job, relax_job, static_job
from quacc.recipes.qchem.ts import irc_job, quasi_irc_job, ts_job

has_sella = bool(find_spec("sella"))
has_obabel = bool(find_spec("openbabel"))


FILE_DIR = Path(__file__).parent
Expand Down Expand Up @@ -205,16 +206,18 @@ def test_static_job_v4(monkeypatch, tmp_path, os_atoms):
def test_static_job_v5(tmp_path, monkeypatch, test_atoms):
monkeypatch.chdir(tmp_path)

with pytest.raises(
ValueError,
match="Only one of PCM, ISOSVP, SMD, and CMIRSmay be used for solvation",
):
with pytest.raises(JobFailure, match="Calculation failed!") as err:
static_job(
test_atoms,
charge=0,
spin_multiplicity=1,
qchem_dict_set_params={"pcm_dielectric": "3.0", "smd_solvent": "water"},
)
with pytest.raises(
ValueError,
match="Only one of PCM, ISOSVP, SMD, and CMIRSmay be used for solvation",
):
raise err.value.parent_error


@pytest.mark.skipif(has_sella is False, reason="Does not have Sella")
Expand Down Expand Up @@ -308,16 +311,18 @@ def test_relax_job_v3(monkeypatch, tmp_path, test_atoms):
@pytest.mark.skipif(has_sella is False, reason="Does not have Sella")
def test_relax_job_v4(tmp_path, monkeypatch, test_atoms):
monkeypatch.chdir(tmp_path)
with pytest.raises(
ValueError,
match="Only one of PCM, ISOSVP, SMD, and CMIRSmay be used for solvation",
):
with pytest.raises(JobFailure, match="Calculation failed!") as err:
relax_job(
test_atoms,
charge=0,
spin_multiplicity=1,
qchem_dict_set_params={"pcm_dielectric": "3.0", "smd_solvent": "water"},
)
with pytest.raises(
ValueError,
match="Only one of PCM, ISOSVP, SMD, and CMIRSmay be used for solvation",
):
raise err.value.parent_error


def test_freq_job_v1(monkeypatch, tmp_path, test_atoms):
Expand Down Expand Up @@ -434,28 +439,14 @@ def test_ts_job_v3(monkeypatch, tmp_path, test_atoms):


@pytest.mark.skipif(has_sella is False, reason="Does not have Sella")
def test_ts_job_v4(tmp_path, monkeypatch, test_atoms):
@pytest.mark.skipif(has_obabel is False, reason="Does not have openbabel")
def test_ts_job_v4(monkeypatch, tmp_path, test_atoms):
monkeypatch.chdir(tmp_path)
with pytest.raises(
ValueError,
match="Only one of PCM, ISOSVP, SMD, and CMIRSmay be used for solvation",
):
ts_job(
test_atoms,
charge=0,
spin_multiplicity=1,
qchem_dict_set_params={"pcm_dielectric": "3.0", "smd_solvent": "water"},
)

with pytest.raises(
ValueError, match="Only Sella should be used for TS optimization"
):
ts_job(
test_atoms,
charge=0,
spin_multiplicity=1,
qchem_dict_set_params={"pcm_dielectric": "3.0", "smd_solvent": "water"},
opt_params={"optimizer": FIRE},
test_atoms, charge=0, spin_multiplicity=1, opt_params={"optimizer": FIRE}
)


Expand Down Expand Up @@ -530,22 +521,26 @@ def test_irc_job_v1(monkeypatch, tmp_path, test_atoms):
@pytest.mark.skipif(has_sella is False, reason="Does not have Sella")
def test_irc_job_v2(tmp_path, monkeypatch, test_atoms):
monkeypatch.chdir(tmp_path)
with pytest.raises(JobFailure, match="Calculation failed!") as err:
irc_job(test_atoms, charge=0, spin_multiplicity=1, direction="straight")
with pytest.raises(
ValueError, match='direction must be one of "forward" or "reverse"!'
):
irc_job(test_atoms, charge=0, spin_multiplicity=1, direction="straight")
raise err.value.parent_error

with pytest.raises(
ValueError,
match="Only one of PCM, ISOSVP, SMD, and CMIRSmay be used for solvation",
):
with pytest.raises(JobFailure, match="Calculation failed!") as err:
irc_job(
test_atoms,
charge=0,
spin_multiplicity=1,
direction="forward",
qchem_dict_set_params={"pcm_dielectric": "3.0", "smd_solvent": "water"},
)
with pytest.raises(
ValueError,
match="Only one of PCM, ISOSVP, SMD, and CMIRSmay be used for solvation",
):
raise err.value.parent_error

with pytest.raises(
ValueError, match="Only Sella's IRC should be used for IRC optimization"
Expand All @@ -555,7 +550,6 @@ def test_irc_job_v2(tmp_path, monkeypatch, test_atoms):
charge=0,
spin_multiplicity=1,
direction="forward",
qchem_dict_set_params={"pcm_dielectric": "3.0", "smd_solvent": "water"},
opt_params={"optimizer": FIRE},
)

Expand Down
6 changes: 4 additions & 2 deletions tests/core/runners/test_ase.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from ase.optimize import BFGS, BFGSLineSearch
from ase.optimize.sciopt import SciPyFminBFGS

from quacc import change_settings, get_settings
from quacc import JobFailure, change_settings, get_settings
from quacc.runners._base import BaseRunner
from quacc.runners.ase import Runner

Expand Down Expand Up @@ -245,5 +245,7 @@ def fn_hook(dyn):
if dyn.atoms:
raise ValueError("Test error")

with pytest.raises(ValueError, match="Test error"):
with pytest.raises(JobFailure, match="Calculation failed!") as err:
Runner(bulk("Cu"), EMT()).run_opt(fn_hook=fn_hook)
with pytest.raises(ValueError, match="Test error"):
raise err.value.parent_error
7 changes: 5 additions & 2 deletions tests/core/runners/test_prep.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from ase.build import bulk
from ase.calculators.emt import EMT

from quacc import change_settings, get_settings
from quacc import JobFailure, change_settings, get_settings
from quacc.runners.prep import calc_cleanup, calc_setup, terminate


Expand Down Expand Up @@ -177,7 +177,10 @@ def test_calc_cleanup(tmp_path, monkeypatch):
def test_terminate(tmp_path):
p = tmp_path / "tmp-quacc-1234"
os.mkdir(p)
with pytest.raises(ValueError, match="moo"):
with pytest.raises(JobFailure, match="Calculation failed!") as err:
terminate(p, ValueError("moo"))
with pytest.raises(ValueError, match="moo"):
raise err.value.parent_error
assert err.value.directory == tmp_path / "failed-quacc-1234"
assert not p.exists()
assert Path(tmp_path, "failed-quacc-1234").exists()

0 comments on commit 39cf639

Please sign in to comment.