Add single point jobs to the end of torsiondrives #167

Open · wants to merge 4 commits into base: main
2 changes: 2 additions & 0 deletions openff/bespokefit/cli/executor/run.py
@@ -22,6 +22,7 @@ def _run_cli(
force_field_path: Optional[str],
target_torsion_smirks: Tuple[str],
default_qc_spec: Optional[Tuple[str, str, str]],
evaluation_qc_spec: Optional[Tuple[str, str, str]],
Comment on lines 24 to +25 (Contributor):
It might be good to swap this to

Suggested change
-default_qc_spec: Optional[Tuple[str, str, str]],
-evaluation_qc_spec: Optional[Tuple[str, str, str]],
+default_qc_spec: Optional[Tuple[str, str, str]],
+optimization_qc_spec: Optional[Tuple[str, str, str]],

as the optimization spec is likely the optional thing users will want to tweak, rather than the final level of theory?

workflow_name: Optional[str],
workflow_file_name: Optional[str],
directory: Optional[str],
@@ -85,6 +86,7 @@ def _run_cli(
force_field_path=force_field_path,
target_torsion_smirks=target_torsion_smirks,
default_qc_spec=default_qc_spec,
evaluation_qc_spec=evaluation_qc_spec,
workflow_name=workflow_name,
workflow_file_name=workflow_file_name,
allow_multiple_molecules=False,
47 changes: 33 additions & 14 deletions openff/bespokefit/cli/executor/submit.py
@@ -88,6 +88,13 @@ def submit_options(allow_multiple_molecules: bool = False):
"then 'none' should be specified.",
required=False,
),
optgroup.option(
"--evaluation-qc-spec",
type=(str, str, str),
help="The program, method and basis used to refine the default specification. In the case of a torsion drive"
"this is the specification used to evaluate the PES on the geometries from the default spec.",
required=False,
),
]


@@ -97,6 +104,7 @@ def _to_input_schema(
force_field_path: Optional[str],
target_torsion_smirks: Tuple[str],
default_qc_spec: Optional[Tuple[str, str, str]],
evaluation_qc_spec: Optional[Tuple[str, str, str]],
workflow_name: Optional[str],
workflow_file_name: Optional[str],
) -> "BespokeOptimizationSchema":
@@ -135,21 +143,28 @@ def _to_input_schema(
workflow_factory.initial_force_field = force_field_path
if len(target_torsion_smirks) > 0:
workflow_factory.target_torsion_smirks = [*target_torsion_smirks]
if default_qc_spec is not None:

program, method, basis = default_qc_spec

if basis.lower() == "none":
basis = None

workflow_factory.default_qc_specs = [
QCSpec(
program=program,
method=method,
basis=basis,
spec_description="CLI provided spec",
for spec, name in [
(default_qc_spec, "default_qc_spec"),
(evaluation_qc_spec, "evaluation_qc_spec"),
]:
if spec is not None:

program, method, basis = spec

if basis.lower() == "none":
basis = None

setattr(
workflow_factory,
name,
QCSpec(
program=program,
method=method,
basis=basis,
spec_name=name,
spec_description=f"CLI provided spec {name}",
),
)
]

except FileNotFoundError:

@@ -201,6 +216,7 @@ def _submit(
force_field_path: Optional[str],
target_torsion_smirks: Tuple[str],
default_qc_spec: Optional[Tuple[str, str, str]],
evaluation_qc_spec: Optional[Tuple[str, str, str]],
workflow_name: Optional[str],
workflow_file_name: Optional[str],
allow_multiple_molecules: bool,
@@ -269,6 +285,7 @@
force_field_path,
target_torsion_smirks,
default_qc_spec,
evaluation_qc_spec,
workflow_name,
workflow_file_name,
)
@@ -330,6 +347,7 @@ def _submit_cli(
force_field_path: Optional[List[str]],
target_torsion_smirks: Tuple[str],
default_qc_spec: Optional[Tuple[str, str, str]],
evaluation_qc_spec: Optional[Tuple[str, str, str]],
workflow_name: Optional[str],
workflow_file_name: Optional[str],
save_submission: bool,
@@ -350,6 +368,7 @@
force_field_path=force_field_path,
target_torsion_smirks=target_torsion_smirks,
default_qc_spec=default_qc_spec,
evaluation_qc_spec=evaluation_qc_spec,
workflow_name=workflow_name,
workflow_file_name=workflow_file_name,
allow_multiple_molecules=True,
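For reference, the following is a minimal, self-contained sketch of the spec-assignment pattern introduced in `_to_input_schema` above. `QCSpecStub`, `FactoryStub`, `apply_cli_specs`, and the example triples are hypothetical stand-ins for illustration only, not the real `QCSpec` class or bespoke workflow factory used by bespokefit: each optional `(program, method, basis)` triple supplied on the command line becomes a spec attribute on the factory, and a basis of `"none"` is translated to `None`.

```python
from dataclasses import dataclass
from typing import Optional, Tuple


@dataclass
class QCSpecStub:
    """Stand-in for the real QCSpec class (illustration only)."""

    program: str
    method: str
    basis: Optional[str]
    spec_name: str
    spec_description: str


@dataclass
class FactoryStub:
    """Stand-in for the bespoke workflow factory (illustration only)."""

    default_qc_spec: Optional[QCSpecStub] = None
    evaluation_qc_spec: Optional[QCSpecStub] = None


def apply_cli_specs(
    factory: FactoryStub,
    default_qc_spec: Optional[Tuple[str, str, str]],
    evaluation_qc_spec: Optional[Tuple[str, str, str]],
):
    # Mirror the loop in `_to_input_schema`: skip triples that were not given,
    # translate a basis of "none" into None, and assign the spec by name.
    for spec, name in [
        (default_qc_spec, "default_qc_spec"),
        (evaluation_qc_spec, "evaluation_qc_spec"),
    ]:
        if spec is None:
            continue

        program, method, basis = spec

        setattr(
            factory,
            name,
            QCSpecStub(
                program=program,
                method=method,
                basis=None if basis.lower() == "none" else basis,
                spec_name=name,
                spec_description=f"CLI provided spec {name}",
            ),
        )


factory = FactoryStub()
apply_cli_specs(
    factory,
    default_qc_spec=("xtb", "gfn2xtb", "none"),  # hypothetical cheap torsion drive spec
    evaluation_qc_spec=("psi4", "b3lyp-d3bj", "dzvp"),  # hypothetical single point spec
)
assert factory.default_qc_spec.basis is None
assert factory.evaluation_qc_spec.basis == "dzvp"
```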
7 changes: 3 additions & 4 deletions openff/bespokefit/data/schemas/debug.json
@@ -80,8 +80,7 @@
"heuristic": "path_length",
"keep_non_rotor_ring_substituents": false
},
"default_qc_specs": [
{
"default_qc_spec": {
"method": "uff",
"basis": null,
"program": "rdkit",
@@ -97,6 +96,6 @@
"mayer_indices"
],
"keywords": null
}
]
},
"evaluation_qc_spec": null
}
7 changes: 3 additions & 4 deletions openff/bespokefit/data/schemas/default.json
@@ -81,8 +81,7 @@
"heuristic": "path_length",
"keep_non_rotor_ring_substituents": false
},
"default_qc_specs": [
{
"default_qc_spec": {
"method": "B3LYP-D3BJ",
"basis": "DZVP",
"program": "psi4",
@@ -98,6 +97,6 @@
"mayer_indices"
],
"keywords": null
}
]
},
"evaluation_qc_spec": null
}
131 changes: 112 additions & 19 deletions openff/bespokefit/executor/services/qcgenerator/cache.py
@@ -1,13 +1,22 @@
import hashlib
from typing import TypeVar, Union
from typing import TYPE_CHECKING, Optional, TypeVar, Union

import redis
from openff.toolkit.topology import Molecule

from openff.bespokefit.executor.services.qcgenerator import worker
from openff.bespokefit.schema.tasks import HessianTask, OptimizationTask, Torsion1DTask
from openff.bespokefit.schema.tasks import (
HessianTask,
OptimizationTask,
QCGenerationTask,
Torsion1DTask,
)
from openff.bespokefit.utilities.molecule import canonical_order_atoms

if TYPE_CHECKING:
# Only use as a type hint. Use `celery_app.AsyncResult` to initialize
from celery.result import AsyncResult

_T = TypeVar("_T", HessianTask, OptimizationTask, Torsion1DTask)


@@ -50,6 +59,95 @@ def _canonicalize_task(task: _T) -> _T:
return task


def _hash_task(task: QCGenerationTask) -> str:
"""Returns a hashed representation of a QC task"""
return hashlib.sha512(task.json().encode()).hexdigest()


def _retrieve_cached_task_id(
task_hash: str, redis_connection: redis.Redis
) -> Optional[str]:
"""Retrieve the task ID of a cached QC task if present in the redis cache"""

task_id = redis_connection.hget("qcgenerator:task-ids", task_hash)

return None if task_id is None else task_id.decode()


def _cache_task_id(
task_id: str, task_type: str, task_hash: str, redis_connection: redis.Redis
):
"""Store the ID of a running QC task in the QC task cache."""

redis_connection.hset("qcgenerator:types", task_id, task_type)
# Make sure to only set the hash after the type is set in case the connection
# goes down before this information is entered and subsequently discarded.
redis_connection.hset("qcgenerator:task-ids", task_hash, task_id)


def _compute_torsion_drive_task(
task: Torsion1DTask, redis_connection: redis.Redis
) -> str:
"""Submit a torsion drive to celery, optionally chaining together a torsion
drive followed by a single point energy re-evaluation."""

task_id = None

torsion_drive_task = task.copy(deep=True)
torsion_drive_task.sp_specification = None

torsion_drive_hash = _hash_task(torsion_drive_task)
torsion_drive_id = _retrieve_cached_task_id(torsion_drive_hash, redis_connection)

if torsion_drive_id is None:

# There are no cached torsion drives at the 'pre-optimise' level of theory,
# so we need to run a torsion drive and then optionally a single point.
if task.sp_specification is None:

torsion_drive_id = worker.compute_torsion_drive.delay(
task_json=task.json()
).id

else:

task_future: AsyncResult = (
worker.compute_torsion_drive.s(task_json=task.json())
| worker.evaluate_torsion_drive.s(
model_json=task.sp_specification.model.json(),
program=task.sp_specification.program,
)
).delay()

torsion_drive_id = task_future.parent.id
task_id = task_future.id

_cache_task_id(
torsion_drive_id, task.type, torsion_drive_hash, redis_connection
)

if task.sp_specification is None:
return torsion_drive_id

if task_id is None:

# Handle the case where we have a running torsion drive that we need to
# append a single point calculation to the end of.
task_id = (
(
worker.wait_for_task.s(torsion_drive_id)
| worker.evaluate_torsion_drive.s(
model_json=task.sp_specification.model.json(),
program=task.sp_specification.program,
)
)
.delay()
.id
)

return task_id


def cached_compute_task(
task: Union[HessianTask, OptimizationTask, Torsion1DTask],
redis_connection: redis.Redis,
@@ -58,28 +156,23 @@ def cached_compute_task(
worker.
"""

if isinstance(task, Torsion1DTask):
compute = worker.compute_torsion_drive
elif isinstance(task, OptimizationTask):
compute = worker.compute_optimization
elif isinstance(task, HessianTask):
compute = worker.compute_hessian
else:
raise NotImplementedError()

# Canonicalize the task to improve the cache hit rate.
task = _canonicalize_task(task)

task_hash = hashlib.sha512(task.json().encode()).hexdigest()
task_id = redis_connection.hget("qcgenerator:task-ids", task_hash)
task_hash = _hash_task(task)
task_id = _retrieve_cached_task_id(task_hash, redis_connection)

if task_id is not None:
return task_id.decode()
return task_id

task_id = compute.delay(task_json=task.json()).id
if isinstance(task, Torsion1DTask):
task_id = _compute_torsion_drive_task(task, redis_connection)
elif isinstance(task, OptimizationTask):
task_id = worker.compute_optimization.delay(task_json=task.json()).id
elif isinstance(task, HessianTask):
task_id = worker.compute_hessian.delay(task_json=task.json()).id
else:
raise NotImplementedError()

redis_connection.hset("qcgenerator:types", task_id, task.type)
# Make sure to only set the hash after the type is set in case the connection
# goes down before this information is entered and subsequently discarded.
redis_connection.hset("qcgenerator:task-ids", task_hash, task_id)
_cache_task_id(task_id, task.type, task_hash, redis_connection)
return task_id
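
The chaining logic in `_compute_torsion_drive_task` above relies on standard celery behaviour, which the standalone sketch below illustrates. The `drive` and `evaluate` tasks and the broker URL are hypothetical placeholders rather than the worker tasks defined in this PR: composing two signatures with `|` builds a chain, the second task receives the first task's return value as its leading argument, and the chain's `AsyncResult` exposes the upstream task through `.parent`, which is why `task_future.parent.id` gives the torsion drive ID while `task_future.id` identifies the final single point evaluation.

```python
from celery import Celery

# Hypothetical broker/backend; any reachable redis instance would do for this sketch.
app = Celery(
    "sketch",
    broker="redis://localhost:6379/0",
    backend="redis://localhost:6379/0",
)


@app.task
def drive(task_json: str) -> str:
    # Stand-in for `worker.compute_torsion_drive`.
    return task_json.upper()


@app.task
def evaluate(drive_result: str, program: str) -> str:
    # Stand-in for `worker.evaluate_torsion_drive`; receives the drive result
    # as its first argument because it is the second link in the chain.
    return f"{program}:{drive_result}"


# Compose the two signatures and submit them as one chain.
chain_result = (drive.s("{}") | evaluate.s(program="psi4")).delay()

torsion_drive_id = chain_result.parent.id  # ID of the first (drive) task
final_task_id = chain_result.id            # ID of the chained evaluation task
```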