Add single point jobs to the end of torsiondrives #167

Open · wants to merge 4 commits into base: main
2 changes: 2 additions & 0 deletions openff/bespokefit/cli/executor/run.py
@@ -22,6 +22,7 @@ def _run_cli(
force_field_path: Optional[str],
target_torsion_smirks: Tuple[str],
default_qc_spec: Optional[Tuple[str, str, str]],
evaluation_qc_spec: Optional[Tuple[str, str, str]],
Comment on lines 24 to +25 (Contributor):
It might be good to swap this to

Suggested change
-default_qc_spec: Optional[Tuple[str, str, str]],
-evaluation_qc_spec: Optional[Tuple[str, str, str]],
+default_qc_spec: Optional[Tuple[str, str, str]],
+optimization_qc_spec: Optional[Tuple[str, str, str]],

as the optimization spec is likely the optional thing users will want to tweak, rather than the final level of theory?

workflow_name: Optional[str],
workflow_file_name: Optional[str],
directory: Optional[str],
@@ -85,6 +86,7 @@ def _run_cli(
force_field_path=force_field_path,
target_torsion_smirks=target_torsion_smirks,
default_qc_spec=default_qc_spec,
evaluation_qc_spec=evaluation_qc_spec,
workflow_name=workflow_name,
workflow_file_name=workflow_file_name,
allow_multiple_molecules=False,
47 changes: 33 additions & 14 deletions openff/bespokefit/cli/executor/submit.py
@@ -88,6 +88,13 @@ def submit_options(allow_multiple_molecules: bool = False):
"then 'none' should be specified.",
required=False,
),
optgroup.option(
"--evaluation-qc-spec",
type=(str, str, str),
help="The program, method and basis used to refine the default specification. In the case of a torsion drive"
"this is the specification used to evaluate the PES on the geometries from the default spec.",
required=False,
),
]


@@ -97,6 +104,7 @@ def _to_input_schema(
force_field_path: Optional[str],
target_torsion_smirks: Tuple[str],
default_qc_spec: Optional[Tuple[str, str, str]],
evaluation_qc_spec: Optional[Tuple[str, str, str]],
workflow_name: Optional[str],
workflow_file_name: Optional[str],
) -> "BespokeOptimizationSchema":
@@ -135,21 +143,28 @@ def _to_input_schema(
workflow_factory.initial_force_field = force_field_path
if len(target_torsion_smirks) > 0:
workflow_factory.target_torsion_smirks = [*target_torsion_smirks]
if default_qc_spec is not None:

program, method, basis = default_qc_spec

if basis.lower() == "none":
basis = None

workflow_factory.default_qc_specs = [
QCSpec(
program=program,
method=method,
basis=basis,
spec_description="CLI provided spec",
for spec, name in [
(default_qc_spec, "default_qc_spec"),
(evaluation_qc_spec, "evaluation_qc_spec"),
]:
if spec is not None:

program, method, basis = spec

if basis.lower() == "none":
basis = None

setattr(
workflow_factory,
name,
QCSpec(
program=program,
method=method,
basis=basis,
spec_name=name,
spec_description=f"CLI provided spec {name}",
),
)
]

except FileNotFoundError:

@@ -201,6 +216,7 @@ def _submit(
force_field_path: Optional[str],
target_torsion_smirks: Tuple[str],
default_qc_spec: Optional[Tuple[str, str, str]],
evaluation_qc_spec: Optional[Tuple[str, str, str]],
workflow_name: Optional[str],
workflow_file_name: Optional[str],
allow_multiple_molecules: bool,
@@ -269,6 +285,7 @@
force_field_path,
target_torsion_smirks,
default_qc_spec,
evaluation_qc_spec,
workflow_name,
workflow_file_name,
)
@@ -330,6 +347,7 @@ def _submit_cli(
force_field_path: Optional[List[str]],
target_torsion_smirks: Tuple[str],
default_qc_spec: Optional[Tuple[str, str, str]],
evaluation_qc_spec: Optional[Tuple[str, str, str]],
workflow_name: Optional[str],
workflow_file_name: Optional[str],
save_submission: bool,
@@ -350,6 +368,7 @@
force_field_path=force_field_path,
target_torsion_smirks=target_torsion_smirks,
default_qc_spec=default_qc_spec,
evaluation_qc_spec=evaluation_qc_spec,
workflow_name=workflow_name,
workflow_file_name=workflow_file_name,
allow_multiple_molecules=True,
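For reference, the following is a minimal, self-contained sketch of the spec-assignment pattern introduced in `_to_input_schema` above. `QCSpecStub`, `FactoryStub`, `apply_cli_specs`, and the example triples are hypothetical stand-ins for illustration only, not the real `QCSpec` class or bespoke workflow factory used by bespokefit: each optional `(program, method, basis)` triple supplied on the command line becomes a spec attribute on the factory, and a basis of `"none"` is translated to `None`.

```python
from dataclasses import dataclass
from typing import Optional, Tuple


@dataclass
class QCSpecStub:
    """Stand-in for the real QCSpec class (illustration only)."""

    program: str
    method: str
    basis: Optional[str]
    spec_name: str
    spec_description: str


@dataclass
class FactoryStub:
    """Stand-in for the bespoke workflow factory (illustration only)."""

    default_qc_spec: Optional[QCSpecStub] = None
    evaluation_qc_spec: Optional[QCSpecStub] = None


def apply_cli_specs(
    factory: FactoryStub,
    default_qc_spec: Optional[Tuple[str, str, str]],
    evaluation_qc_spec: Optional[Tuple[str, str, str]],
):
    # Mirror the loop in `_to_input_schema`: skip triples that were not given,
    # translate a basis of "none" into None, and assign the spec by name.
    for spec, name in [
        (default_qc_spec, "default_qc_spec"),
        (evaluation_qc_spec, "evaluation_qc_spec"),
    ]:
        if spec is None:
            continue

        program, method, basis = spec

        setattr(
            factory,
            name,
            QCSpecStub(
                program=program,
                method=method,
                basis=None if basis.lower() == "none" else basis,
                spec_name=name,
                spec_description=f"CLI provided spec {name}",
            ),
        )


factory = FactoryStub()
apply_cli_specs(
    factory,
    default_qc_spec=("xtb", "gfn2xtb", "none"),  # hypothetical cheap torsion drive spec
    evaluation_qc_spec=("psi4", "b3lyp-d3bj", "dzvp"),  # hypothetical single point spec
)
assert factory.default_qc_spec.basis is None
assert factory.evaluation_qc_spec.basis == "dzvp"
```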
7 changes: 3 additions & 4 deletions openff/bespokefit/data/schemas/debug.json
@@ -80,8 +80,7 @@
"heuristic": "path_length",
"keep_non_rotor_ring_substituents": false
},
"default_qc_specs": [
{
"default_qc_spec": {
"method": "uff",
"basis": null,
"program": "rdkit",
@@ -97,6 +96,6 @@
"mayer_indices"
],
"keywords": null
}
]
},
"evaluation_qc_spec": null
}
7 changes: 3 additions & 4 deletions openff/bespokefit/data/schemas/default.json
@@ -81,8 +81,7 @@
"heuristic": "path_length",
"keep_non_rotor_ring_substituents": false
},
"default_qc_specs": [
{
"default_qc_spec": {
"method": "B3LYP-D3BJ",
"basis": "DZVP",
"program": "psi4",
@@ -98,6 +97,6 @@
"mayer_indices"
],
"keywords": null
}
]
},
"evaluation_qc_spec": null
}
131 changes: 112 additions & 19 deletions openff/bespokefit/executor/services/qcgenerator/cache.py
@@ -1,13 +1,22 @@
import hashlib
from typing import TypeVar, Union
from typing import TYPE_CHECKING, Optional, TypeVar, Union

import redis
from openff.toolkit.topology import Molecule

from openff.bespokefit.executor.services.qcgenerator import worker
from openff.bespokefit.schema.tasks import HessianTask, OptimizationTask, Torsion1DTask
from openff.bespokefit.schema.tasks import (
HessianTask,
OptimizationTask,
QCGenerationTask,
Torsion1DTask,
)
from openff.bespokefit.utilities.molecule import canonical_order_atoms

if TYPE_CHECKING:
# Only use as a type hint. Use `celery_app.AsyncResult` to initialize
from celery.result import AsyncResult

_T = TypeVar("_T", HessianTask, OptimizationTask, Torsion1DTask)


@@ -50,6 +59,95 @@ def _canonicalize_task(task: _T) -> _T:
return task


def _hash_task(task: QCGenerationTask) -> str:
"""Returns a hashed representation of a QC task"""
return hashlib.sha512(task.json().encode()).hexdigest()


def _retrieve_cached_task_id(
task_hash: str, redis_connection: redis.Redis
) -> Optional[str]:
"""Retrieve the task ID of a cached QC task if present in the redis cache"""

task_id = redis_connection.hget("qcgenerator:task-ids", task_hash)

return None if task_id is None else task_id.decode()


def _cache_task_id(
task_id: str, task_type: str, task_hash: str, redis_connection: redis.Redis
):
"""Store the ID of a running QC task in the QC task cache."""

redis_connection.hset("qcgenerator:types", task_id, task_type)
# Make sure to only set the hash after the type is set in case the connection
# goes down before this information is entered and subsequently discarded.
redis_connection.hset("qcgenerator:task-ids", task_hash, task_id)


def _compute_torsion_drive_task(
task: Torsion1DTask, redis_connection: redis.Redis
) -> str:
"""Submit a torsion drive to celery, optionally chaining together a torsion
drive followed by a single point energy re-evaluation."""

task_id = None

torsion_drive_task = task.copy(deep=True)
torsion_drive_task.sp_specification = None

torsion_drive_hash = _hash_task(torsion_drive_task)
torsion_drive_id = _retrieve_cached_task_id(torsion_drive_hash, redis_connection)

if torsion_drive_id is None:

# There are no cached torsion drives at the 'pre-optimise' level of theory,
# so we need to run a torsion drive and then optionally a single point.
if task.sp_specification is None:

torsion_drive_id = worker.compute_torsion_drive.delay(
task_json=task.json()
).id

else:

task_future: AsyncResult = (
worker.compute_torsion_drive.s(task_json=task.json())
| worker.evaluate_torsion_drive.s(
model_json=task.sp_specification.model.json(),
program=task.sp_specification.program,
)
).delay()

torsion_drive_id = task_future.parent.id
task_id = task_future.id

_cache_task_id(
torsion_drive_id, task.type, torsion_drive_hash, redis_connection
)

if task.sp_specification is None:
return torsion_drive_id

if task_id is None:

# Handle the case where we have a running torsion drive that we need to
# append a single point calculation to the end of.
task_id = (
(
worker.wait_for_task.s(torsion_drive_id)
| worker.evaluate_torsion_drive.s(
model_json=task.sp_specification.model.json(),
program=task.sp_specification.program,
)
)
.delay()
.id
)

return task_id


def cached_compute_task(
task: Union[HessianTask, OptimizationTask, Torsion1DTask],
redis_connection: redis.Redis,
@@ -58,28 +156,23 @@ def cached_compute_task(
worker.
"""

if isinstance(task, Torsion1DTask):
compute = worker.compute_torsion_drive
elif isinstance(task, OptimizationTask):
compute = worker.compute_optimization
elif isinstance(task, HessianTask):
compute = worker.compute_hessian
else:
raise NotImplementedError()

# Canonicalize the task to improve the cache hit rate.
task = _canonicalize_task(task)

task_hash = hashlib.sha512(task.json().encode()).hexdigest()
task_id = redis_connection.hget("qcgenerator:task-ids", task_hash)
task_hash = _hash_task(task)
task_id = _retrieve_cached_task_id(task_hash, redis_connection)

if task_id is not None:
return task_id.decode()
return task_id

task_id = compute.delay(task_json=task.json()).id
if isinstance(task, Torsion1DTask):
task_id = _compute_torsion_drive_task(task, redis_connection)
elif isinstance(task, OptimizationTask):
task_id = worker.compute_optimization.delay(task_json=task.json()).id
elif isinstance(task, HessianTask):
task_id = worker.compute_hessian.delay(task_json=task.json()).id
else:
raise NotImplementedError()

redis_connection.hset("qcgenerator:types", task_id, task.type)
# Make sure to only set the hash after the type is set in case the connection
# goes down before this information is entered and subsequently discarded.
redis_connection.hset("qcgenerator:task-ids", task_hash, task_id)
_cache_task_id(task_id, task.type, task_hash, redis_connection)
return task_id
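
The chaining logic in `_compute_torsion_drive_task` above relies on standard celery behaviour, which the standalone sketch below illustrates. The `drive` and `evaluate` tasks and the broker URL are hypothetical placeholders rather than the worker tasks defined in this PR: composing two signatures with `|` builds a chain, the second task receives the first task's return value as its leading argument, and the chain's `AsyncResult` exposes the upstream task through `.parent`, which is why `task_future.parent.id` gives the torsion drive ID while `task_future.id` identifies the final single point evaluation.

```python
from celery import Celery

# Hypothetical broker/backend; any reachable redis instance would do for this sketch.
app = Celery(
    "sketch",
    broker="redis://localhost:6379/0",
    backend="redis://localhost:6379/0",
)


@app.task
def drive(task_json: str) -> str:
    # Stand-in for `worker.compute_torsion_drive`.
    return task_json.upper()


@app.task
def evaluate(drive_result: str, program: str) -> str:
    # Stand-in for `worker.evaluate_torsion_drive`; receives the drive result
    # as its first argument because it is the second link in the chain.
    return f"{program}:{drive_result}"


# Compose the two signatures and submit them as one chain.
chain_result = (drive.s("{}") | evaluate.s(program="psi4")).delay()

torsion_drive_id = chain_result.parent.id  # ID of the first (drive) task
final_task_id = chain_result.id            # ID of the chained evaluation task
```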