From 27813abcbb3495d3d1b6ce61d91160ddf7502b62 Mon Sep 17 00:00:00 2001 From: "Josh A. Mitchell" Date: Thu, 30 Mar 2023 23:42:28 +1100 Subject: [PATCH] Add option to always keep temporary files (#199) * Add option to always keep temporary files * Simplify tempcd * Specify type of BEFLOW_KEEP_TMP_FILES * Add tests for temporary_cd * Update temporary_cd to pass tests * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix test warnings * Add comment * Support multi-part paths without parents argument * Remove print statements from temporary_cd * Changes to optimizer worker to support KEEP_TMP_FILES * Try to keep QCEngine's scratch files * Deprecate BEFLOW_OPTIMIZER_KEEP_FILES * Revert "Try to keep QCEngine's scratch files" This reverts commit ff8dc2e62f9d995d1c23911810c4d8c65def3a3d. * Revert tests to use openff.utilities' temporary_cd * Consolidate various methods of deleting temporary files * Ensure deprecated alias of setting is unset in tests --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- openff/bespokefit/executor/executor.py | 7 +- .../bespokefit/executor/services/_settings.py | 9 ++ .../bespokefit/executor/services/gateway.py | 2 +- .../executor/services/optimizer/worker.py | 13 +- .../optimizers/forcebalance/factories.py | 2 +- openff/bespokefit/optimizers/model.py | 24 +--- .../bespokefit/tests/utilities/test_tempcd.py | 136 ++++++++++++++++++ openff/bespokefit/utilities/tempcd.py | 58 ++++++++ 8 files changed, 218 insertions(+), 33 deletions(-) create mode 100644 openff/bespokefit/tests/utilities/test_tempcd.py create mode 100644 openff/bespokefit/utilities/tempcd.py diff --git a/openff/bespokefit/executor/executor.py b/openff/bespokefit/executor/executor.py index 248590bf..103c0a65 100644 --- a/openff/bespokefit/executor/executor.py +++ b/openff/bespokefit/executor/executor.py @@ -14,7 +14,6 @@ import requests import rich from openff.toolkit.typing.engines.smirnoff import ForceField -from openff.utilities import temporary_cd from pydantic import Field from rich.padding import Padding from typing_extensions import Literal @@ -34,6 +33,7 @@ from openff.bespokefit.schema.fitting import BespokeOptimizationSchema from openff.bespokefit.schema.results import BespokeOptimizationResults from openff.bespokefit.utilities.pydantic import BaseModel +from openff.bespokefit.utilities.tempcd import temporary_cd _T = TypeVar("_T") @@ -214,7 +214,10 @@ def __init__( self._optimizer_worker_config = optimizer_worker_config self._directory = directory - self._remove_directory = directory is None + settings = current_settings() + self._remove_directory = directory is None and not ( + settings.BEFLOW_OPTIMIZER_KEEP_FILES or settings.BEFLOW_KEEP_TMP_FILES + ) self._launch_redis_if_unavailable = launch_redis_if_unavailable diff --git a/openff/bespokefit/executor/services/_settings.py b/openff/bespokefit/executor/services/_settings.py index a70106cb..5c762d2d 100644 --- a/openff/bespokefit/executor/services/_settings.py +++ b/openff/bespokefit/executor/services/_settings.py @@ -62,6 +62,15 @@ class Settings(BaseSettings): BEFLOW_OPTIMIZER_WORKER_N_CORES: Union[int, Literal["auto"]] = "auto" BEFLOW_OPTIMIZER_WORKER_MAX_MEM: Union[float, Literal["auto"]] = "auto" BEFLOW_OPTIMIZER_KEEP_FILES: bool = False + """ + .. deprecated:: 0.2.1 + use BEFLOW_KEEP_TMP_FILES instead + + Keep the optimizer's temporary files. + """ + + BEFLOW_KEEP_TMP_FILES: bool = False + """Keep all temporary files.""" @property def fragmenter_settings(self) -> WorkerSettings: diff --git a/openff/bespokefit/executor/services/gateway.py b/openff/bespokefit/executor/services/gateway.py index 64410269..453bff45 100644 --- a/openff/bespokefit/executor/services/gateway.py +++ b/openff/bespokefit/executor/services/gateway.py @@ -7,10 +7,10 @@ import requests import uvicorn from fastapi import APIRouter, FastAPI -from openff.utilities import temporary_cd from starlette.middleware.cors import CORSMiddleware from openff.bespokefit.executor.services import current_settings +from openff.bespokefit.utilities.tempcd import temporary_cd def __load_router(path: str) -> APIRouter: diff --git a/openff/bespokefit/executor/services/optimizer/worker.py b/openff/bespokefit/executor/services/optimizer/worker.py index dc492336..ab52d179 100644 --- a/openff/bespokefit/executor/services/optimizer/worker.py +++ b/openff/bespokefit/executor/services/optimizer/worker.py @@ -1,8 +1,5 @@ -import os -import shutil from typing import Union -from openff.utilities import temporary_cd from pydantic import parse_raw_as from qcelemental.util import serialize @@ -19,6 +16,7 @@ BespokeOptimizationResults, OptimizationStageResults, ) +from openff.bespokefit.utilities.tempcd import temporary_cd celery_app = configure_celery_app("optimizer", connect_to_default_redis(validate=False)) @@ -41,9 +39,7 @@ def optimize(self, optimization_input_json: str) -> str: stage_results = [] - home = os.getcwd() - - with temporary_cd(): + with temporary_cd(input_schema.id): for i, stage in enumerate(input_schema.stages): optimizer = get_optimizer(stage.optimizer.type) # If there are no parameters to optimise as they have all been cached mock @@ -61,7 +57,6 @@ def optimize(self, optimization_input_json: str) -> str: result = optimizer.optimize( schema=stage, initial_force_field=input_force_field, - keep_files=settings.BEFLOW_OPTIMIZER_KEEP_FILES, root_directory=f"stage_{i}", ) @@ -83,10 +78,6 @@ def optimize(self, optimization_input_json: str) -> str: ).to_string(discard_cosmetic_attributes=True) ) - if settings.BEFLOW_OPTIMIZER_KEEP_FILES: - os.makedirs(os.path.join(home, input_schema.id), exist_ok=True) - shutil.move(os.getcwd(), os.path.join(home, input_schema.id)) - result = BespokeOptimizationResults(input_schema=input_schema, stages=stage_results) # cache the final parameters if ( diff --git a/openff/bespokefit/optimizers/forcebalance/factories.py b/openff/bespokefit/optimizers/forcebalance/factories.py index 41126b58..813f9e1b 100644 --- a/openff/bespokefit/optimizers/forcebalance/factories.py +++ b/openff/bespokefit/optimizers/forcebalance/factories.py @@ -16,7 +16,6 @@ from openff.toolkit.topology import Molecule as OFFMolecule from openff.toolkit.typing.engines.smirnoff import ForceField from openff.units import unit -from openff.utilities import temporary_cd from qcelemental.models import AtomicResult from qcelemental.models.procedures import OptimizationResult, TorsionDriveResult from qcportal.models import TorsionDriveRecord @@ -41,6 +40,7 @@ TorsionProfileTargetSchema, VibrationTargetSchema, ) +from openff.bespokefit.utilities.tempcd import temporary_cd if TYPE_CHECKING: from qcelemental.models import Molecule as QCMolecule diff --git a/openff/bespokefit/optimizers/model.py b/openff/bespokefit/optimizers/model.py index 0ddf2866..a4cfd5e4 100644 --- a/openff/bespokefit/optimizers/model.py +++ b/openff/bespokefit/optimizers/model.py @@ -5,18 +5,17 @@ import abc import copy import os -import shutil from collections import defaultdict from typing import Dict, Optional, Type from openff.toolkit.typing.engines.smirnoff import ForceField -from openff.utilities import temporary_cd from openff.bespokefit.exceptions import OptimizerError, TargetRegisterError from openff.bespokefit.schema.fitting import OptimizationStageSchema from openff.bespokefit.schema.optimizers import OptimizerSchema from openff.bespokefit.schema.results import OptimizationStageResults from openff.bespokefit.schema.targets import BaseTargetSchema +from openff.bespokefit.utilities.tempcd import temporary_cd TargetSchemaType = Type[BaseTargetSchema] @@ -208,7 +207,6 @@ def optimize( cls, schema: OptimizationStageSchema, initial_force_field: ForceField, - keep_files: bool = False, root_directory: Optional[str] = None, ) -> OptimizationStageResults: """ @@ -218,21 +216,11 @@ def optimize( It should loop over the targets and assert they are registered and then dispatch compute and optimization. """ + if root_directory is not None: + os.makedirs(root_directory, exist_ok=True) - try: - if root_directory is not None: - os.makedirs(root_directory, exist_ok=True) - - with temporary_cd(root_directory): - cls.prepare(schema, initial_force_field, ".") - results = cls._optimize(schema, initial_force_field) - - finally: - if ( - root_directory is not None - and not keep_files - and os.path.isdir(root_directory) - ): - shutil.rmtree(root_directory, ignore_errors=True) + with temporary_cd(root_directory): + cls.prepare(schema, initial_force_field, ".") + results = cls._optimize(schema, initial_force_field) return results diff --git a/openff/bespokefit/tests/utilities/test_tempcd.py b/openff/bespokefit/tests/utilities/test_tempcd.py new file mode 100644 index 00000000..3ffcd5bb --- /dev/null +++ b/openff/bespokefit/tests/utilities/test_tempcd.py @@ -0,0 +1,136 @@ +from pathlib import Path + +import pytest + +from openff.bespokefit.utilities.tempcd import temporary_cd + + +@pytest.mark.parametrize( + "path", + [ + "tempcd_test", + "tempcd/test", + Path("tempcd_test"), + None, + ".", + "", + ], +) +class TestTemporaryCD: + @pytest.fixture(autouse=True) + def run_test_in_empty_temporary_directory(self, monkeypatch, tmp_path): + """ + Tests in this class should run in their own empty temporary directories + + This fixture creates a temporary directory for the test, moves into it, + and deletes it when the test is finished. + """ + monkeypatch.chdir(tmp_path) + + def test_tempcd_changes_dir(self, path): + """ + Check temporary_cd changes directory to the target directory + """ + # Arrange + starting_path = Path(".").resolve() + + # Act + with temporary_cd(path): + temp_path = Path(".").resolve() + + # Assert + if path is None: + assert temp_path.parent == starting_path + else: + assert temp_path == (starting_path / path).resolve() + + def test_tempcd_changes_back(self, path): + """ + Check temporary_cd returns to original directory when context manager exits + """ + # Arrange + starting_path = Path(".").resolve() + + # Act + with temporary_cd(path): + pass + + # Assert + assert Path(".").resolve() == starting_path + + def test_tempcd_cleans_up_temporary_directory(self, path, monkeypatch): + """ + Check temporary_cd cleans up temporary directories it creates when + BEFLOW_KEEP_TMP_FILES is not set + + This test is skipped when it is parametrized to operate on the working + directory because the working directory must exist and therefore cannot + be a temporary directory. This check is hard-coded to check for the + path being ``"."`` or ``""``, rather than simply checking if the path + exists, so that conflicts between runs will be detected (though such + conflicts should be prevented by the + ``run_test_in_empty_temporary_directory`` fixture) + """ + if path in [".", ""]: + pytest.skip("'Temporary' directory exists") + + # Arrange + monkeypatch.delenv("BEFLOW_KEEP_TMP_FILES", raising=False) + monkeypatch.delenv("BEFLOW_OPTIMIZER_KEEP_TMP_FILES", raising=False) + + # Act + with temporary_cd(path): + Path("touch").write_text("Ensure cleanup of directories with files") + temp_path = Path(".").resolve() + + # Assert + assert not temp_path.exists() + + def test_tempcd_keeps_temporary_directory(self, path, monkeypatch): + """ + Check temporary_cd keeps temporary directories it creates when + BEFLOW_KEEP_TMP_FILES is set + """ + # Arrange + monkeypatch.setenv("BEFLOW_KEEP_TMP_FILES", "1") + monkeypatch.delenv("BEFLOW_OPTIMIZER_KEEP_TMP_FILES", raising=False) + + # Act + with temporary_cd(path): + temp_path = Path(".").resolve() + + # Assert + assert temp_path.exists() + + @pytest.mark.parametrize("keep_tmp_files", [True, False]) + def test_tempcd_keeps_existing_directory(self, path, monkeypatch, keep_tmp_files): + """ + Check temporary_cd keeps existing directories + + This test is skipped when the path is ``None`` because it guarantees + that the path does not exist. The target directory will be created in + the Arrange section, unless it is the working directory (in which case + it already exists). This test is hard-coded to check for the path being + ``"."`` or ``""``, rather than simply checking if the path exists, so + that conflicts between runs will be detected (though such conflicts + should be prevented by the ``run_test_in_empty_temporary_directory`` + fixture) + """ + if path is None: + pytest.skip("Temporary directory is guaranteed not to exist") + + # Arrange + if path not in [".", ""]: + Path(path).mkdir(parents=True) + if keep_tmp_files: + monkeypatch.setenv("BEFLOW_KEEP_TMP_FILES", "1") + else: + monkeypatch.delenv("BEFLOW_KEEP_TMP_FILES", raising=False) + monkeypatch.delenv("BEFLOW_OPTIMIZER_KEEP_TMP_FILES", raising=False) + + # Act + with temporary_cd(path): + pass + + # Assert + assert Path(path).is_dir() diff --git a/openff/bespokefit/utilities/tempcd.py b/openff/bespokefit/utilities/tempcd.py new file mode 100644 index 00000000..e4df13e0 --- /dev/null +++ b/openff/bespokefit/utilities/tempcd.py @@ -0,0 +1,58 @@ +"""""" +import os +import shutil +import tempfile +from contextlib import contextmanager +from pathlib import Path +from typing import Optional, Union + +from openff.bespokefit.executor.services import current_settings + + +@contextmanager +def temporary_cd(path: Optional[Union[str, Path]] = None): + """ + Context manager to move the current working directory to the path specified. + + If no path is given or the path does not exist, a temporary directory will + be created. This temporary directory and its contents will be deleted when + the context manager exits. + + Parameters + ---------- + path + The path to CD into. If ``None`` or not specified, a temporary directory + will be created. If specified but the path does not exist, a temporary + directory with that name will be created. + """ + # Normalize path to a pathlib Path + path: Optional[Path] = None if path is None else Path(path) + + # Decide whether to clean up based on bespokefit settings + settings = current_settings() + cleanup = not ( + settings.BEFLOW_OPTIMIZER_KEEP_FILES or settings.BEFLOW_KEEP_TMP_FILES + ) + + # If a path is not given, create a temporary directory + if path is None: + path = Path(tempfile.mkdtemp(dir=".")) + # If a path is given but does not already exist, create it + elif not path.exists(): + path.mkdir(parents=True) + # If we didn't create the path, do NOT clean it up + else: + cleanup = False + + old_directory = os.getcwd() + + try: + os.chdir(path) + yield + + finally: + os.chdir(old_directory) + # If we created the directory, clean it up + if cleanup: + print(f"cleaning up temporary directory {path}") + shutil.rmtree(path)