Skip to content

Commit

Permalink
Add option to always keep temporary files (#199)
Browse files Browse the repository at this point in the history
* Add option to always keep temporary files

* Simplify tempcd

* Specify type of BEFLOW_KEEP_TMP_FILES

* Add tests for temporary_cd

* Update temporary_cd to pass tests

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Fix test warnings

* Add comment

* Support multi-part paths without parents argument

* Remove print statements from temporary_cd

* Changes to optimizer worker to support KEEP_TMP_FILES

* Try to keep QCEngine's scratch files

* Deprecate BEFLOW_OPTIMIZER_KEEP_FILES

* Revert "Try to keep QCEngine's scratch files"

This reverts commit ff8dc2e.

* Revert tests to use openff.utilities' temporary_cd

* Consolidate various methods of deleting temporary files

* Ensure deprecated alias of setting is unset in tests

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
Yoshanuikabundi and pre-commit-ci[bot] authored Mar 30, 2023
1 parent 57f2ba4 commit 27813ab
Show file tree
Hide file tree
Showing 8 changed files with 218 additions and 33 deletions.
7 changes: 5 additions & 2 deletions openff/bespokefit/executor/executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
import requests
import rich
from openff.toolkit.typing.engines.smirnoff import ForceField
from openff.utilities import temporary_cd
from pydantic import Field
from rich.padding import Padding
from typing_extensions import Literal
Expand All @@ -34,6 +33,7 @@
from openff.bespokefit.schema.fitting import BespokeOptimizationSchema
from openff.bespokefit.schema.results import BespokeOptimizationResults
from openff.bespokefit.utilities.pydantic import BaseModel
from openff.bespokefit.utilities.tempcd import temporary_cd

_T = TypeVar("_T")

Expand Down Expand Up @@ -214,7 +214,10 @@ def __init__(
self._optimizer_worker_config = optimizer_worker_config

self._directory = directory
self._remove_directory = directory is None
settings = current_settings()
self._remove_directory = directory is None and not (
settings.BEFLOW_OPTIMIZER_KEEP_FILES or settings.BEFLOW_KEEP_TMP_FILES
)

self._launch_redis_if_unavailable = launch_redis_if_unavailable

Expand Down
9 changes: 9 additions & 0 deletions openff/bespokefit/executor/services/_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,15 @@ class Settings(BaseSettings):
BEFLOW_OPTIMIZER_WORKER_N_CORES: Union[int, Literal["auto"]] = "auto"
BEFLOW_OPTIMIZER_WORKER_MAX_MEM: Union[float, Literal["auto"]] = "auto"
BEFLOW_OPTIMIZER_KEEP_FILES: bool = False
"""
.. deprecated:: 0.2.1
use BEFLOW_KEEP_TMP_FILES instead
Keep the optimizer's temporary files.
"""

BEFLOW_KEEP_TMP_FILES: bool = False
"""Keep all temporary files."""

@property
def fragmenter_settings(self) -> WorkerSettings:
Expand Down
2 changes: 1 addition & 1 deletion openff/bespokefit/executor/services/gateway.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@
import requests
import uvicorn
from fastapi import APIRouter, FastAPI
from openff.utilities import temporary_cd
from starlette.middleware.cors import CORSMiddleware

from openff.bespokefit.executor.services import current_settings
from openff.bespokefit.utilities.tempcd import temporary_cd


def __load_router(path: str) -> APIRouter:
Expand Down
13 changes: 2 additions & 11 deletions openff/bespokefit/executor/services/optimizer/worker.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
import os
import shutil
from typing import Union

from openff.utilities import temporary_cd
from pydantic import parse_raw_as
from qcelemental.util import serialize

Expand All @@ -19,6 +16,7 @@
BespokeOptimizationResults,
OptimizationStageResults,
)
from openff.bespokefit.utilities.tempcd import temporary_cd

celery_app = configure_celery_app("optimizer", connect_to_default_redis(validate=False))

Expand All @@ -41,9 +39,7 @@ def optimize(self, optimization_input_json: str) -> str:

stage_results = []

home = os.getcwd()

with temporary_cd():
with temporary_cd(input_schema.id):
for i, stage in enumerate(input_schema.stages):
optimizer = get_optimizer(stage.optimizer.type)
# If there are no parameters to optimise as they have all been cached mock
Expand All @@ -61,7 +57,6 @@ def optimize(self, optimization_input_json: str) -> str:
result = optimizer.optimize(
schema=stage,
initial_force_field=input_force_field,
keep_files=settings.BEFLOW_OPTIMIZER_KEEP_FILES,
root_directory=f"stage_{i}",
)

Expand All @@ -83,10 +78,6 @@ def optimize(self, optimization_input_json: str) -> str:
).to_string(discard_cosmetic_attributes=True)
)

if settings.BEFLOW_OPTIMIZER_KEEP_FILES:
os.makedirs(os.path.join(home, input_schema.id), exist_ok=True)
shutil.move(os.getcwd(), os.path.join(home, input_schema.id))

result = BespokeOptimizationResults(input_schema=input_schema, stages=stage_results)
# cache the final parameters
if (
Expand Down
2 changes: 1 addition & 1 deletion openff/bespokefit/optimizers/forcebalance/factories.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
from openff.toolkit.topology import Molecule as OFFMolecule
from openff.toolkit.typing.engines.smirnoff import ForceField
from openff.units import unit
from openff.utilities import temporary_cd
from qcelemental.models import AtomicResult
from qcelemental.models.procedures import OptimizationResult, TorsionDriveResult
from qcportal.models import TorsionDriveRecord
Expand All @@ -41,6 +40,7 @@
TorsionProfileTargetSchema,
VibrationTargetSchema,
)
from openff.bespokefit.utilities.tempcd import temporary_cd

if TYPE_CHECKING:
from qcelemental.models import Molecule as QCMolecule
Expand Down
24 changes: 6 additions & 18 deletions openff/bespokefit/optimizers/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,17 @@
import abc
import copy
import os
import shutil
from collections import defaultdict
from typing import Dict, Optional, Type

from openff.toolkit.typing.engines.smirnoff import ForceField
from openff.utilities import temporary_cd

from openff.bespokefit.exceptions import OptimizerError, TargetRegisterError
from openff.bespokefit.schema.fitting import OptimizationStageSchema
from openff.bespokefit.schema.optimizers import OptimizerSchema
from openff.bespokefit.schema.results import OptimizationStageResults
from openff.bespokefit.schema.targets import BaseTargetSchema
from openff.bespokefit.utilities.tempcd import temporary_cd

TargetSchemaType = Type[BaseTargetSchema]

Expand Down Expand Up @@ -208,7 +207,6 @@ def optimize(
cls,
schema: OptimizationStageSchema,
initial_force_field: ForceField,
keep_files: bool = False,
root_directory: Optional[str] = None,
) -> OptimizationStageResults:
"""
Expand All @@ -218,21 +216,11 @@ def optimize(
It should loop over the targets and assert they are registered and then dispatch
compute and optimization.
"""
if root_directory is not None:
os.makedirs(root_directory, exist_ok=True)

try:
if root_directory is not None:
os.makedirs(root_directory, exist_ok=True)

with temporary_cd(root_directory):
cls.prepare(schema, initial_force_field, ".")
results = cls._optimize(schema, initial_force_field)

finally:
if (
root_directory is not None
and not keep_files
and os.path.isdir(root_directory)
):
shutil.rmtree(root_directory, ignore_errors=True)
with temporary_cd(root_directory):
cls.prepare(schema, initial_force_field, ".")
results = cls._optimize(schema, initial_force_field)

return results
136 changes: 136 additions & 0 deletions openff/bespokefit/tests/utilities/test_tempcd.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
from pathlib import Path

import pytest

from openff.bespokefit.utilities.tempcd import temporary_cd


@pytest.mark.parametrize(
"path",
[
"tempcd_test",
"tempcd/test",
Path("tempcd_test"),
None,
".",
"",
],
)
class TestTemporaryCD:
@pytest.fixture(autouse=True)
def run_test_in_empty_temporary_directory(self, monkeypatch, tmp_path):
"""
Tests in this class should run in their own empty temporary directories
This fixture creates a temporary directory for the test, moves into it,
and deletes it when the test is finished.
"""
monkeypatch.chdir(tmp_path)

def test_tempcd_changes_dir(self, path):
"""
Check temporary_cd changes directory to the target directory
"""
# Arrange
starting_path = Path(".").resolve()

# Act
with temporary_cd(path):
temp_path = Path(".").resolve()

# Assert
if path is None:
assert temp_path.parent == starting_path
else:
assert temp_path == (starting_path / path).resolve()

def test_tempcd_changes_back(self, path):
"""
Check temporary_cd returns to original directory when context manager exits
"""
# Arrange
starting_path = Path(".").resolve()

# Act
with temporary_cd(path):
pass

# Assert
assert Path(".").resolve() == starting_path

def test_tempcd_cleans_up_temporary_directory(self, path, monkeypatch):
"""
Check temporary_cd cleans up temporary directories it creates when
BEFLOW_KEEP_TMP_FILES is not set
This test is skipped when it is parametrized to operate on the working
directory because the working directory must exist and therefore cannot
be a temporary directory. This check is hard-coded to check for the
path being ``"."`` or ``""``, rather than simply checking if the path
exists, so that conflicts between runs will be detected (though such
conflicts should be prevented by the
``run_test_in_empty_temporary_directory`` fixture)
"""
if path in [".", ""]:
pytest.skip("'Temporary' directory exists")

# Arrange
monkeypatch.delenv("BEFLOW_KEEP_TMP_FILES", raising=False)
monkeypatch.delenv("BEFLOW_OPTIMIZER_KEEP_TMP_FILES", raising=False)

# Act
with temporary_cd(path):
Path("touch").write_text("Ensure cleanup of directories with files")
temp_path = Path(".").resolve()

# Assert
assert not temp_path.exists()

def test_tempcd_keeps_temporary_directory(self, path, monkeypatch):
"""
Check temporary_cd keeps temporary directories it creates when
BEFLOW_KEEP_TMP_FILES is set
"""
# Arrange
monkeypatch.setenv("BEFLOW_KEEP_TMP_FILES", "1")
monkeypatch.delenv("BEFLOW_OPTIMIZER_KEEP_TMP_FILES", raising=False)

# Act
with temporary_cd(path):
temp_path = Path(".").resolve()

# Assert
assert temp_path.exists()

@pytest.mark.parametrize("keep_tmp_files", [True, False])
def test_tempcd_keeps_existing_directory(self, path, monkeypatch, keep_tmp_files):
"""
Check temporary_cd keeps existing directories
This test is skipped when the path is ``None`` because it guarantees
that the path does not exist. The target directory will be created in
the Arrange section, unless it is the working directory (in which case
it already exists). This test is hard-coded to check for the path being
``"."`` or ``""``, rather than simply checking if the path exists, so
that conflicts between runs will be detected (though such conflicts
should be prevented by the ``run_test_in_empty_temporary_directory``
fixture)
"""
if path is None:
pytest.skip("Temporary directory is guaranteed not to exist")

# Arrange
if path not in [".", ""]:
Path(path).mkdir(parents=True)
if keep_tmp_files:
monkeypatch.setenv("BEFLOW_KEEP_TMP_FILES", "1")
else:
monkeypatch.delenv("BEFLOW_KEEP_TMP_FILES", raising=False)
monkeypatch.delenv("BEFLOW_OPTIMIZER_KEEP_TMP_FILES", raising=False)

# Act
with temporary_cd(path):
pass

# Assert
assert Path(path).is_dir()
58 changes: 58 additions & 0 deletions openff/bespokefit/utilities/tempcd.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
""""""
import os
import shutil
import tempfile
from contextlib import contextmanager
from pathlib import Path
from typing import Optional, Union

from openff.bespokefit.executor.services import current_settings


@contextmanager
def temporary_cd(path: Optional[Union[str, Path]] = None):
"""
Context manager to move the current working directory to the path specified.
If no path is given or the path does not exist, a temporary directory will
be created. This temporary directory and its contents will be deleted when
the context manager exits.
Parameters
----------
path
The path to CD into. If ``None`` or not specified, a temporary directory
will be created. If specified but the path does not exist, a temporary
directory with that name will be created.
"""
# Normalize path to a pathlib Path
path: Optional[Path] = None if path is None else Path(path)

# Decide whether to clean up based on bespokefit settings
settings = current_settings()
cleanup = not (
settings.BEFLOW_OPTIMIZER_KEEP_FILES or settings.BEFLOW_KEEP_TMP_FILES
)

# If a path is not given, create a temporary directory
if path is None:
path = Path(tempfile.mkdtemp(dir="."))
# If a path is given but does not already exist, create it
elif not path.exists():
path.mkdir(parents=True)
# If we didn't create the path, do NOT clean it up
else:
cleanup = False

old_directory = os.getcwd()

try:
os.chdir(path)
yield

finally:
os.chdir(old_directory)
# If we created the directory, clean it up
if cleanup:
print(f"cleaning up temporary directory {path}")
shutil.rmtree(path)

0 comments on commit 27813ab

Please sign in to comment.