diff --git a/packages/service-library/src/servicelib/file_utils.py b/packages/service-library/src/servicelib/file_utils.py index f05f35af329..c90468cba2a 100644 --- a/packages/service-library/src/servicelib/file_utils.py +++ b/packages/service-library/src/servicelib/file_utils.py @@ -1,8 +1,10 @@ import asyncio import hashlib import shutil +from contextlib import contextmanager +from logging import Logger from pathlib import Path -from typing import Final, Protocol +from typing import Final, Iterator, Protocol # https://docs.python.org/3/library/shutil.html#shutil.rmtree # https://docs.python.org/3/library/os.html#os.remove @@ -60,10 +62,59 @@ async def create_sha256_checksum( async def _eval_hash_async( async_stream: AsyncStream, - hasher: "hashlib._Hash", # noqa: SLF001 + hasher: "hashlib._Hash", chunk_size: ByteSize, ) -> str: while chunk := await async_stream.read(chunk_size): hasher.update(chunk) digest = hasher.hexdigest() return f"{digest}" + + +def _get_file_properties(path: Path) -> tuple[float, int]: + stats = path.stat() + return stats.st_mtime, stats.st_size + + +def _get_directory_snapshot(path: Path) -> dict[str, tuple[float, int]]: + return { + f"{p.relative_to(path)}": _get_file_properties(p) + for p in path.rglob("*") + if p.is_file() + } + + +@contextmanager +def log_directory_changes(path: Path, logger: Logger, log_level: int) -> Iterator[None]: + before: dict[str, tuple[float, int]] = _get_directory_snapshot(path) + yield + after: dict[str, tuple[float, int]] = _get_directory_snapshot(path) + + after_keys: set[str] = set(after.keys()) + before_keys: set[str] = set(before.keys()) + common_keys = before_keys & after_keys + + added_elements = after_keys - before_keys + removed_elements = before_keys - after_keys + content_changed_elements = {x for x in common_keys if before[x] != after[x]} + + if added_elements or removed_elements or content_changed_elements: + logger.log(log_level, "File changes in path: '%s'", f"{path}") + if added_elements: + logger.log( + log_level, + "Files added:\n%s", + "\n".join([f"+ {x}" for x in sorted(added_elements)]), + ) + if removed_elements: + logger.log( + log_level, + "Files removed:\n%s", + "\n".join([f"- {x}" for x in sorted(removed_elements)]), + ) + if content_changed_elements: + logger.log( + log_level, + "File content changed:\n%s", + "\n".join([f"* {x}" for x in sorted(content_changed_elements)]), + ) diff --git a/packages/service-library/tests/test_file_utils.py b/packages/service-library/tests/test_file_utils.py index 454106c22b4..b5feff78603 100644 --- a/packages/service-library/tests/test_file_utils.py +++ b/packages/service-library/tests/test_file_utils.py @@ -1,11 +1,14 @@ # pylint: disable=redefined-outer-name # pylint: disable=unused-argument +import logging from pathlib import Path import pytest from faker import Faker -from servicelib.file_utils import remove_directory +from servicelib.file_utils import log_directory_changes, remove_directory + +_logger = logging.getLogger(__name__) @pytest.fixture @@ -80,3 +83,60 @@ async def test_remove_not_existing_directory_rasing_error( await remove_directory( path=missing_path, only_children=only_children, ignore_errors=False ) + + +async def test_log_directory_changes(caplog: pytest.LogCaptureFixture, some_dir: Path): + # directory cretion triggers no changes + caplog.clear() + with log_directory_changes(some_dir, _logger, logging.ERROR): + (some_dir / "a-dir").mkdir(parents=True, exist_ok=True) + assert "File changes in path" not in caplog.text + assert "Files added:" not in caplog.text + assert "Files removed:" not in caplog.text + assert "File content changed" not in caplog.text + + # files were added + caplog.clear() + with log_directory_changes(some_dir, _logger, logging.ERROR): + (some_dir / "hoho").touch() + assert "File changes in path" in caplog.text + assert "Files added:" in caplog.text + assert "Files removed:" not in caplog.text + assert "File content changed" not in caplog.text + + # files were removed + caplog.clear() + with log_directory_changes(some_dir, _logger, logging.ERROR): + await remove_directory(path=some_dir) + assert "File changes in path" in caplog.text + assert "Files removed:" in caplog.text + assert "Files added:" not in caplog.text + assert "File content changed" not in caplog.text + + # nothing changed + caplog.clear() + with log_directory_changes(some_dir, _logger, logging.ERROR): + pass + assert caplog.text == "" + + # files added and removed + caplog.clear() + some_dir.mkdir(parents=True, exist_ok=True) + (some_dir / "som_other_file").touch() + with log_directory_changes(some_dir, _logger, logging.ERROR): + (some_dir / "som_other_file").unlink() + (some_dir / "som_other_file_2").touch() + assert "File changes in path" in caplog.text + assert "Files added:" in caplog.text + assert "Files removed:" in caplog.text + assert "File content changed" not in caplog.text + + # file content changed + caplog.clear() + (some_dir / "file_to_change").touch() + with log_directory_changes(some_dir, _logger, logging.ERROR): + (some_dir / "file_to_change").write_text("ab") + assert "File changes in path" in caplog.text + assert "Files added:" not in caplog.text + assert "Files removed:" not in caplog.text + assert "File content changed" in caplog.text diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/long_running_tasks.py b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/long_running_tasks.py index cc0b76c9197..592ce9c39c3 100644 --- a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/long_running_tasks.py +++ b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/long_running_tasks.py @@ -11,6 +11,7 @@ from models_library.rabbitmq_messages import ProgressType, SimcorePlatformStatus from pydantic import PositiveInt from servicelib.fastapi.long_running_tasks.server import TaskProgress +from servicelib.file_utils import log_directory_changes from servicelib.logging_utils import log_context from servicelib.progress_bar import ProgressBarData from servicelib.utils import logged_gather @@ -476,15 +477,18 @@ async def task_ports_inputs_pull( ), description="pulling inputs", ) as root_progress: - transferred_bytes = await nodeports.download_target_ports( - nodeports.PortTypeName.INPUTS, - mounted_volumes.disk_inputs_path, - port_keys=port_keys, - io_log_redirect_cb=functools.partial( - post_sidecar_log_message, app, log_level=logging.INFO - ), - progress_bar=root_progress, - ) + with log_directory_changes( + mounted_volumes.disk_inputs_path, _logger, logging.INFO + ): + transferred_bytes = await nodeports.download_target_ports( + nodeports.PortTypeName.INPUTS, + mounted_volumes.disk_inputs_path, + port_keys=port_keys, + io_log_redirect_cb=functools.partial( + post_sidecar_log_message, app, log_level=logging.INFO + ), + progress_bar=root_progress, + ) await post_sidecar_log_message( app, "Finished pulling inputs", log_level=logging.INFO )