From aa1787dcb4380f203243b29c3a9f99103c5c9efe Mon Sep 17 00:00:00 2001 From: stangch <171081544+stangch@users.noreply.github.com> Date: Fri, 26 Jul 2024 10:20:13 -0700 Subject: [PATCH] feat(asset-cli): asset diff subcommand to diff directory changes with manfiest (#410) * feat(asset-cli): asset diff subcommand to diff directory changes of manifest Signed-off-by: Tang <171081544+stangch@users.noreply.github.com> * feat(asset-cli): asset diff subcommand to diff local manifest files Signed-off-by: Tang <171081544+stangch@users.noreply.github.com> --------- Signed-off-by: Tang <171081544+stangch@users.noreply.github.com> --- .../client/cli/_groups/asset_group.py | 179 +++++++++++++++++- src/deadline/job_attachments/upload.py | 1 + .../deadline_client/cli/test_cli_asset.py | 178 ++++++++++++++++- 3 files changed, 344 insertions(+), 14 deletions(-) diff --git a/src/deadline/client/cli/_groups/asset_group.py b/src/deadline/client/cli/_groups/asset_group.py index 95932fd8..ba4f5d27 100644 --- a/src/deadline/client/cli/_groups/asset_group.py +++ b/src/deadline/client/cli/_groups/asset_group.py @@ -7,10 +7,13 @@ * diff * download """ +from __future__ import annotations + import os from pathlib import Path import concurrent.futures from typing import List +import logging import glob import click @@ -237,23 +240,63 @@ def asset_upload(root_dir: str, manifest_dir: str, update: bool, **args): @cli_asset.command(name="diff") @click.option("--root-dir", help="The root directory to compare changes to. ") @click.option( - "--manifest", help="The path to manifest folder of the directory to show changes of. " + "--manifest-dir", + required=True, + help="The path to manifest folder of the directory to show changes of. ", ) @click.option( - "--format", - help="Pretty prints diff information with easy to read formatting. ", + "--raw", + help="Outputs the raw JSON info of files and their changed statuses. ", is_flag=True, show_default=True, default=False, ) @_handle_error -def asset_diff(**args): +def asset_diff(root_dir: str, manifest_dir: str, raw: bool, **args): """ - Check file differences of a directory since last snapshot. - - TODO: show example of diff output + Check file differences of a directory since last snapshot, specified by manifest. """ - click.echo("diff shown") + if not os.path.isdir(manifest_dir): + raise NonValidInputError(f"Specified manifest directory {manifest_dir} does not exist. ") + + if root_dir is None: + asset_root_dir = os.path.dirname(manifest_dir) + else: + if not os.path.isdir(root_dir): + raise NonValidInputError(f"Specified root directory {root_dir} does not exist. ") + asset_root_dir = root_dir + + asset_manager = S3AssetManager( + farm_id=" ", queue_id=" ", job_attachment_settings=JobAttachmentS3Settings(" ", " ") + ) + + # get inputs of directory + input_paths = [] + for root, dirs, files in os.walk(asset_root_dir): + for filename in files: + file_path = os.path.join(root, filename) + input_paths.append(Path(file_path)) + + # hash and create manifest of local directory + cache_config = config_file.get_cache_directory() + with HashCache(cache_config) as hash_cache: + directory_manifest_object = asset_manager._create_manifest_file( + input_paths=input_paths, root_path=asset_root_dir, hash_cache=hash_cache + ) + + # parse local manifest + local_manifest_object: BaseAssetManifest = read_local_manifest(manifest=manifest_dir) + + # compare manifests + differences: List[tuple] = compare_manifest( + reference_manifest=local_manifest_object, compare_manifest=directory_manifest_object + ) + + if raw: + click.echo(f"\nFile Diffs: {differences}") + else: + click.echo(f"\n{asset_root_dir}") + pretty_print(file_status_list=differences) @cli_asset.command(name="download") @@ -408,3 +451,123 @@ def update_manifest(manifest: str, new_or_modified_paths: List[tuple]) -> BaseAs manifest_file.write(local_base_asset_manifest.encode()) return local_base_asset_manifest + + +def compare_manifest( + reference_manifest: BaseAssetManifest, compare_manifest: BaseAssetManifest +) -> List[(tuple)]: + """ + Compares two manifests, reference_manifest acting as the base, and compare_manifest acting as manifest with changes. + Returns a list of FileStatus and BaseManifestPath + + """ + reference_dict = { + manifest_path.path: manifest_path for manifest_path in reference_manifest.paths + } + compare_dict = {manifest_path.path: manifest_path for manifest_path in compare_manifest.paths} + + differences = [] + + # Find new files + for file_path, manifest_path in compare_dict.items(): + if file_path not in reference_dict: + differences.append((FileStatus.NEW, manifest_path)) + else: + if reference_dict[file_path].hash != manifest_path.hash: + differences.append((FileStatus.MODIFIED, manifest_path)) + else: + differences.append((FileStatus.UNCHANGED, manifest_path)) + + # Find deleted files + for file_path, manifest_path in reference_dict.items(): + if file_path not in compare_dict: + differences.append((FileStatus.DELETED, manifest_path)) + + return differences + + +def pretty_print(file_status_list: List[(tuple)]): + """ + Prints to command line a formatted file tree structure with corresponding file statuses + """ + + # ASCII characters for the tree structure + PIPE = "│" + HORIZONTAL = "──" + ELBOW = "└" + TEE = "├" + SPACE = " " + + # ANSI escape sequences for colors + COLORS = { + "MODIFIED": "\033[93m", # yellow + "NEW": "\033[92m", # green + "DELETED": "\033[91m", # red + "UNCHANGED": "\033[90m", # grey + "RESET": "\033[0m", # base color + "DIRECTORY": "\033[80m", # grey + } + + # Tooltips: + TOOLTIPS = { + FileStatus.NEW: " +", # added files + FileStatus.DELETED: " -", # deleted files + FileStatus.MODIFIED: " M", # modified files + FileStatus.UNCHANGED: "", # unchanged files + } + + class ColorFormatter(logging.Formatter): + def format(self, record): + message = super().format(record) + return f"{message}" + + # Configure logger + formatter = ColorFormatter("") + handler = logging.StreamHandler() + handler.setFormatter(formatter) + logger = logging.getLogger(__name__) + logger.addHandler(handler) + logger.setLevel(logging.INFO) + logger.propagate = False + + def print_tree(directory_tree, prefix=""): + sorted_entries = sorted(directory_tree.items()) + + for i, (entry, subtree) in enumerate(sorted_entries, start=1): + is_last_entry = i == len(sorted_entries) + symbol = ELBOW + HORIZONTAL if is_last_entry else TEE + HORIZONTAL + is_dir = isinstance(subtree, dict) + color = COLORS["DIRECTORY"] if is_dir else COLORS[subtree.name] + tooltip = TOOLTIPS[FileStatus.UNCHANGED] if is_dir else TOOLTIPS[subtree] + + message = f"{prefix}{symbol}{color}{entry}{tooltip}{COLORS['RESET']}{os.path.sep if is_dir else ''}" + logger.info(message) + + if is_dir: + new_prefix = prefix + (SPACE if is_last_entry else PIPE + SPACE) + print_tree(subtree, new_prefix) + + if not directory_tree: + symbol = ELBOW + HORIZONTAL + message = f"{prefix}{symbol}{COLORS['UNCHANGED']}. {COLORS['RESET']}" + logger.info(message) + + def build_directory_tree(file_status_list: List[tuple]) -> dict[str, dict]: + directory_tree: dict = {} + + def add_to_tree(path, status): + parts = path.split(os.path.sep) + current_level = directory_tree + for i, part in enumerate(parts): + if i == len(parts) - 1: + current_level[part] = status + else: + current_level = current_level.setdefault(part, {}) + + for status, manifest_path in file_status_list: + add_to_tree(manifest_path.path, status) + return directory_tree + + directory_tree = build_directory_tree(file_status_list) + print_tree(directory_tree) + logger.info("") diff --git a/src/deadline/job_attachments/upload.py b/src/deadline/job_attachments/upload.py index a91c38ad..16b22e52 100644 --- a/src/deadline/job_attachments/upload.py +++ b/src/deadline/job_attachments/upload.py @@ -91,6 +91,7 @@ class FileStatus(Enum): UNCHANGED = 0 NEW = 1 MODIFIED = 2 + DELETED = 3 class S3AssetUploader: diff --git a/test/unit/deadline_client/cli/test_cli_asset.py b/test/unit/deadline_client/cli/test_cli_asset.py index e7d53ca8..e3f2e6d9 100644 --- a/test/unit/deadline_client/cli/test_cli_asset.py +++ b/test/unit/deadline_client/cli/test_cli_asset.py @@ -1,7 +1,7 @@ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. import pytest -from unittest.mock import patch, Mock +from unittest.mock import patch, Mock, MagicMock from click.testing import CliRunner import os @@ -9,8 +9,14 @@ from deadline.client.cli._groups import asset_group from deadline.client import api from deadline.client.api import _submit_job_bundle -from deadline.job_attachments.models import AssetRootGroup, JobAttachmentS3Settings, Attachments -from deadline.job_attachments.upload import S3AssetManager, S3AssetUploader +from deadline.job_attachments.models import ( + AssetRootGroup, + JobAttachmentS3Settings, + Attachments, +) +from deadline.job_attachments.upload import S3AssetManager, S3AssetUploader, FileStatus +from deadline.job_attachments.caches import HashCache +from deadline.job_attachments.asset_manifests.base_manifest import BaseManifestPath from deadline.job_attachments.asset_manifests.v2023_03_03 import AssetManifest from deadline.job_attachments.asset_manifests.hash_algorithms import HashAlgorithm @@ -20,6 +26,14 @@ ) +@pytest.fixture +def mock_cachedb(): + mock_hash_cache = MagicMock(spec=HashCache) + mock_hash_cache.__enter__.return_value = mock_hash_cache + mock_hash_cache.__exit__.return_value = None + return mock_hash_cache + + @pytest.fixture def mock_prepare_paths_for_upload(): with patch.object(S3AssetManager, "prepare_paths_for_upload") as mock: @@ -84,6 +98,56 @@ def mock_upload_attachments(): yield mock +@pytest.fixture +def mock_create_manifest_file(): + def _mock_create_manifest_file(input_paths, root_path, hash_cache): + return AssetManifest( + paths=[ + BaseManifestPath( + path=os.path.join(root_path, "file1.txt"), hash="mock_hash_1", size=0, mtime=0 + ), + BaseManifestPath( + path=os.path.join(root_path, "subdir1", "file2.txt"), + hash="mock_hash_2", + size=0, + mtime=0, + ), + BaseManifestPath( + path=os.path.join(root_path, "subdir2", "subdir3", "file3.txt"), + hash="mock_hash_3", + size=0, + mtime=0, + ), + ], + hash_alg=HashAlgorithm("xxh128"), + total_size=0, + ) + + with patch.object( + S3AssetManager, "_create_manifest_file", side_effect=_mock_create_manifest_file + ): + yield + + +@pytest.fixture +def mock_read_local_manifest(): + def _mock_read_local_manifest(manifest): + return AssetManifest( + paths=[ + BaseManifestPath(path="file1.txt", hash="old_hash_1", size=0, mtime=0), + BaseManifestPath(path="subdir1/file2.txt", hash="old_hash_2", size=0, mtime=0), + BaseManifestPath( + path="subdir2/subdir3/file3.txt", hash="old_hash_3", size=0, mtime=0 + ), + ], + hash_alg=HashAlgorithm("xxh128"), + total_size=0, + ) + + with patch.object(asset_group, "read_local_manifest", side_effect=_mock_read_local_manifest): + yield + + MOCK_ROOT_DIR = "/path/to/root" MOCK_MANIFEST_DIR = "/path/to/manifest" MOCK_MANIFEST_FILE = os.path.join(MOCK_MANIFEST_DIR, "manifest_input") @@ -412,8 +476,110 @@ def test_cli_asset_upload_read_local_manifest_returns_none( ], ) - assert ( - f"Specified manifest directory {MOCK_MANIFEST_DIR} does contain valid manifest input file." - in result.output + assert ( + f"Specified manifest directory {MOCK_MANIFEST_DIR} does contain valid manifest input file." + in result.output + ) + assert result.exit_code == 1 + + +class TestDiff: + + def run_test_case( + self, + mock_create_manifest_file, + mock_read_local_manifest, + mock_cachedb, + file_paths, + raw=False, + ): + + with patch.object(os.path, "isdir", side_effect=[True, True]), patch.object( + asset_group, "HashCache", return_value=mock_cachedb + ), patch.object( + asset_group, + "compare_manifest", + return_value=[ + ( + FileStatus.MODIFIED, + BaseManifestPath(path=path, hash=f"mock_hash_{i+1}", size=0, mtime=0), + ) + for i, path in enumerate(file_paths) + ], + ): + runner = CliRunner() + args = [ + "asset", + "diff", + "--root-dir", + MOCK_ROOT_DIR, + "--manifest-dir", + MOCK_MANIFEST_DIR, + ] + if raw: + args.append("--raw") + + result = runner.invoke(main, args) + return result + + def test_asset_diff_with_format( + self, mock_create_manifest_file, mock_read_local_manifest, mock_cachedb + ): + file_paths = ["file1.txt", "subdir1/file2.txt"] + result = self.run_test_case( + mock_create_manifest_file, mock_read_local_manifest, mock_cachedb, file_paths + ) + assert f"{MOCK_ROOT_DIR}" in result.output + assert "file1.txt M" in result.output + assert "file2.txt M" in result.output + assert result.exit_code == 0 + + def test_asset_diff_with_multiple_subdirectories( + self, mock_create_manifest_file, mock_read_local_manifest, mock_cachedb + ): + file_paths = ["file1.txt", "subdir1/file2.txt", "subdir2/subdir3/file3.txt"] + result = self.run_test_case( + mock_create_manifest_file, mock_read_local_manifest, mock_cachedb, file_paths, raw=True + ) + expected_output = "File Diffs: [(, BaseManifestPath(path='file1.txt', hash='mock_hash_1', size=0, mtime=0)), (, BaseManifestPath(path='subdir1/file2.txt', hash='mock_hash_2', size=0, mtime=0)), (, BaseManifestPath(path='subdir2/subdir3/file3.txt', hash='mock_hash_3', size=0, mtime=0))]" + assert expected_output in result.output + assert result.exit_code == 0 + + def test_asset_diff_without_format( + self, mock_init_objects, mock_create_manifest_file, mock_read_local_manifest, mock_cachedb + ): + file_paths = ["file1.txt", "subdir1/file2.txt"] + result = self.run_test_case( + mock_create_manifest_file, mock_read_local_manifest, mock_cachedb, file_paths, raw=True ) + expected_result = "File Diffs: [(, BaseManifestPath(path='file1.txt', hash='mock_hash_1', size=0, mtime=0)), (, BaseManifestPath(path='subdir1/file2.txt', hash='mock_hash_2', size=0, mtime=0))]\n" + assert expected_result in result.output + assert result.exit_code == 0 + + def test_asset_diff_invalid_root_dir(self, tmp_path): + invalid_root_dir = str(tmp_path / "invalid_dir") + manifest_dir = tmp_path / "manifests" + manifest_dir.mkdir() + + runner = CliRunner() + result = runner.invoke( + main, + ["asset", "diff", "--root-dir", invalid_root_dir, "--manifest-dir", str(manifest_dir)], + ) + assert result.exit_code == 1 + assert f"Specified root directory {invalid_root_dir} does not exist. " in result.output + + def test_asset_diff_invalid_manifest_dir(self, tmp_path): + root_dir = str(tmp_path) + invalid_manifest_dir = str(tmp_path / "invalid_manifests") + + runner = CliRunner() + result = runner.invoke( + main, ["asset", "diff", "--root-dir", root_dir, "--manifest-dir", invalid_manifest_dir] + ) + + assert result.exit_code == 1 + assert ( + f"Specified manifest directory {invalid_manifest_dir} does not exist. " in result.output + )