From 91dfa83594cde8121ec7dd1621dc4cace86bfb2e Mon Sep 17 00:00:00 2001 From: Nathan MacLeod <142927985+npmacl@users.noreply.github.com> Date: Fri, 22 Mar 2024 16:02:42 -0700 Subject: [PATCH] feat: enable cache for VFS (#209) * feat: enable cache for VFS Signed-off-by: Nathan MacLeod <142927985+npmacl@users.noreply.github.com> * switched to precompiling alphanumerical regex Signed-off-by: Nathan MacLeod <142927985+npmacl@users.noreply.github.com> * ran fmt Signed-off-by: Nathan MacLeod <142927985+npmacl@users.noreply.github.com> --------- Signed-off-by: Nathan MacLeod <142927985+npmacl@users.noreply.github.com> --- .../job_attachments/asset_manifests/decode.py | 14 +++++- src/deadline/job_attachments/download.py | 11 ++++- src/deadline/job_attachments/vfs.py | 48 +++++++------------ .../asset_manifests/test_decode.py | 36 ++++++++++++-- .../v2023_03_03/test_asset_manifest.py | 30 ++++++------ .../data/manifest_v2023_03_03.json | 2 +- .../unit/deadline_job_attachments/test_vfs.py | 42 +++++++--------- 7 files changed, 106 insertions(+), 77 deletions(-) diff --git a/src/deadline/job_attachments/asset_manifests/decode.py b/src/deadline/job_attachments/asset_manifests/decode.py index 9ce239ea..9a229d11 100644 --- a/src/deadline/job_attachments/asset_manifests/decode.py +++ b/src/deadline/job_attachments/asset_manifests/decode.py @@ -4,6 +4,7 @@ from __future__ import annotations import json +import re from pathlib import Path from typing import Any, Optional, Tuple @@ -14,6 +15,8 @@ from .manifest_model import ManifestModelRegistry from .versions import ManifestVersion +alphanum_regex = re.compile("[a-zA-Z0-9]+") + def _get_schema(version) -> dict[str, Any]: schema_filename = Path(__file__).parent.joinpath("schemas", version + ".json").resolve() @@ -31,6 +34,7 @@ def validate_manifest( """ try: jsonschema.validate(manifest, _get_schema(version)) + except (jsonschema.ValidationError, jsonschema.SchemaError) as e: return False, str(e) @@ -67,5 +71,13 @@ def decode_manifest(manifest: str) -> BaseAssetManifest: raise ManifestDecodeValidationError(error_string) manifest_model = ManifestModelRegistry.get_manifest_model(version=version) + decoded_manifest = manifest_model.AssetManifest.decode(manifest_data=document) + + # Validate hashes are alphanumeric + for path in decoded_manifest.paths: + if alphanum_regex.fullmatch(path.hash) is None: + raise ManifestDecodeValidationError( + f"The hash {path.hash} for path {path.path} is not alphanumeric" + ) - return manifest_model.AssetManifest.decode(manifest_data=document) + return decoded_manifest diff --git a/src/deadline/job_attachments/download.py b/src/deadline/job_attachments/download.py index bfbbbd14..dc4f2aef 100644 --- a/src/deadline/job_attachments/download.py +++ b/src/deadline/job_attachments/download.py @@ -65,6 +65,7 @@ download_logger = getLogger("deadline.job_attachments.download") S3_DOWNLOAD_MAX_CONCURRENCY = 10 +VFS_CACHE_REL_PATH_IN_SESSION = ".vfs_object_cache" def get_manifest_from_s3( @@ -952,12 +953,19 @@ def mount_vfs_from_manifests( None """ + vfs_cache_dir: Path = session_dir / VFS_CACHE_REL_PATH_IN_SESSION + asset_cache_hash_path: Path = vfs_cache_dir + if cas_prefix is not None: + asset_cache_hash_path = vfs_cache_dir / cas_prefix + _ensure_paths_within_directory(str(vfs_cache_dir), [str(asset_cache_hash_path)]) + + asset_cache_hash_path.mkdir(parents=True, exist_ok=True) + for mount_point, manifest in manifests_by_root.items(): # Validate the file paths to see if they are under the given download directory. _ensure_paths_within_directory( mount_point, [path.path for path in manifest.paths] # type: ignore ) - final_manifest: BaseAssetManifest = handle_existing_vfs( manifest=manifest, session_dir=session_dir, mount_point=mount_point, os_user=os_user ) @@ -974,6 +982,7 @@ def mount_vfs_from_manifests( os_env_vars, os_group, cas_prefix, + str(vfs_cache_dir), ) vfs_manager.start(session_dir=session_dir) diff --git a/src/deadline/job_attachments/vfs.py b/src/deadline/job_attachments/vfs.py index 96eac036..411c9c7d 100644 --- a/src/deadline/job_attachments/vfs.py +++ b/src/deadline/job_attachments/vfs.py @@ -8,7 +8,7 @@ import time from pathlib import Path import threading -from typing import Dict, List, Union, Optional +from typing import Dict, Union, Optional from .exceptions import ( VFSExecutableMissingError, @@ -23,7 +23,6 @@ DEADLINE_VFS_INSTALL_PATH = "/opt/deadline_vfs" DEADLINE_VFS_EXECUTABLE_SCRIPT = "/scripts/production/al2/run_deadline_vfs_al2.sh" - DEADLINE_VFS_PID_FILE_NAME = "vfs_pids.txt" DEADLINE_MANIFEST_GROUP_READ_PERMS = 0o640 @@ -46,6 +45,7 @@ class VFSProcessManager(object): _os_env_vars: Dict[str, str] _os_group: Optional[str] _cas_prefix: Optional[str] + _asset_cache_path: Optional[str] def __init__( self, @@ -57,6 +57,7 @@ def __init__( os_env_vars: Dict[str, str], os_group: Optional[str] = None, cas_prefix: Optional[str] = None, + asset_cache_path: Optional[str] = None, ): # TODO: Once Windows pathmapping is implemented we can remove this if sys.platform == "win32": @@ -74,6 +75,7 @@ def __init__( self._os_group = os_group self._os_env_vars = os_env_vars self._cas_prefix = cas_prefix + self._asset_cache_path = asset_cache_path @classmethod def kill_all_processes(cls, session_dir: Path, os_user: str) -> None: @@ -255,40 +257,26 @@ def find_vfs_link_dir(cls) -> str: """ return os.path.join(os.path.dirname(VFSProcessManager.find_vfs()), "..", "link") - def build_launch_command(self, mount_point: Union[os.PathLike, str]) -> List: + def build_launch_command(self, mount_point: Union[os.PathLike, str]) -> str: """ Build command to pass to Popen to launch VFS :param mount_point: directory to mount which must be the first parameter seen by our executable :return: command """ - command = [] - executable = VFSProcessManager.find_vfs_launch_script() - if self._cas_prefix is None: - command = [ - "sudo -u %s %s %s -f --clienttype=deadline --bucket=%s --manifest=%s --region=%s -oallow_other" - % ( - self._os_user, - executable, - mount_point, - self._asset_bucket, - self._manifest_path, - self._region, - ) - ] - else: - command = [ - "sudo -u %s %s %s -f --clienttype=deadline --bucket=%s --manifest=%s --region=%s --casprefix=%s -oallow_other" - % ( - self._os_user, - executable, - mount_point, - self._asset_bucket, - self._manifest_path, - self._region, - self._cas_prefix, - ) - ] + + command = ( + f"sudo -u {self._os_user}" + f" {executable} {mount_point} -f --clienttype=deadline" + f" --bucket={self._asset_bucket}" + f" --manifest={self._manifest_path}" + f" --region={self._region}" + f" -oallow_other" + ) + if self._cas_prefix is not None: + command += f" --casprefix={self._cas_prefix}" + if self._asset_cache_path is not None: + command += f" --cachedir={self._asset_cache_path}" log.info(f"Got launch command {command}") return command diff --git a/test/unit/deadline_job_attachments/asset_manifests/test_decode.py b/test/unit/deadline_job_attachments/asset_manifests/test_decode.py index d06f59d7..fa9fd6cb 100644 --- a/test/unit/deadline_job_attachments/asset_manifests/test_decode.py +++ b/test/unit/deadline_job_attachments/asset_manifests/test_decode.py @@ -95,19 +95,19 @@ def test_decode_manifest_v2023_03_03(default_manifest_str_v2023_03_03: str): hash_alg=HashAlgorithm.XXH128, total_size=10, paths=[ - Path_v2023_03_03(path="\r", hash="Carriage Return", size=1, mtime=1679079744833848), + Path_v2023_03_03(path="\r", hash="CarriageReturn", size=1, mtime=1679079744833848), Path_v2023_03_03(path="1", hash="One", size=1, mtime=1679079344833868), Path_v2023_03_03(path="another_test_file", hash="c", size=1, mtime=1675079344833848), Path_v2023_03_03(path="test_dir/test_file", hash="b", size=1, mtime=1479079344833848), Path_v2023_03_03(path="test_file", hash="a", size=1, mtime=167907934333848), Path_v2023_03_03(path="\u0080", hash="Control", size=1, mtime=1679079344833348), Path_v2023_03_03( - path="ö", hash="Latin Small Letter O With Diaeresis", size=1, mtime=1679079344833848 + path="ö", hash="LatinSmallLetterOWithDiaeresis", size=1, mtime=1679079344833848 ), - Path_v2023_03_03(path="€", hash="Euro Sign", size=1, mtime=1679079344836848), - Path_v2023_03_03(path="😀", hash="Emoji: Grinning Face", size=1, mtime=1679579344833848), + Path_v2023_03_03(path="€", hash="EuroSign", size=1, mtime=1679079344836848), + Path_v2023_03_03(path="😀", hash="EmojiGrinningFace", size=1, mtime=1679579344833848), Path_v2023_03_03( - path="דּ", hash="Hebrew Letter Dalet With Dagesh", size=1, mtime=1679039344833848 + path="דּ", hash="HebrewLetterDaletWithDagesh", size=1, mtime=1679039344833848 ), ], ) @@ -173,3 +173,29 @@ def test_decode_manifest_missing_manifest_version(): match='Manifest is missing the required "manifestVersion" field', ): decode.decode_manifest('{"hashAlg": "xxh128"}') + + +def test_decode_manifest_hash_not_alphanumeric(): + """ + Test that a ManifestDecodeValidationError is raised if the manifest contains non-alphanumeric hashes + """ + invalid_hashes: list[tuple[str, str]] = [ + ("no_dots", "O.o"), + ("no_foward_slash", "a/b"), + ("no_back_slash", "a\\\\b"), + ("no_tildas", "o~o"), + ] + + for path, hash in invalid_hashes: + with pytest.raises(ManifestDecodeValidationError, match=r".*is not alphanumeric"): + manifest_str = ( + "{" + '"hashAlg":"xxh128",' + '"manifestVersion":"2023-03-03",' + '"paths":[' + f'{{"hash":"{hash}","mtime":1679079744833848,"path":"{path}","size":1}}' + "]," + '"totalSize":10' + "}" + ) + decode.decode_manifest(manifest_str) diff --git a/test/unit/deadline_job_attachments/asset_manifests/v2023_03_03/test_asset_manifest.py b/test/unit/deadline_job_attachments/asset_manifests/v2023_03_03/test_asset_manifest.py index 706b3efa..96310473 100644 --- a/test/unit/deadline_job_attachments/asset_manifests/v2023_03_03/test_asset_manifest.py +++ b/test/unit/deadline_job_attachments/asset_manifests/v2023_03_03/test_asset_manifest.py @@ -21,16 +21,16 @@ def test_encode(): ManifestPath(path="test_file", hash="a", size=1, mtime=167907934333848), ManifestPath(path="test_dir/test_file", hash="b", size=1, mtime=1479079344833848), ManifestPath(path="another_test_file", hash="c", size=1, mtime=1675079344833848), - ManifestPath(path="€", hash="Euro Sign", size=1, mtime=1679079344836848), - ManifestPath(path="\r", hash="Carriage Return", size=1, mtime=1679079744833848), + ManifestPath(path="€", hash="EuroSign", size=1, mtime=1679079344836848), + ManifestPath(path="\r", hash="CarriageReturn", size=1, mtime=1679079744833848), ManifestPath( - path="דּ", hash="Hebrew Letter Dalet With Dagesh", size=1, mtime=1679039344833848 + path="דּ", hash="HebrewLetterDaletWithDagesh", size=1, mtime=1679039344833848 ), ManifestPath(path="1", hash="One", size=1, mtime=1679079344833868), - ManifestPath(path="😀", hash="Emoji: Grinning Face", size=1, mtime=1679579344833848), + ManifestPath(path="😀", hash="EmojiGrinningFace", size=1, mtime=1679579344833848), ManifestPath(path="\u0080", hash="Control", size=1, mtime=1679079344833348), ManifestPath( - path="ö", hash="Latin Small Letter O With Diaeresis", size=1, mtime=1679079344833848 + path="ö", hash="LatinSmallLetterOWithDiaeresis", size=1, mtime=1679079344833848 ), ], ) @@ -40,16 +40,16 @@ def test_encode(): '"hashAlg":"xxh128",' '"manifestVersion":"2023-03-03",' '"paths":[' - r'{"hash":"Carriage Return","mtime":1679079744833848,"path":"\r","size":1},' + r'{"hash":"CarriageReturn","mtime":1679079744833848,"path":"\r","size":1},' '{"hash":"One","mtime":1679079344833868,"path":"1","size":1},' '{"hash":"c","mtime":1675079344833848,"path":"another_test_file","size":1},' '{"hash":"b","mtime":1479079344833848,"path":"test_dir/test_file","size":1},' '{"hash":"a","mtime":167907934333848,"path":"test_file","size":1},' r'{"hash":"Control","mtime":1679079344833348,"path":"\u0080","size":1},' - r'{"hash":"Latin Small Letter O With Diaeresis","mtime":1679079344833848,"path":"\u00f6","size":1},' - r'{"hash":"Euro Sign","mtime":1679079344836848,"path":"\u20ac","size":1},' - r'{"hash":"Emoji: Grinning Face","mtime":1679579344833848,"path":"\ud83d\ude00","size":1},' - r'{"hash":"Hebrew Letter Dalet With Dagesh","mtime":1679039344833848,"path":"\ufb33","size":1}' + r'{"hash":"LatinSmallLetterOWithDiaeresis","mtime":1679079344833848,"path":"\u00f6","size":1},' + r'{"hash":"EuroSign","mtime":1679079344836848,"path":"\u20ac","size":1},' + r'{"hash":"EmojiGrinningFace","mtime":1679579344833848,"path":"\ud83d\ude00","size":1},' + r'{"hash":"HebrewLetterDaletWithDagesh","mtime":1679039344833848,"path":"\ufb33","size":1}' "]," '"totalSize":10' "}" @@ -67,19 +67,19 @@ def test_decode(default_manifest_str_v2023_03_03: str): hash_alg=HashAlgorithm("xxh128"), total_size=10, paths=[ - ManifestPath(path="\r", hash="Carriage Return", size=1, mtime=1679079744833848), + ManifestPath(path="\r", hash="CarriageReturn", size=1, mtime=1679079744833848), ManifestPath(path="1", hash="One", size=1, mtime=1679079344833868), ManifestPath(path="another_test_file", hash="c", size=1, mtime=1675079344833848), ManifestPath(path="test_dir/test_file", hash="b", size=1, mtime=1479079344833848), ManifestPath(path="test_file", hash="a", size=1, mtime=167907934333848), ManifestPath(path="\u0080", hash="Control", size=1, mtime=1679079344833348), ManifestPath( - path="ö", hash="Latin Small Letter O With Diaeresis", size=1, mtime=1679079344833848 + path="ö", hash="LatinSmallLetterOWithDiaeresis", size=1, mtime=1679079344833848 ), - ManifestPath(path="€", hash="Euro Sign", size=1, mtime=1679079344836848), - ManifestPath(path="😀", hash="Emoji: Grinning Face", size=1, mtime=1679579344833848), + ManifestPath(path="€", hash="EuroSign", size=1, mtime=1679079344836848), + ManifestPath(path="😀", hash="EmojiGrinningFace", size=1, mtime=1679579344833848), ManifestPath( - path="דּ", hash="Hebrew Letter Dalet With Dagesh", size=1, mtime=1679039344833848 + path="דּ", hash="HebrewLetterDaletWithDagesh", size=1, mtime=1679039344833848 ), ], ) diff --git a/test/unit/deadline_job_attachments/data/manifest_v2023_03_03.json b/test/unit/deadline_job_attachments/data/manifest_v2023_03_03.json index f51fe9e5..1756b633 100644 --- a/test/unit/deadline_job_attachments/data/manifest_v2023_03_03.json +++ b/test/unit/deadline_job_attachments/data/manifest_v2023_03_03.json @@ -1 +1 @@ -{"hashAlg":"xxh128","manifestVersion":"2023-03-03","paths":[{"hash":"Carriage Return","mtime":1679079744833848,"path":"\r","size":1},{"hash":"One","mtime":1679079344833868,"path":"1","size":1},{"hash":"c","mtime":1675079344833848,"path":"another_test_file","size":1},{"hash":"b","mtime":1479079344833848,"path":"test_dir/test_file","size":1},{"hash":"a","mtime":167907934333848,"path":"test_file","size":1},{"hash":"Control","mtime":1679079344833348,"path":"\u0080","size":1},{"hash":"Latin Small Letter O With Diaeresis","mtime":1679079344833848,"path":"\u00f6","size":1},{"hash":"Euro Sign","mtime":1679079344836848,"path":"\u20ac","size":1},{"hash":"Emoji: Grinning Face","mtime":1679579344833848,"path":"\ud83d\ude00","size":1},{"hash":"Hebrew Letter Dalet With Dagesh","mtime":1679039344833848,"path":"\ufb33","size":1}],"totalSize":10} \ No newline at end of file +{"hashAlg":"xxh128","manifestVersion":"2023-03-03","paths":[{"hash":"CarriageReturn","mtime":1679079744833848,"path":"\r","size":1},{"hash":"One","mtime":1679079344833868,"path":"1","size":1},{"hash":"c","mtime":1675079344833848,"path":"another_test_file","size":1},{"hash":"b","mtime":1479079344833848,"path":"test_dir/test_file","size":1},{"hash":"a","mtime":167907934333848,"path":"test_file","size":1},{"hash":"Control","mtime":1679079344833348,"path":"\u0080","size":1},{"hash":"LatinSmallLetterOWithDiaeresis","mtime":1679079344833848,"path":"\u00f6","size":1},{"hash":"EuroSign","mtime":1679079344836848,"path":"\u20ac","size":1},{"hash":"EmojiGrinningFace","mtime":1679579344833848,"path":"\ud83d\ude00","size":1},{"hash":"HebrewLetterDaletWithDagesh","mtime":1679039344833848,"path":"\ufb33","size":1}],"totalSize":10} \ No newline at end of file diff --git a/test/unit/deadline_job_attachments/test_vfs.py b/test/unit/deadline_job_attachments/test_vfs.py index 9d9eb9e1..609cc14b 100644 --- a/test/unit/deadline_job_attachments/test_vfs.py +++ b/test/unit/deadline_job_attachments/test_vfs.py @@ -8,7 +8,7 @@ import sys from pathlib import Path import threading -from typing import List, Union +from typing import Union from unittest.mock import Mock, patch, call, MagicMock import pytest @@ -99,17 +99,14 @@ def test_build_launch_command( test_executable = os.environ[DEADLINE_VFS_ENV_VAR] + DEADLINE_VFS_EXECUTABLE_SCRIPT - expected_launch_command: List = [ - "sudo -u %s %s %s -f --clienttype=deadline --bucket=%s --manifest=%s --region=%s -oallow_other" - % ( - test_os_user, - test_executable, - local_root, - self.s3_settings.s3BucketName, - manifest_path, - os.environ["AWS_DEFAULT_REGION"], - ) - ] + expected_launch_command = ( + f"sudo -u {test_os_user}" + f" {test_executable} {local_root} -f --clienttype=deadline" + f" --bucket={self.s3_settings.s3BucketName}" + f" --manifest={manifest_path}" + f" --region={os.environ['AWS_DEFAULT_REGION']}" + f" -oallow_other" + ) with patch( f"{deadline.__package__}.job_attachments.vfs.os.path.exists", return_value=True, @@ -134,18 +131,15 @@ def test_build_launch_command( # intermediate cleanup VFSProcessManager.launch_script_path = None - expected_launch_command = [ - "sudo -u %s %s %s -f --clienttype=deadline --bucket=%s --manifest=%s --region=%s --casprefix=%s -oallow_other" - % ( - test_os_user, - test_executable, - local_root, - self.s3_settings.s3BucketName, - manifest_path, - os.environ["AWS_DEFAULT_REGION"], - test_CAS_prefix, - ) - ] + expected_launch_command = ( + f"sudo -u {test_os_user}" + f" {test_executable} {local_root} -f --clienttype=deadline" + f" --bucket={self.s3_settings.s3BucketName}" + f" --manifest={manifest_path}" + f" --region={os.environ['AWS_DEFAULT_REGION']}" + f" -oallow_other" + f" --casprefix={test_CAS_prefix}" + ) with patch( f"{deadline.__package__}.job_attachments.vfs.os.path.exists", return_value=True,