From 398da18169962967ecf2a257d352ef49a940d5fc Mon Sep 17 00:00:00 2001 From: Caden <132690522+marofke@users.noreply.github.com> Date: Thu, 29 Feb 2024 15:45:30 -0600 Subject: [PATCH] feat!: Add hashAlg file extension to files uploaded to CAS (#167) Signed-off-by: Caden Marofke --- src/deadline/job_attachments/asset_sync.py | 6 ++-- src/deadline/job_attachments/upload.py | 3 +- .../test_job_attachments.py | 12 ++++---- .../test_asset_sync.py | 14 ++++----- .../deadline_job_attachments/test_upload.py | 30 +++++++++---------- 5 files changed, 31 insertions(+), 34 deletions(-) diff --git a/src/deadline/job_attachments/asset_sync.py b/src/deadline/job_attachments/asset_sync.py index fa8dc6ef..3dbb8832 100644 --- a/src/deadline/job_attachments/asset_sync.py +++ b/src/deadline/job_attachments/asset_sync.py @@ -145,10 +145,9 @@ def _upload_output_manifest_to_s3( manifest_name_prefix = hash_data( f"{file_system_location_name or ''}{root_path}".encode(), hash_alg ) - # TODO: Remove hash algorithm file extension after sufficient time after the next release manifest_path = _join_s3_paths( full_output_prefix, - f"{manifest_name_prefix}_output.{output_manifest.hashAlg.value}", + f"{manifest_name_prefix}_output", ) metadata = {"Metadata": {"asset-root": root_path}} if file_system_location_name: @@ -225,8 +224,7 @@ def _get_output_files( ): file_size = file_path.lstat().st_size file_hash = hash_file(str(file_path), self.hash_alg) - # TODO: replace with uncommented line below after sufficient time after the next release - s3_key = file_hash # f"{file_hash}.{self.hash_alg.value}" + s3_key = f"{file_hash}.{self.hash_alg.value}" if s3_settings.full_cas_prefix(): s3_key = _join_s3_paths(s3_settings.full_cas_prefix(), s3_key) diff --git a/src/deadline/job_attachments/upload.py b/src/deadline/job_attachments/upload.py index ce54693c..eae9429a 100644 --- a/src/deadline/job_attachments/upload.py +++ b/src/deadline/job_attachments/upload.py @@ -297,8 +297,7 @@ def upload_object_to_cas( Returns a tuple (whether it has been uploaded, the file size). """ local_path = source_root.joinpath(file.path) - # TODO: replace with uncommented line below after sufficient time after the next release - s3_upload_key = f"{file.hash}" # .{hash_algorithm.value}" + s3_upload_key = f"{file.hash}.{hash_algorithm.value}" if s3_cas_prefix: s3_upload_key = _join_s3_paths(s3_cas_prefix, s3_upload_key) is_uploaded = False diff --git a/test/integ/deadline_job_attachments/test_job_attachments.py b/test/integ/deadline_job_attachments/test_job_attachments.py index 43a7c1cd..d6de3b1f 100644 --- a/test/integ/deadline_job_attachments/test_job_attachments.py +++ b/test/integ/deadline_job_attachments/test_job_attachments.py @@ -165,7 +165,7 @@ def upload_input_files_assets_not_in_cas(job_attachment_test: JobAttachmentTest) # THEN scene_ma_s3_path = ( - f"{job_attachment_settings.full_cas_prefix()}/{job_attachment_test.SCENE_MA_HASH}" + f"{job_attachment_settings.full_cas_prefix()}/{job_attachment_test.SCENE_MA_HASH}.xxh128" ) object_summary_iterator = job_attachment_test.bucket.objects.filter( @@ -211,7 +211,7 @@ def upload_input_files_one_asset_in_cas( ] scene_ma_s3_path = ( - f"{job_attachment_settings.full_cas_prefix()}/{job_attachment_test.SCENE_MA_HASH}" + f"{job_attachment_settings.full_cas_prefix()}/{job_attachment_test.SCENE_MA_HASH}.xxh128" ) # This file has already been uploaded @@ -245,8 +245,8 @@ def upload_input_files_one_asset_in_cas( brick_png_hash = hash_file(str(job_attachment_test.BRICK_PNG_PATH), HashAlgorithm.XXH128) cloth_png_hash = hash_file(str(job_attachment_test.CLOTH_PNG_PATH), HashAlgorithm.XXH128) - brick_png_s3_path = f"{job_attachment_settings.full_cas_prefix()}/{brick_png_hash}" - cloth_png_s3_path = f"{job_attachment_settings.full_cas_prefix()}/{cloth_png_hash}" + brick_png_s3_path = f"{job_attachment_settings.full_cas_prefix()}/{brick_png_hash}.xxh128" + cloth_png_s3_path = f"{job_attachment_settings.full_cas_prefix()}/{cloth_png_hash}.xxh128" object_summary_iterator = job_attachment_test.bucket.objects.filter( Prefix=f"{job_attachment_settings.full_cas_prefix()}/", @@ -831,11 +831,11 @@ def sync_outputs( object_key_set = set(obj.key for obj in object_summary_iterator) assert ( - f"{job_attachment_settings.full_cas_prefix()}/{hash_file(str(file_to_be_synced_step0_task0), HashAlgorithm.XXH128)}" + f"{job_attachment_settings.full_cas_prefix()}/{hash_file(str(file_to_be_synced_step0_task0), HashAlgorithm.XXH128)}.xxh128" in object_key_set ) assert ( - f"{job_attachment_settings.full_cas_prefix()}/{hash_file(str(file_not_to_be_synced), HashAlgorithm.XXH128)}" + f"{job_attachment_settings.full_cas_prefix()}/{hash_file(str(file_not_to_be_synced), HashAlgorithm.XXH128)}.xxh128" not in object_key_set ) diff --git a/test/unit/deadline_job_attachments/test_asset_sync.py b/test/unit/deadline_job_attachments/test_asset_sync.py index b6aea389..f616b883 100644 --- a/test/unit/deadline_job_attachments/test_asset_sync.py +++ b/test/unit/deadline_job_attachments/test_asset_sync.py @@ -528,11 +528,11 @@ def test_sync_outputs( s3 = boto3.Session(region_name="us-west-2").resource("s3") # pylint: disable=invalid-name bucket = s3.Bucket(s3_settings.s3BucketName) bucket.put_object( - Key=f"{expected_cas_prefix}hash1", + Key=f"{expected_cas_prefix}hash1.xxh128", Body="a", ) expected_metadata = s3.meta.client.head_object( - Bucket=s3_settings.s3BucketName, Key=f"{expected_cas_prefix}hash1" + Bucket=s3_settings.s3BucketName, Key=f"{expected_cas_prefix}hash1.xxh128" ) # WHEN @@ -585,21 +585,21 @@ def test_sync_outputs( # THEN actual_metadata = s3.meta.client.head_object( - Bucket=s3_settings.s3BucketName, Key=f"{expected_cas_prefix}hash1" + Bucket=s3_settings.s3BucketName, Key=f"{expected_cas_prefix}hash1.xxh128" ) assert actual_metadata["LastModified"] == expected_metadata["LastModified"] assert_expected_files_on_s3( bucket, expected_files={ - f"{expected_cas_prefix}hash1", - f"{expected_cas_prefix}hash2", - f"{expected_output_prefix}hash3_output.xxh128", + f"{expected_cas_prefix}hash1.xxh128", + f"{expected_cas_prefix}hash2.xxh128", + f"{expected_output_prefix}hash3_output", }, ) assert_canonical_manifest( bucket, - f"{expected_output_prefix}hash3_output.xxh128", + f"{expected_output_prefix}hash3_output", expected_manifest='{"hashAlg":"xxh128","manifestVersion":"2023-03-03",' f'"paths":[{{"hash":"hash2","mtime":{expected_sub_file_mtime},"path":"{expected_sub_file_rel_path}",' f'"size":{expected_processed_bytes}}},' diff --git a/test/unit/deadline_job_attachments/test_upload.py b/test/unit/deadline_job_attachments/test_upload.py index 338f56af..c300cc76 100644 --- a/test/unit/deadline_job_attachments/test_upload.py +++ b/test/unit/deadline_job_attachments/test_upload.py @@ -267,10 +267,10 @@ def test_asset_management( bucket, expected_files={ f"assetRoot/Manifests/{farm_id}/{queue_id}/Inputs/0000/e_input", - f"{self.job_attachment_s3_settings.full_cas_prefix()}/a", - f"{self.job_attachment_s3_settings.full_cas_prefix()}/b", - f"{self.job_attachment_s3_settings.full_cas_prefix()}/c", - f"{self.job_attachment_s3_settings.full_cas_prefix()}/d", + f"{self.job_attachment_s3_settings.full_cas_prefix()}/a.xxh128", + f"{self.job_attachment_s3_settings.full_cas_prefix()}/b.xxh128", + f"{self.job_attachment_s3_settings.full_cas_prefix()}/c.xxh128", + f"{self.job_attachment_s3_settings.full_cas_prefix()}/d.xxh128", }, ) @@ -438,7 +438,7 @@ def test_asset_management_windows_multi_root( bucket, expected_files={ f"{self.job_attachment_s3_settings.rootPrefix}/Manifests/{farm_id}/{queue_id}/Inputs/0000/b_input", - f"{self.job_attachment_s3_settings.full_cas_prefix()}/a", + f"{self.job_attachment_s3_settings.full_cas_prefix()}/a.xxh128", }, ) @@ -600,7 +600,7 @@ def test_asset_management_many_inputs( expected_files = set( [ - f"{self.job_attachment_s3_settings.full_cas_prefix()}/{i}" + f"{self.job_attachment_s3_settings.full_cas_prefix()}/{i}.xxh128" for i in range(num_input_files) ] ) @@ -792,7 +792,7 @@ def mock_hash_file(file_path: str, hash_alg: HashAlgorithm): # mock pre-uploading the file bucket.put_object( - Key=f"{self.job_attachment_s3_settings.full_cas_prefix()}/existinghash", + Key=f"{self.job_attachment_s3_settings.full_cas_prefix()}/existinghash.xxh128", Body="a", ) @@ -856,8 +856,8 @@ def mock_hash_file(file_path: str, hash_alg: HashAlgorithm): bucket, expected_files={ f"{self.job_attachment_s3_settings.rootPrefix}/Manifests/{farm_id}/{queue_id}/Inputs/0000/manifest_input", - f"{self.job_attachment_s3_settings.full_cas_prefix()}/existinghash", - f"{self.job_attachment_s3_settings.full_cas_prefix()}/somethingnew", + f"{self.job_attachment_s3_settings.full_cas_prefix()}/existinghash.xxh128", + f"{self.job_attachment_s3_settings.full_cas_prefix()}/somethingnew.xxh128", }, ) @@ -933,7 +933,7 @@ def test_asset_management_no_outputs_large_number_of_inputs_already_uploaded( input_files.append(test_file) # mock pre-uploading the file bucket.put_object( - Key=f"{self.job_attachment_s3_settings.full_cas_prefix()}/{i}", + Key=f"{self.job_attachment_s3_settings.full_cas_prefix()}/{i}.xxh128", Body=f"test {i}", ) @@ -997,7 +997,7 @@ def test_asset_management_no_outputs_large_number_of_inputs_already_uploaded( expected_files = set( [ - f"{self.job_attachment_s3_settings.full_cas_prefix()}/{i}" + f"{self.job_attachment_s3_settings.full_cas_prefix()}/{i}.xxh128" for i in range(num_input_files) ] ) @@ -1770,7 +1770,7 @@ def test_manage_assets_with_symlinks( bucket, expected_files={ f"assetRoot/Manifests/{farm_id}/{queue_id}/Inputs/0000/manifest_input", - f"{self.job_attachment_s3_settings.full_cas_prefix()}/a", + f"{self.job_attachment_s3_settings.full_cas_prefix()}/a.xxh128", }, ) @@ -2274,7 +2274,7 @@ def test_upload_object_to_cas_skips_upload_with_cache( job_attachment_settings=self.job_attachment_s3_settings, asset_manifest_version=manifest_version, ) - s3_key = f"{default_job_attachment_s3_settings.s3BucketName}/prefix/test-hash" + s3_key = f"{default_job_attachment_s3_settings.s3BucketName}/prefix/test-hash.xxh128" test_entry = S3CheckCacheEntry(s3_key, "123.45") s3_cache = MagicMock() s3_cache.get_entry.return_value = test_entry @@ -2327,7 +2327,7 @@ def test_upload_object_to_cas_adds_cache_entry( job_attachment_settings=self.job_attachment_s3_settings, asset_manifest_version=manifest_version, ) - s3_key = f"{default_job_attachment_s3_settings.s3BucketName}/prefix/test-hash" + s3_key = f"{default_job_attachment_s3_settings.s3BucketName}/prefix/test-hash.xxh128" s3_cache = MagicMock() s3_cache.get_entry.return_value = None expected_new_entry = S3CheckCacheEntry(s3_key, "345.67") @@ -2359,7 +2359,7 @@ def test_upload_object_to_cas_adds_cache_entry( assert_expected_files_on_s3( bucket, - expected_files={"prefix/test-hash"}, + expected_files={"prefix/test-hash.xxh128"}, )