From 2302cf840ad1fe82d3a35ac54e82610c33628a21 Mon Sep 17 00:00:00 2001 From: Paul Fouquet Date: Wed, 31 Jan 2024 14:14:34 +1300 Subject: [PATCH 1/4] fix: NoSuchFileError is stopping multiprocessing sidecar files TDE-1007 --- scripts/files/fs.py | 17 ++++++++++------ scripts/files/tests/fs_test.py | 36 +++++++++++++++++++++++++++++----- 2 files changed, 42 insertions(+), 11 deletions(-) diff --git a/scripts/files/fs.py b/scripts/files/fs.py index c7ecf992e..db65147e2 100644 --- a/scripts/files/fs.py +++ b/scripts/files/fs.py @@ -98,12 +98,12 @@ def write_sidecars(inputs: List[str], target: str, concurrency: Optional[int] = """ with ThreadPoolExecutor(max_workers=concurrency) as executor: - try: - results = {write_file(executor, input_, target): input_ for input_ in inputs} - for future in as_completed(results): + results = {write_file(executor, input_, target): input_ for input_ in inputs} + for future in as_completed(results): + try: get_log().info("wrote_sidecar_file", path=future.result()) - except NoSuchFileError: - get_log().info("No sidecar file found; skipping") + except NoSuchFileError: + get_log().info("No sidecar file found; skipping") def write_file(executor: ThreadPoolExecutor, input_: str, target: str) -> Future[str]: @@ -117,7 +117,12 @@ def write_file(executor: ThreadPoolExecutor, input_: str, target: str) -> Future Returns: Future[str]: The result of the execution. """ - return executor.submit(write, os.path.join(target, f"{os.path.basename(input_)}"), read(input_)) + future: Future[str] = Future() + try: + future = executor.submit(write, os.path.join(target, f"{os.path.basename(input_)}"), read(input_)) + except NoSuchFileError as nsfe: + future.set_exception(nsfe) + return future class NoSuchFileError(Exception): diff --git a/scripts/files/tests/fs_test.py b/scripts/files/tests/fs_test.py index 0fd8b05e6..9d3d07522 100644 --- a/scripts/files/tests/fs_test.py +++ b/scripts/files/tests/fs_test.py @@ -1,11 +1,14 @@ import json +import os +from shutil import rmtree +from tempfile import mkdtemp from boto3 import resource from moto import mock_s3 from moto.s3.responses import DEFAULT_REGION_NAME from pytest import CaptureFixture, raises -from scripts.files.fs import NoSuchFileError, read, write_all, write_sidecars +from scripts.files.fs import NoSuchFileError, read, write, write_all, write_sidecars def test_read_key_not_found_local() -> None: @@ -25,9 +28,12 @@ def test_read_key_not_found_s3(capsys: CaptureFixture[str]) -> None: assert logs["msg"] == "s3_key_not_found" -def test_write_all_file_not_found_local() -> None: - with raises(NoSuchFileError): +def test_write_all_file_not_found_local(capsys: CaptureFixture[str]) -> None: + # Raises an exception as all files are not writte· + with raises(Exception): write_all(["/test.prj"], "/tmp") + logs = json.loads(capsys.readouterr().out.strip()) + assert logs["error"] == NoSuchFileError() def test_write_sidecars_file_not_found_local(capsys: CaptureFixture[str]) -> None: @@ -38,12 +44,15 @@ def test_write_sidecars_file_not_found_local(capsys: CaptureFixture[str]) -> Non @mock_s3 # type: ignore -def test_write_all_key_not_found_s3() -> None: +def test_write_all_key_not_found_s3(capsys: CaptureFixture[str]) -> None: s3 = resource("s3", region_name=DEFAULT_REGION_NAME) s3.create_bucket(Bucket="testbucket") - with raises(NoSuchFileError): + # Raises an exception as all files are not written + with raises(Exception): write_all(["s3://testbucket/test.tif"], "/tmp") + logs = json.loads(capsys.readouterr().out.strip()) + assert logs["error"] == NoSuchFileError() @mock_s3 # type: ignore @@ -56,3 +65,20 @@ def test_write_sidecars_key_not_found_s3(capsys: CaptureFixture[str]) -> None: # capsys.readouterr().out json string format is not valid which implies # we can't parse it to find the actual `msg` assert "No sidecar file found; skipping" in capsys.readouterr().out + + +def test_write_sidecars_one_found(capsys: CaptureFixture[str]) -> None: + target = mkdtemp() + # Add a file to read + content = b"test content" + path = os.path.join(target, "test.tfw") + write(path, content) + path_unexisting = os.path.join(target, "test.prj") + # Write the sidecar files with one unexisting + write_sidecars([path_unexisting, path], os.path.join(target, "/tmp")) + logs = capsys.readouterr().out + # One has not been found + assert "No sidecar file found; skipping" in logs + # One has been found + assert "wrote_sidecar_file" in logs + rmtree(target) From 6e8b1d12c15209665ddcba15c9e496110e1e0fe7 Mon Sep 17 00:00:00 2001 From: paulfouquet <86932794+paulfouquet@users.noreply.github.com> Date: Thu, 1 Feb 2024 09:40:06 +1300 Subject: [PATCH 2/4] refactor: variable naming Co-authored-by: Victor Engmark --- scripts/files/tests/fs_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/files/tests/fs_test.py b/scripts/files/tests/fs_test.py index 9d3d07522..a84e1d234 100644 --- a/scripts/files/tests/fs_test.py +++ b/scripts/files/tests/fs_test.py @@ -73,7 +73,7 @@ def test_write_sidecars_one_found(capsys: CaptureFixture[str]) -> None: content = b"test content" path = os.path.join(target, "test.tfw") write(path, content) - path_unexisting = os.path.join(target, "test.prj") + non_existing_path = os.path.join(target, "test.prj") # Write the sidecar files with one unexisting write_sidecars([path_unexisting, path], os.path.join(target, "/tmp")) logs = capsys.readouterr().out From 09f14b37d5d334639772ea11a925c832aaff5fb9 Mon Sep 17 00:00:00 2001 From: Paul Fouquet Date: Thu, 1 Feb 2024 09:58:37 +1300 Subject: [PATCH 3/4] test: rename variable usage --- scripts/files/tests/fs_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/files/tests/fs_test.py b/scripts/files/tests/fs_test.py index a84e1d234..fc6ecc9d0 100644 --- a/scripts/files/tests/fs_test.py +++ b/scripts/files/tests/fs_test.py @@ -75,7 +75,7 @@ def test_write_sidecars_one_found(capsys: CaptureFixture[str]) -> None: write(path, content) non_existing_path = os.path.join(target, "test.prj") # Write the sidecar files with one unexisting - write_sidecars([path_unexisting, path], os.path.join(target, "/tmp")) + write_sidecars([non_existing_path, path], os.path.join(target, "/tmp")) logs = capsys.readouterr().out # One has not been found assert "No sidecar file found; skipping" in logs From 36bdfb705be1ba58824968c3b5a254dbcc72077d Mon Sep 17 00:00:00 2001 From: Paul Fouquet Date: Thu, 1 Feb 2024 10:37:22 +1300 Subject: [PATCH 4/4] test: assertion was not working --- scripts/files/tests/fs_test.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/scripts/files/tests/fs_test.py b/scripts/files/tests/fs_test.py index fc6ecc9d0..24f4ab56d 100644 --- a/scripts/files/tests/fs_test.py +++ b/scripts/files/tests/fs_test.py @@ -32,8 +32,7 @@ def test_write_all_file_not_found_local(capsys: CaptureFixture[str]) -> None: # Raises an exception as all files are not writte· with raises(Exception): write_all(["/test.prj"], "/tmp") - logs = json.loads(capsys.readouterr().out.strip()) - assert logs["error"] == NoSuchFileError() + assert '"error": "NoSuchFileError()"' in capsys.readouterr().out def test_write_sidecars_file_not_found_local(capsys: CaptureFixture[str]) -> None: @@ -51,8 +50,7 @@ def test_write_all_key_not_found_s3(capsys: CaptureFixture[str]) -> None: # Raises an exception as all files are not written with raises(Exception): write_all(["s3://testbucket/test.tif"], "/tmp") - logs = json.loads(capsys.readouterr().out.strip()) - assert logs["error"] == NoSuchFileError() + assert '"error": "NoSuchFileError()"' in capsys.readouterr().out @mock_s3 # type: ignore