Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: NoSuchFileError is stopping multiprocessing sidecar files TDE-1007 #832

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 11 additions & 6 deletions scripts/files/fs.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,12 +98,12 @@ def write_sidecars(inputs: List[str], target: str, concurrency: Optional[int] =

"""
with ThreadPoolExecutor(max_workers=concurrency) as executor:
try:
results = {write_file(executor, input_, target): input_ for input_ in inputs}
for future in as_completed(results):
results = {write_file(executor, input_, target): input_ for input_ in inputs}
for future in as_completed(results):
try:
get_log().info("wrote_sidecar_file", path=future.result())
except NoSuchFileError:
get_log().info("No sidecar file found; skipping")
except NoSuchFileError:
get_log().info("No sidecar file found; skipping")


def write_file(executor: ThreadPoolExecutor, input_: str, target: str) -> Future[str]:
Expand All @@ -117,7 +117,12 @@ def write_file(executor: ThreadPoolExecutor, input_: str, target: str) -> Future
Returns:
Future[str]: The result of the execution.
"""
return executor.submit(write, os.path.join(target, f"{os.path.basename(input_)}"), read(input_))
future: Future[str] = Future()
try:
future = executor.submit(write, os.path.join(target, f"{os.path.basename(input_)}"), read(input_))
except NoSuchFileError as nsfe:
future.set_exception(nsfe)
return future


class NoSuchFileError(Exception):
Expand Down
34 changes: 29 additions & 5 deletions scripts/files/tests/fs_test.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
import json
import os
from shutil import rmtree
from tempfile import mkdtemp

from boto3 import resource
from moto import mock_s3
from moto.s3.responses import DEFAULT_REGION_NAME
from pytest import CaptureFixture, raises

from scripts.files.fs import NoSuchFileError, read, write_all, write_sidecars
from scripts.files.fs import NoSuchFileError, read, write, write_all, write_sidecars


def test_read_key_not_found_local() -> None:
Expand All @@ -25,9 +28,11 @@ def test_read_key_not_found_s3(capsys: CaptureFixture[str]) -> None:
assert logs["msg"] == "s3_key_not_found"


def test_write_all_file_not_found_local() -> None:
with raises(NoSuchFileError):
def test_write_all_file_not_found_local(capsys: CaptureFixture[str]) -> None:
# Raises an exception as all files are not writte·
with raises(Exception):
write_all(["/test.prj"], "/tmp")
assert '"error": "NoSuchFileError()"' in capsys.readouterr().out


def test_write_sidecars_file_not_found_local(capsys: CaptureFixture[str]) -> None:
Expand All @@ -38,12 +43,14 @@ def test_write_sidecars_file_not_found_local(capsys: CaptureFixture[str]) -> Non


@mock_s3 # type: ignore
def test_write_all_key_not_found_s3() -> None:
def test_write_all_key_not_found_s3(capsys: CaptureFixture[str]) -> None:
s3 = resource("s3", region_name=DEFAULT_REGION_NAME)
s3.create_bucket(Bucket="testbucket")

with raises(NoSuchFileError):
# Raises an exception as all files are not written
with raises(Exception):
write_all(["s3://testbucket/test.tif"], "/tmp")
assert '"error": "NoSuchFileError()"' in capsys.readouterr().out


@mock_s3 # type: ignore
Expand All @@ -56,3 +63,20 @@ def test_write_sidecars_key_not_found_s3(capsys: CaptureFixture[str]) -> None:
# capsys.readouterr().out json string format is not valid which implies
# we can't parse it to find the actual `msg`
assert "No sidecar file found; skipping" in capsys.readouterr().out


def test_write_sidecars_one_found(capsys: CaptureFixture[str]) -> None:
target = mkdtemp()
# Add a file to read
content = b"test content"
path = os.path.join(target, "test.tfw")
write(path, content)
non_existing_path = os.path.join(target, "test.prj")
# Write the sidecar files with one unexisting
write_sidecars([non_existing_path, path], os.path.join(target, "/tmp"))
logs = capsys.readouterr().out
# One has not been found
assert "No sidecar file found; skipping" in logs
# One has been found
assert "wrote_sidecar_file" in logs
rmtree(target)
Loading