Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: write_file was not handling exception correctly TDE-1007 #836

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion .github/workflows/format-tests.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
name: Format and Tests
on: [push]
on:
push:
pull_request:
branches:
- master

jobs:
build:
Expand Down
44 changes: 34 additions & 10 deletions scripts/files/fs.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ def write(destination: str, source: bytes, content_type: Optional[str] = None) -
source: The source file in bytes.
content_type: A standard Media Type describing the format of the contents.
"""
get_log().debug("write", path=destination)
if is_s3(destination):
fs_s3.write(destination, source, content_type)
else:
Expand All @@ -33,16 +34,35 @@ def read(path: str) -> bytes:
Returns:
bytes: The bytes content of the file.
"""
get_log().debug("read", path=path)
if is_s3(path):
try:
return fs_s3.read(path)
except resource("s3").meta.client.exceptions.NoSuchKey as error:
raise NoSuchFileError from error
# https://boto3.amazonaws.com/v1/documentation/api/latest/guide/error-handling.html#parsing-error-responses-and-catching-exceptions-from-aws-services
except resource("s3").meta.client.exceptions.ClientError as ce:
# Error Code can be found here:
# https://docs.aws.amazon.com/AmazonS3/latest/API/ErrorResponses.html#ErrorCodeList
if ce.response["Error"]["Code"] == "NoSuchKey":
raise NoSuchFileError(path) from ce

try:
return fs_local.read(path)
except FileNotFoundError as error:
raise NoSuchFileError from error
raise NoSuchFileError(path) from error


def copy(source: str, target: str) -> str:
"""Copy a `source` file to a `target`.

Args:
source: A path to a file to copy
target: A path of the copy to create

Returns:
The path of the file created
"""
source_content = read(source)
return write(target, source_content)


def exists(path: str) -> bool:
Expand Down Expand Up @@ -100,10 +120,11 @@ def write_sidecars(inputs: List[str], target: str, concurrency: Optional[int] =
with ThreadPoolExecutor(max_workers=concurrency) as executor:
results = {write_file(executor, input_, target): input_ for input_ in inputs}
for future in as_completed(results):
try:
future_ex = future.exception()
if isinstance(future_ex, NoSuchFileError):
get_log().info("No sidecar file found; skipping", path=future_ex.path)
else:
get_log().info("wrote_sidecar_file", path=future.result())
except NoSuchFileError:
get_log().info("No sidecar file found; skipping")


def write_file(executor: ThreadPoolExecutor, input_: str, target: str) -> Future[str]:
Expand All @@ -117,13 +138,16 @@ def write_file(executor: ThreadPoolExecutor, input_: str, target: str) -> Future
Returns:
Future[str]: The result of the execution.
"""
future: Future[str] = Future()
get_log().info(f"Trying write from file: {input_}")
try:
future = executor.submit(write, os.path.join(target, f"{os.path.basename(input_)}"), read(input_))
return executor.submit(copy, input_, os.path.join(target, f"{os.path.basename(input_)}"))
except NoSuchFileError as nsfe:
future: Future[str] = Future()
future.set_exception(nsfe)
return future
return future


class NoSuchFileError(Exception):
pass
def __init__(self, path: str) -> None:
self.message = f"File not found: {path}"
self.path = path
22 changes: 10 additions & 12 deletions scripts/files/tests/fs_test.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import json
import os
from shutil import rmtree
from tempfile import mkdtemp
Expand All @@ -24,33 +23,32 @@ def test_read_key_not_found_s3(capsys: CaptureFixture[str]) -> None:
with raises(NoSuchFileError):
read("s3://testbucket/test.file")

logs = json.loads(capsys.readouterr().out)
assert logs["msg"] == "s3_key_not_found"
assert "s3_key_not_found" in capsys.readouterr().out


def test_write_all_file_not_found_local(capsys: CaptureFixture[str]) -> None:
def test_write_all_file_not_found_local() -> None:
# Raises an exception as all files are not writte·
with raises(Exception):
with raises(Exception) as e:
write_all(["/test.prj"], "/tmp")
assert '"error": "NoSuchFileError()"' in capsys.readouterr().out

assert str(e.value) == "Not all mandatory source files were written"


def test_write_sidecars_file_not_found_local(capsys: CaptureFixture[str]) -> None:
write_sidecars(["/test.prj"], "/tmp")

logs = json.loads(capsys.readouterr().out.strip())
assert logs["msg"] == "No sidecar file found; skipping"
assert "No sidecar file found; skipping" in capsys.readouterr().out


@mock_s3 # type: ignore
def test_write_all_key_not_found_s3(capsys: CaptureFixture[str]) -> None:
def test_write_all_key_not_found_s3() -> None:
s3 = resource("s3", region_name=DEFAULT_REGION_NAME)
s3.create_bucket(Bucket="testbucket")

# Raises an exception as all files are not written
with raises(Exception):
with raises(Exception) as e:
write_all(["s3://testbucket/test.tif"], "/tmp")
assert '"error": "NoSuchFileError()"' in capsys.readouterr().out

assert str(e.value) == "Not all mandatory source files were written"


@mock_s3 # type: ignore
Expand Down
Loading