Skip to content

Commit

Permalink
Add chunk download retry when expected size is not met
Browse files Browse the repository at this point in the history
Co-authored-by: Carlos McGregor <[email protected]>
  • Loading branch information
replaceafill and Carlos McGregor authored Jan 23, 2024
1 parent d2907d6 commit 41a28d7
Show file tree
Hide file tree
Showing 2 changed files with 91 additions and 13 deletions.
42 changes: 31 additions & 11 deletions storage_service/locations/models/duracloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,9 @@ def _generate_duracloud_request(self, url):
prepped.url = url
return prepped

def _download_file(self, url, download_path, expected_size=0, checksum=None):
def _download_file(
self, url, download_path, expected_size=0, checksum=None, retry=0
):
"""
Helper to download files from DuraCloud.
Expand Down Expand Up @@ -271,16 +273,34 @@ def _download_file(self, url, download_path, expected_size=0, checksum=None):

# Verify file, if size or checksum is known
if expected_size and os.path.getsize(download_path) != expected_size:
raise StorageException(
_(
"File %(path)s does not match expected size of %(expected_size)s bytes, but was actually %(actual_size)s bytes"
),
{
"path": download_path,
"expected_size": expected_size,
"actual_size": os.path.getsize(download_path),
},
)
if retry < 3:
LOGGER.error(
"[RETRY=%(retry)d] File %(path)s does not match expected size of %(expected_size)s bytes, but was actually %(actual_size)s bytes"
% {
"retry": retry + 1,
"path": download_path,
"expected_size": expected_size,
"actual_size": os.path.getsize(download_path),
}
)
return self._download_file(
url,
download_path,
expected_size=expected_size,
checksum=checksum,
retry=retry + 1,
)
else:
raise StorageException(
_(
"File %(path)s does not match expected size of %(expected_size)s bytes, but was actually %(actual_size)s bytes"
),
{
"path": download_path,
"expected_size": expected_size,
"actual_size": os.path.getsize(download_path),
},
)
calculated_checksum = utils.generate_checksum(download_path, "md5")
if checksum and checksum != calculated_checksum.hexdigest():
raise StorageException(
Expand Down
62 changes: 60 additions & 2 deletions storage_service/locations/tests/test_duracloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,61 @@ def test_move_to_storage_service_fails_if_it_cannot_download_file(
d.move_to_storage_service("some/file.txt", dst.as_posix(), None)


@pytest.mark.django_db
def test_move_to_storage_service_retries_after_chunk_download_errors(
space, mocker, tmp_path
):
send = mocker.patch(
"requests.Session.send",
side_effect=[
mocker.Mock(status_code=404, spec=requests.Response),
mocker.Mock(status_code=200, content=b"a", spec=requests.Response),
mocker.Mock(status_code=200, content=b"a ch", spec=requests.Response),
mocker.Mock(status_code=200, content=b"unked file", spec=requests.Response),
],
)
mocker.patch(
"requests.Session.get",
side_effect=[
mocker.Mock(
status_code=200,
content=b"""\
<dur>
<header>
<sourceContent>
<byteSize>14</byteSize>
<md5>1b8107d332e5d9f2a0b8e5924ca1ca3e</md5>
</sourceContent>
</header>
<chunks>
<chunk chunkId="some/file.txt.dura-chunk-0001">
<byteSize>4</byteSize>
<md5>1781a616499ac88f78b56af57fcca974</md5>
</chunk>
</chunks>
<chunks>
<chunk chunkId="some/file.txt.dura-chunk-0002">
<byteSize>10</byteSize>
<md5>29224657c84874b1c83a92fae2f2ea22</md5>
</chunk>
</chunks>
</dur>
""",
spec=requests.Response,
),
],
)
d = Duracloud.objects.create(space=space, host="duracloud.org", duraspace="myspace")
dst = tmp_path / "dst" / "file.txt"

d.move_to_storage_service("some/file.txt", dst.as_posix(), None)

assert dst.read_text() == "a chunked file"

# The session.send mock shows the first chunk download was retried.
assert len(send.mock_calls) == 4


@pytest.mark.django_db
def test_move_to_storage_service_fails_if_chunk_size_does_not_match(
space, mocker, tmp_path
Expand All @@ -334,6 +389,9 @@ def test_move_to_storage_service_fails_if_chunk_size_does_not_match(
"requests.Session.send",
side_effect=[
mocker.Mock(status_code=404, spec=requests.Response),
# Fail all the download retry attempts.
mocker.Mock(status_code=200, content=b"ERRORERROR", spec=requests.Response),
mocker.Mock(status_code=200, content=b"ERRORERROR", spec=requests.Response),
mocker.Mock(status_code=200, content=b"ERRORERROR", spec=requests.Response),
mocker.Mock(status_code=200, content=b"unked file", spec=requests.Response),
],
Expand Down Expand Up @@ -386,8 +444,8 @@ def test_move_to_storage_service_fails_if_chunk_size_does_not_match(
# The destination file was created but no chunks were written to it.
assert dst.read_text() == ""

# The session.send mock was not called after the first chunk failed validation.
assert len(send.mock_calls) == 2
# The session.send mock was called the maximum amount of download retries.
assert len(send.mock_calls) == 5


@pytest.mark.django_db
Expand Down

0 comments on commit 41a28d7

Please sign in to comment.