From 354287f023c34e269ca5ef0b24b28b9c37ae9dd7 Mon Sep 17 00:00:00 2001 From: Simon Bohnen Date: Wed, 2 Nov 2022 00:03:38 +0100 Subject: [PATCH] fix: avoid validating checksums for partial responses (#361) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: Do not validate checksums for partial responses * fix: use constant * fix: import http * test: Add tests for checksum validation with partial responses * docs: specify that no validation is performed for chunked downloads * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * linting Co-authored-by: Simon Bohnen Co-authored-by: Owl Bot --- .../requests/__init__.py | 4 +++ .../requests/download.py | 13 +++++++-- google/resumable_media/requests/__init__.py | 4 +++ google/resumable_media/requests/download.py | 13 +++++++-- tests/unit/requests/test_download.py | 29 +++++++++++++++++++ tests_async/unit/requests/test_download.py | 23 +++++++++++++++ 6 files changed, 82 insertions(+), 4 deletions(-) diff --git a/google/_async_resumable_media/requests/__init__.py b/google/_async_resumable_media/requests/__init__.py index a0bd909d..ae29c623 100644 --- a/google/_async_resumable_media/requests/__init__.py +++ b/google/_async_resumable_media/requests/__init__.py @@ -286,6 +286,10 @@ def mock_default(scopes=None): In addition, a :class:`.ChunkedDownload` can also take optional ``start`` and ``end`` byte positions. +Usually, no checksum is returned with a chunked download. Even if one is returned, +it is not validated. If you need to validate the checksum, you can do so +by buffering the chunks and validating the checksum against the completed download. + ============== Simple Uploads ============== diff --git a/google/_async_resumable_media/requests/download.py b/google/_async_resumable_media/requests/download.py index 2b3e48ce..d4af79d9 100644 --- a/google/_async_resumable_media/requests/download.py +++ b/google/_async_resumable_media/requests/download.py @@ -15,6 +15,7 @@ """Support for downloading media from Google APIs.""" import urllib3.response # type: ignore +import http from google._async_resumable_media import _download from google._async_resumable_media import _helpers @@ -90,7 +91,11 @@ async def _write_to_stream(self, response): self._stream.write(chunk) local_checksum_object.update(chunk) - if expected_checksum is not None: + # Don't validate the checksum for partial responses. + if ( + expected_checksum is not None + and response.status != http.client.PARTIAL_CONTENT + ): actual_checksum = sync_helpers.prepare_checksum_digest( checksum_object.digest() ) @@ -213,7 +218,11 @@ async def _write_to_stream(self, response): self._stream.write(chunk) checksum_object.update(chunk) - if expected_checksum is not None: + # Don't validate the checksum for partial responses. + if ( + expected_checksum is not None + and response.status != http.client.PARTIAL_CONTENT + ): actual_checksum = sync_helpers.prepare_checksum_digest( checksum_object.digest() ) diff --git a/google/resumable_media/requests/__init__.py b/google/resumable_media/requests/__init__.py index ef8177f3..cfb9fb40 100644 --- a/google/resumable_media/requests/__init__.py +++ b/google/resumable_media/requests/__init__.py @@ -286,6 +286,10 @@ def mock_default(scopes=None): In addition, a :class:`.ChunkedDownload` can also take optional ``start`` and ``end`` byte positions. +Usually, no checksum is returned with a chunked download. Even if one is returned, +it is not validated. If you need to validate the checksum, you can do so +by buffering the chunks and validating the checksum against the completed download. + ============== Simple Uploads ============== diff --git a/google/resumable_media/requests/download.py b/google/resumable_media/requests/download.py index 88de223f..8d0319c7 100644 --- a/google/resumable_media/requests/download.py +++ b/google/resumable_media/requests/download.py @@ -15,6 +15,7 @@ """Support for downloading media from Google APIs.""" import urllib3.response # type: ignore +import http from google.resumable_media import _download from google.resumable_media import common @@ -124,7 +125,11 @@ def _write_to_stream(self, response): self._bytes_downloaded += len(chunk) local_checksum_object.update(chunk) - if expected_checksum is not None: + # Don't validate the checksum for partial responses. + if ( + expected_checksum is not None + and response.status_code != http.client.PARTIAL_CONTENT + ): actual_checksum = _helpers.prepare_checksum_digest(checksum_object.digest()) if actual_checksum != expected_checksum: msg = _CHECKSUM_MISMATCH.format( @@ -308,7 +313,11 @@ def _write_to_stream(self, response): checksum_object.update(chunk) response._content_consumed = True - if expected_checksum is not None: + # Don't validate the checksum for partial responses. + if ( + expected_checksum is not None + and response.status_code != http.client.PARTIAL_CONTENT + ): actual_checksum = _helpers.prepare_checksum_digest(checksum_object.digest()) if actual_checksum != expected_checksum: diff --git a/tests/unit/requests/test_download.py b/tests/unit/requests/test_download.py index 8da7ef27..a63fe6f9 100644 --- a/tests/unit/requests/test_download.py +++ b/tests/unit/requests/test_download.py @@ -114,6 +114,35 @@ def test__write_to_stream_with_hash_check_fail(self, checksum): chunk_size=_request_helpers._SINGLE_GET_CHUNK_SIZE, decode_unicode=False ) + @pytest.mark.parametrize("checksum", ["md5", "crc32c"]) + def test__write_to_stream_no_checksum_validation_for_partial_response( + self, checksum + ): + stream = io.BytesIO() + download = download_mod.Download(EXAMPLE_URL, stream=stream, checksum=checksum) + + chunk1 = b"first chunk" + response = _mock_response( + status_code=http.client.PARTIAL_CONTENT, chunks=[chunk1] + ) + + # Make sure that the checksum is not validated. + with mock.patch( + "google.resumable_media._helpers.prepare_checksum_digest", + return_value=None, + ) as prepare_checksum_digest: + download._write_to_stream(response) + assert not prepare_checksum_digest.called + + assert not download.finished + + # Check mocks. + response.__enter__.assert_called_once_with() + response.__exit__.assert_called_once_with(None, None, None) + response.iter_content.assert_called_once_with( + chunk_size=_request_helpers._SINGLE_GET_CHUNK_SIZE, decode_unicode=False + ) + def test__write_to_stream_with_invalid_checksum_type(self): BAD_CHECKSUM_TYPE = "badsum" diff --git a/tests_async/unit/requests/test_download.py b/tests_async/unit/requests/test_download.py index 9892206b..d0ac090c 100644 --- a/tests_async/unit/requests/test_download.py +++ b/tests_async/unit/requests/test_download.py @@ -100,6 +100,29 @@ async def test__write_to_stream_with_hash_check_fail(self, checksum): ) assert error.args[0] == msg + @pytest.mark.asyncio + @pytest.mark.parametrize("checksum", ["md5", "crc32c"]) + async def test__write_to_stream_no_checksum_validation_for_partial_response( + self, checksum + ): + stream = io.BytesIO() + download = download_mod.Download( + sync_test.EXAMPLE_URL, stream=stream, checksum=checksum + ) + + chunk1 = b"first chunk" + response = _mock_response(status=http.client.PARTIAL_CONTENT, chunks=[chunk1]) + + # Make sure that the checksum is not validated. + with mock.patch( + "google.resumable_media._helpers.prepare_checksum_digest", + return_value=None, + ) as prepare_checksum_digest: + await download._write_to_stream(response) + assert not prepare_checksum_digest.called + + assert not download.finished + @pytest.mark.asyncio async def test__write_to_stream_with_invalid_checksum_type(self): BAD_CHECKSUM_TYPE = "badsum"