From d1250cc8a821c58de08cb21647dbce101d666922 Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Thu, 10 Jun 2021 15:49:06 +0300 Subject: [PATCH 01/10] Remove average download speed check The average download speed check does not achieve its purpose of protecting against slow retrieval attacks. Such protection is highly dependent on the networking stack which could be user-provided and not under tuf's control. The slow retrieval attack protection has been removed from the specification for similar reasons. Signed-off-by: Teodora Sechkova --- tuf/ngclient/_internal/download.py | 60 +----------------------------- 1 file changed, 2 insertions(+), 58 deletions(-) diff --git a/tuf/ngclient/_internal/download.py b/tuf/ngclient/_internal/download.py index 31b59f6630..28d3a5d53e 100644 --- a/tuf/ngclient/_internal/download.py +++ b/tuf/ngclient/_internal/download.py @@ -25,12 +25,10 @@ import logging import tempfile -import timeit from urllib import parse from securesystemslib import formats as sslib_formats -import tuf from tuf import exceptions, formats # See 'log.py' to learn how logging is handled in TUF. @@ -42,9 +40,7 @@ def download_file(url, required_length, fetcher, strict_required_length=True): Given the url and length of the desired file, this function opens a connection to 'url' and downloads the file while ensuring its length - matches 'required_length' if 'STRICT_REQUIRED_LENGH' is True (If False, - the file's length is not checked and a slow retrieval exception is raised - if the downloaded rate falls below the acceptable rate). + matches 'required_length' if 'STRICT_REQUIRED_LENGTH' is True. url: @@ -92,43 +88,19 @@ def download_file(url, required_length, fetcher, strict_required_length=True): # the downloaded file. 
temp_file = tempfile.TemporaryFile() # pylint: disable=consider-using-with - average_download_speed = 0 number_of_bytes_received = 0 try: chunks = fetcher.fetch(url, required_length) - start_time = timeit.default_timer() for chunk in chunks: - - stop_time = timeit.default_timer() temp_file.write(chunk) - - # Measure the average download speed. number_of_bytes_received += len(chunk) - seconds_spent_receiving = stop_time - start_time - average_download_speed = ( - number_of_bytes_received / seconds_spent_receiving - ) - - if average_download_speed < tuf.settings.MIN_AVERAGE_DOWNLOAD_SPEED: - logger.debug( - "The average download speed dropped below the minimum" - " average download speed set in tuf.settings.py." - " Stopping the download!" - ) - break - - logger.debug( - "The average download speed has not dipped below the" - " minimum average download speed set in tuf.settings.py." - ) # Does the total number of downloaded bytes match the required length? _check_downloaded_length( number_of_bytes_received, required_length, strict_required_length=strict_required_length, - average_download_speed=average_download_speed, ) except Exception: @@ -154,10 +126,7 @@ def download_bytes(url, required_length, fetcher, strict_required_length=True): def _check_downloaded_length( - total_downloaded, - required_length, - strict_required_length=True, - average_download_speed=None, + total_downloaded, required_length, strict_required_length=True ): """ @@ -182,9 +151,6 @@ def _check_downloaded_length( False when we know that we want to turn this off for downloading the timestamp metadata, which has no signed required_length. - average_download_speed: - The average download speed for the downloaded file. - None. @@ -193,10 +159,6 @@ def _check_downloaded_length( strict_required_length is True and total_downloaded is not equal required_length. - exceptions.SlowRetrievalError, if the total downloaded was - done in less than the acceptable download speed (as set in - tuf.settings.py). 
- None. """ @@ -214,28 +176,10 @@ def _check_downloaded_length( required_length, ) - # If the average download speed is below a certain threshold, we - # flag this as a possible slow-retrieval attack. - if average_download_speed < tuf.settings.MIN_AVERAGE_DOWNLOAD_SPEED: - raise exceptions.SlowRetrievalError(average_download_speed) - raise exceptions.DownloadLengthMismatchError( required_length, total_downloaded ) - # We specifically disabled strict checking of required length, but - # we will log a warning anyway. This is useful when we wish to - # download the Timestamp or Root metadata, for which we have no - # signed metadata; so, we must guess a reasonable required_length - # for it. - if average_download_speed < tuf.settings.MIN_AVERAGE_DOWNLOAD_SPEED: - raise exceptions.SlowRetrievalError(average_download_speed) - - logger.debug( - "Good average download speed: %f bytes per second", - average_download_speed, - ) - logger.info( "Downloaded %d bytes out of upper limit of %d bytes.", total_downloaded, From c4a9ec1141411a54af93714b5c8c622cb43d27e4 Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Thu, 10 Jun 2021 16:13:18 +0300 Subject: [PATCH 02/10] Remove strict_required_length option Remove the strict length check in download.py and check only if an upper bowndary is exceeded. Exact length is not known for some metadata files during download. The check makes the code unnecessarily complicated and is better suited for the later metadata verification step. 
Signed-off-by: Teodora Sechkova --- tuf/ngclient/_internal/download.py | 92 ++++-------------------------- tuf/ngclient/updater.py | 7 +-- 2 files changed, 11 insertions(+), 88 deletions(-) diff --git a/tuf/ngclient/_internal/download.py b/tuf/ngclient/_internal/download.py index 28d3a5d53e..697a73b625 100644 --- a/tuf/ngclient/_internal/download.py +++ b/tuf/ngclient/_internal/download.py @@ -35,25 +35,19 @@ logger = logging.getLogger(__name__) -def download_file(url, required_length, fetcher, strict_required_length=True): +def download_file(url, required_length, fetcher): """ Given the url and length of the desired file, this function opens a - connection to 'url' and downloads the file while ensuring its length - matches 'required_length' if 'STRICT_REQUIRED_LENGTH' is True. + connection to 'url' and downloads the file up to 'required_length'. url: A URL string that represents the location of the file. required_length: - An integer value representing the length of the file. - - strict_required_length: - A Boolean indicator used to signal whether we should perform strict - checking of required_length. True by default. We explicitly set this to - False when we know that we want to turn this off for downloading the - timestamp metadata, which has no signed required_length. + An integer value representing the length of the file or an + upper boundary. A file object is created on disk to store the contents of 'url'. @@ -96,12 +90,10 @@ def download_file(url, required_length, fetcher, strict_required_length=True): temp_file.write(chunk) number_of_bytes_received += len(chunk) - # Does the total number of downloaded bytes match the required length? - _check_downloaded_length( - number_of_bytes_received, - required_length, - strict_required_length=strict_required_length, - ) + if number_of_bytes_received > required_length: + raise exceptions.DownloadLengthMismatchError( + required_length, number_of_bytes_received + ) except Exception: # Close 'temp_file'. 
Any written data is lost. @@ -114,74 +106,10 @@ def download_file(url, required_length, fetcher, strict_required_length=True): return temp_file -def download_bytes(url, required_length, fetcher, strict_required_length=True): +def download_bytes(url, required_length, fetcher): """Download bytes from given url Returns the downloaded bytes, otherwise like download_file() """ - with download_file( - url, required_length, fetcher, strict_required_length - ) as dl_file: + with download_file(url, required_length, fetcher) as dl_file: return dl_file.read() - - -def _check_downloaded_length( - total_downloaded, required_length, strict_required_length=True -): - """ - - A helper function which checks whether the total number of downloaded - bytes matches our expectation. - - - total_downloaded: - The total number of bytes supposedly downloaded for the file in - question. - - required_length: - The total number of bytes expected of the file as seen from its metadata - The Timestamp role is always downloaded without a known file length, and - the Root role when the client cannot download any of the required - top-level roles. In both cases, 'required_length' is actually an upper - limit on the length of the downloaded file. - - strict_required_length: - A Boolean indicator used to signal whether we should perform strict - checking of required_length. True by default. We explicitly set this to - False when we know that we want to turn this off for downloading the - timestamp metadata, which has no signed required_length. - - - None. - - - securesystemslib.exceptions.DownloadLengthMismatchError, if - strict_required_length is True and total_downloaded is not equal - required_length. - - - None. - """ - - if total_downloaded == required_length: - logger.info("Downloaded %d bytes as expected.", total_downloaded) - - else: - # What we downloaded is not equal to the required length, but did we ask - # for strict checking of required length? 
- if strict_required_length: - logger.info( - "Downloaded %d bytes, but expected %d bytes", - total_downloaded, - required_length, - ) - - raise exceptions.DownloadLengthMismatchError( - required_length, total_downloaded - ) - - logger.info( - "Downloaded %d bytes out of upper limit of %d bytes.", - total_downloaded, - required_length, - ) diff --git a/tuf/ngclient/updater.py b/tuf/ngclient/updater.py index b054abdaf3..0e16149aad 100644 --- a/tuf/ngclient/updater.py +++ b/tuf/ngclient/updater.py @@ -215,12 +215,7 @@ def _download_metadata( else: filename = f"{version}.{rolename}.json" url = parse.urljoin(self._metadata_base_url, filename) - return download.download_bytes( - url, - length, - self._fetcher, - strict_required_length=False, - ) + return download.download_bytes(url, length, self._fetcher) def _load_local_metadata(self, rolename: str) -> bytes: with open(os.path.join(self._dir, f"{rolename}.json"), "rb") as f: From 8c2229478f766a5bfb87ad8e031c57819111c14d Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Tue, 15 Jun 2021 13:58:28 +0300 Subject: [PATCH 03/10] Remove format checks Remove format checking of download_file() parameters. The function is internal and receives trusted input from updater.py. Signed-off-by: Teodora Sechkova --- tuf/ngclient/_internal/download.py | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/tuf/ngclient/_internal/download.py b/tuf/ngclient/_internal/download.py index 697a73b625..24c42dfea0 100644 --- a/tuf/ngclient/_internal/download.py +++ b/tuf/ngclient/_internal/download.py @@ -27,9 +27,7 @@ import tempfile from urllib import parse -from securesystemslib import formats as sslib_formats - -from tuf import exceptions, formats +from tuf import exceptions # See 'log.py' to learn how logging is handled in TUF. 
logger = logging.getLogger(__name__) @@ -56,19 +54,11 @@ def download_file(url, required_length, fetcher): exceptions.DownloadLengthMismatchError, if there was a mismatch of observed vs expected lengths while downloading the file. - securesystemslib.exceptions.FormatError, if any of the arguments are - improperly formatted. - Any other unforeseen runtime exception. A file object that points to the contents of 'url'. """ - # Do all of the arguments have the appropriate format? - # Raise 'securesystemslib.exceptions.FormatError' if there is a mismatch. - sslib_formats.URL_SCHEMA.check_match(url) - formats.LENGTH_SCHEMA.check_match(required_length) - # 'url.replace('\\', '/')' is needed for compatibility with Windows-based # systems, because they might use back-slashes in place of forward-slashes. # This converts it to the common format. unquote() replaces %xx escapes in From 8692663fbcc7da5cdf835af6c08f4b00da14531c Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Fri, 11 Jun 2021 16:43:23 +0300 Subject: [PATCH 04/10] Reduce download logging Remove logging when an exception is raised. Reduce logging level to debug. Signed-off-by: Teodora Sechkova --- tuf/ngclient/_internal/download.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tuf/ngclient/_internal/download.py b/tuf/ngclient/_internal/download.py index 24c42dfea0..3ab6cc12a4 100644 --- a/tuf/ngclient/_internal/download.py +++ b/tuf/ngclient/_internal/download.py @@ -22,14 +22,12 @@ length of a downloaded file has to match the hash and length supplied by the metadata of that file. """ - import logging import tempfile from urllib import parse from tuf import exceptions -# See 'log.py' to learn how logging is handled in TUF. logger = logging.getLogger(__name__) @@ -66,7 +64,7 @@ def download_file(url, required_length, fetcher): # encoded as %5c in the url, which should also be replaced with a forward # slash. 
url = parse.unquote(url).replace("\\", "/") - logger.info("Downloading: %s", url) + logger.debug("Downloading: %s", url) # This is the temporary file that we will return to contain the contents of # the downloaded file. @@ -88,7 +86,6 @@ def download_file(url, required_length, fetcher): except Exception: # Close 'temp_file'. Any written data is lost. temp_file.close() - logger.debug("Could not download URL: %s", url) raise else: From 0b77eb90b82f289ff2641edd47e3b49741e462ce Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Mon, 14 Jun 2021 17:50:24 +0300 Subject: [PATCH 05/10] Implement download_file as contextmanager It allows us to remove the disable of pylint consider-using-with warning and does not require any manual file closing. Signed-off-by: Teodora Sechkova --- tuf/ngclient/_internal/download.py | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/tuf/ngclient/_internal/download.py b/tuf/ngclient/_internal/download.py index 3ab6cc12a4..7a55236d9c 100644 --- a/tuf/ngclient/_internal/download.py +++ b/tuf/ngclient/_internal/download.py @@ -24,13 +24,14 @@ """ import logging import tempfile +from contextlib import contextmanager from urllib import parse from tuf import exceptions logger = logging.getLogger(__name__) - +@contextmanager def download_file(url, required_length, fetcher): """ @@ -66,31 +67,19 @@ def download_file(url, required_length, fetcher): url = parse.unquote(url).replace("\\", "/") logger.debug("Downloading: %s", url) - # This is the temporary file that we will return to contain the contents of - # the downloaded file. 
- temp_file = tempfile.TemporaryFile() # pylint: disable=consider-using-with - number_of_bytes_received = 0 - try: + with tempfile.TemporaryFile() as temp_file: chunks = fetcher.fetch(url, required_length) for chunk in chunks: temp_file.write(chunk) number_of_bytes_received += len(chunk) - if number_of_bytes_received > required_length: raise exceptions.DownloadLengthMismatchError( required_length, number_of_bytes_received ) - - except Exception: - # Close 'temp_file'. Any written data is lost. - temp_file.close() - raise - - else: temp_file.seek(0) - return temp_file + yield temp_file def download_bytes(url, required_length, fetcher): From 35fb22c13a60815d3237a6cd05b46457e64ade01 Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Mon, 14 Jun 2021 17:51:09 +0300 Subject: [PATCH 06/10] Simplify RequestsFetcher.fetch() Make the internal function chunks() a separate private method of Requests fetcher. Signed-off-by: Teodora Sechkova --- tuf/ngclient/_internal/requests_fetcher.py | 93 +++++++++++----------- 1 file changed, 47 insertions(+), 46 deletions(-) diff --git a/tuf/ngclient/_internal/requests_fetcher.py b/tuf/ngclient/_internal/requests_fetcher.py index 216153b1f9..2905319754 100644 --- a/tuf/ngclient/_internal/requests_fetcher.py +++ b/tuf/ngclient/_internal/requests_fetcher.py @@ -21,7 +21,7 @@ # Globals logger = logging.getLogger(__name__) -# Classess +# Classes class RequestsFetcher(FetcherInterface): """A concrete implementation of FetcherInterface based on the Requests library. @@ -90,58 +90,59 @@ def fetch(self, url, required_length): status = e.response.status_code raise exceptions.FetcherHTTPError(str(e), status) - # Define a generator function to be returned by fetch. This way the - # caller of fetch can differentiate between connection and actual data - # download and measure download times accordingly. - def chunks(): - try: - bytes_received = 0 - while True: - # We download a fixed chunk of data in every round. 
This is - # so that we can defend against slow retrieval attacks. - # Furthermore, we do not wish to download an extremely - # large file in one shot. Before beginning the round, sleep - # (if set) for a short amount of time so that the CPU is not - # hogged in the while loop. - if self.sleep_before_round: - time.sleep(self.sleep_before_round) - - read_amount = min( - self.chunk_size, - required_length - bytes_received, - ) - - # NOTE: This may not handle some servers adding a - # Content-Encoding header, which may cause urllib3 to - # misbehave: - # https://github.com/pypa/pip/blob/404838abcca467648180b358598c597b74d568c9/src/pip/_internal/download.py#L547-L582 - data = response.raw.read(read_amount) - bytes_received += len(data) + return self._chunks(response, required_length) - # We might have no more data to read. Check number of bytes - # downloaded. - if not data: - logger.debug( - "Downloaded %d out of %d bytes", - bytes_received, - required_length, - ) + def _chunks(self, response, required_length): + """A generator function to be returned by fetch. This way the + caller of fetch can differentiate between connection and actual data + download.""" - # Finally, we signal that the download is complete. - break + try: + bytes_received = 0 + while True: + # We download a fixed chunk of data in every round. This is + # so that we can defend against slow retrieval attacks. + # Furthermore, we do not wish to download an extremely + # large file in one shot. Before beginning the round, sleep + # (if set) for a short amount of time so that the CPU is not + # hogged in the while loop. 
+ if self.sleep_before_round: + time.sleep(self.sleep_before_round) + + read_amount = min( + self.chunk_size, + required_length - bytes_received, + ) + + # NOTE: This may not handle some servers adding a + # Content-Encoding header, which may cause urllib3 to + # misbehave: + # https://github.com/pypa/pip/blob/404838abcca467648180b358598c597b74d568c9/src/pip/_internal/download.py#L547-L582 + data = response.raw.read(read_amount) + bytes_received += len(data) + + # We might have no more data to read. Check number of bytes + # downloaded. + if not data: + logger.debug( + "Downloaded %d out of %d bytes", + bytes_received, + required_length, + ) - yield data + # Finally, we signal that the download is complete. + break - if bytes_received >= required_length: - break + yield data - except urllib3.exceptions.ReadTimeoutError as e: - raise exceptions.SlowRetrievalError(str(e)) + if bytes_received >= required_length: + break - finally: - response.close() + except urllib3.exceptions.ReadTimeoutError as e: + raise exceptions.SlowRetrievalError(str(e)) - return chunks() + finally: + response.close() def _get_session(self, url): """Returns a different customized requests.Session per schema+hostname From 9daccac7f840ae84fccdbfb4c39f5f1372e9f8bc Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Tue, 15 Jun 2021 16:25:58 +0300 Subject: [PATCH 07/10] Reduce logging in requests_fetcher Remove repetitive debug logging. Signed-off-by: Teodora Sechkova --- tuf/ngclient/_internal/requests_fetcher.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/tuf/ngclient/_internal/requests_fetcher.py b/tuf/ngclient/_internal/requests_fetcher.py index 2905319754..4dc68e42fc 100644 --- a/tuf/ngclient/_internal/requests_fetcher.py +++ b/tuf/ngclient/_internal/requests_fetcher.py @@ -124,12 +124,6 @@ def _chunks(self, response, required_length): # We might have no more data to read. Check number of bytes # downloaded. 
if not data: - logger.debug( - "Downloaded %d out of %d bytes", - bytes_received, - required_length, - ) - # Finally, we signal that the download is complete. break @@ -138,6 +132,12 @@ def _chunks(self, response, required_length): if bytes_received >= required_length: break + logger.debug( + "Downloaded %d out of %d bytes", + bytes_received, + required_length, + ) + except urllib3.exceptions.ReadTimeoutError as e: raise exceptions.SlowRetrievalError(str(e)) @@ -158,10 +158,6 @@ def _get_session(self, url): ) session_index = parsed_url.scheme + "+" + parsed_url.hostname - - logger.debug("url: %s", url) - logger.debug("session index: %s", session_index) - session = self._sessions.get(session_index) if not session: From 16f42fc146def962044b6c9bb2535c179250d3b2 Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Tue, 15 Jun 2021 16:09:24 +0300 Subject: [PATCH 08/10] Add type annotations Add missing type annotations in download.py, fetcher.py and requests_fetcher.py Update docstrings to match the new style. Signed-off-by: Teodora Sechkova --- tuf/ngclient/_internal/download.py | 66 ++++++++-------------- tuf/ngclient/_internal/requests_fetcher.py | 8 ++- tuf/ngclient/fetcher.py | 3 +- 3 files changed, 29 insertions(+), 48 deletions(-) diff --git a/tuf/ngclient/_internal/download.py b/tuf/ngclient/_internal/download.py index 7a55236d9c..3a18995f08 100644 --- a/tuf/ngclient/_internal/download.py +++ b/tuf/ngclient/_internal/download.py @@ -1,61 +1,37 @@ -#!/usr/bin/env python - # Copyright 2012 - 2017, New York University and the TUF contributors # SPDX-License-Identifier: MIT OR Apache-2.0 +"""Handles the download of URL contents to a file" """ - - download.py - - - February 21, 2012. Based on previous version by Geremy Condra. - - - Konstantin Andrianov - Vladimir Diaz - - - See LICENSE-MIT OR LICENSE for licensing information. - - Download metadata and target files and check their validity. 
The hash and - length of a downloaded file has to match the hash and length supplied by the - metadata of that file. -""" import logging import tempfile from contextlib import contextmanager +from typing import IO, Iterator from urllib import parse from tuf import exceptions logger = logging.getLogger(__name__) -@contextmanager -def download_file(url, required_length, fetcher): - """ - - Given the url and length of the desired file, this function opens a - connection to 'url' and downloads the file up to 'required_length'. - - url: - A URL string that represents the location of the file. - - required_length: - An integer value representing the length of the file or an - upper boundary. - - - A file object is created on disk to store the contents of 'url'. - - - exceptions.DownloadLengthMismatchError, if there was a - mismatch of observed vs expected lengths while downloading the file. - - Any other unforeseen runtime exception. - - +@contextmanager +def download_file( + url: str, required_length: int, fetcher: "FetcherInterface" +) -> Iterator[IO]: + """Opens a connection to 'url' and downloads the content + up to 'required_length'. + + Args: + url: a URL string that represents the location of the file. + required_length: an integer value representing the length of + the file or an upper boundary. + + Raises: + DownloadLengthMismatchError: a mismatch of observed vs expected + lengths while downloading the file. + + Returns: A file object that points to the contents of 'url'. 
""" # 'url.replace('\\', '/')' is needed for compatibility with Windows-based @@ -82,7 +58,9 @@ def download_file(url, required_length, fetcher): yield temp_file -def download_bytes(url, required_length, fetcher): +def download_bytes( + url: str, required_length: int, fetcher: "FetcherInterface" +) -> bytes: """Download bytes from given url Returns the downloaded bytes, otherwise like download_file() diff --git a/tuf/ngclient/_internal/requests_fetcher.py b/tuf/ngclient/_internal/requests_fetcher.py index 4dc68e42fc..2101f844d9 100644 --- a/tuf/ngclient/_internal/requests_fetcher.py +++ b/tuf/ngclient/_internal/requests_fetcher.py @@ -7,7 +7,7 @@ import logging import time -from typing import Optional +from typing import Iterator, Optional from urllib import parse # Imports @@ -53,7 +53,7 @@ def __init__(self): self.chunk_size: int = 400000 # bytes self.sleep_before_round: Optional[int] = None - def fetch(self, url, required_length): + def fetch(self, url: str, required_length: int) -> Iterator[bytes]: """Fetches the contents of HTTP/HTTPS url from a remote server. Ensures the length of the downloaded data is up to 'required_length'. @@ -92,7 +92,9 @@ def fetch(self, url, required_length): return self._chunks(response, required_length) - def _chunks(self, response, required_length): + def _chunks( + self, response: "requests.Response", required_length: int + ) -> Iterator[bytes]: """A generator function to be returned by fetch. 
This way the caller of fetch can differentiate between connection and actual data download.""" diff --git a/tuf/ngclient/fetcher.py b/tuf/ngclient/fetcher.py index 8a6cae34d7..be4c65112e 100644 --- a/tuf/ngclient/fetcher.py +++ b/tuf/ngclient/fetcher.py @@ -6,6 +6,7 @@ # Imports import abc +from typing import Iterator # Classes @@ -20,7 +21,7 @@ class FetcherInterface: __metaclass__ = abc.ABCMeta @abc.abstractmethod - def fetch(self, url, required_length): + def fetch(self, url: str, required_length: int) -> Iterator[bytes]: """Fetches the contents of HTTP/HTTPS url from a remote server. Ensures the length of the downloaded data is up to 'required_length'. From 1596de24545637d0912f1943fb87e66c5b340a4e Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Thu, 17 Jun 2021 15:37:50 +0300 Subject: [PATCH 09/10] Move download.py functions to FetcherInterface Make download_file and download_bytes functions part of the FetcherInterface class. Remove download.py module. Signed-off-by: Teodora Sechkova --- tuf/ngclient/_internal/download.py | 69 ------------------------------ tuf/ngclient/fetcher.py | 58 ++++++++++++++++++++++++- tuf/ngclient/updater.py | 14 +++--- 3 files changed, 62 insertions(+), 79 deletions(-) delete mode 100644 tuf/ngclient/_internal/download.py diff --git a/tuf/ngclient/_internal/download.py b/tuf/ngclient/_internal/download.py deleted file mode 100644 index 3a18995f08..0000000000 --- a/tuf/ngclient/_internal/download.py +++ /dev/null @@ -1,69 +0,0 @@ -# Copyright 2012 - 2017, New York University and the TUF contributors -# SPDX-License-Identifier: MIT OR Apache-2.0 - -"""Handles the download of URL contents to a file" -""" - -import logging -import tempfile -from contextlib import contextmanager -from typing import IO, Iterator -from urllib import parse - -from tuf import exceptions - -logger = logging.getLogger(__name__) - - -@contextmanager -def download_file( - url: str, required_length: int, fetcher: "FetcherInterface" -) -> Iterator[IO]: - 
"""Opens a connection to 'url' and downloads the content - up to 'required_length'. - - Args: - url: a URL string that represents the location of the file. - required_length: an integer value representing the length of - the file or an upper boundary. - - Raises: - DownloadLengthMismatchError: a mismatch of observed vs expected - lengths while downloading the file. - - Returns: - A file object that points to the contents of 'url'. - """ - # 'url.replace('\\', '/')' is needed for compatibility with Windows-based - # systems, because they might use back-slashes in place of forward-slashes. - # This converts it to the common format. unquote() replaces %xx escapes in - # a url with their single-character equivalent. A back-slash may be - # encoded as %5c in the url, which should also be replaced with a forward - # slash. - url = parse.unquote(url).replace("\\", "/") - logger.debug("Downloading: %s", url) - - number_of_bytes_received = 0 - - with tempfile.TemporaryFile() as temp_file: - chunks = fetcher.fetch(url, required_length) - for chunk in chunks: - temp_file.write(chunk) - number_of_bytes_received += len(chunk) - if number_of_bytes_received > required_length: - raise exceptions.DownloadLengthMismatchError( - required_length, number_of_bytes_received - ) - temp_file.seek(0) - yield temp_file - - -def download_bytes( - url: str, required_length: int, fetcher: "FetcherInterface" -) -> bytes: - """Download bytes from given url - - Returns the downloaded bytes, otherwise like download_file() - """ - with download_file(url, required_length, fetcher) as dl_file: - return dl_file.read() diff --git a/tuf/ngclient/fetcher.py b/tuf/ngclient/fetcher.py index be4c65112e..375e60d31b 100644 --- a/tuf/ngclient/fetcher.py +++ b/tuf/ngclient/fetcher.py @@ -6,7 +6,15 @@ # Imports import abc -from typing import Iterator +import logging +import tempfile +from contextlib import contextmanager +from typing import IO, Iterator +from urllib import parse + +from tuf import exceptions + 
+logger = logging.getLogger(__name__) # Classes @@ -40,3 +48,51 @@ def fetch(self, url: str, required_length: int) -> Iterator[bytes]: A bytes iterator """ raise NotImplementedError # pragma: no cover + + @contextmanager + def download_file(self, url: str, required_length: int) -> Iterator[IO]: + """Opens a connection to 'url' and downloads the content + up to 'required_length'. + + Args: + url: a URL string that represents the location of the file. + required_length: an integer value representing the length of + the file or an upper boundary. + + Raises: + DownloadLengthMismatchError: a mismatch of observed vs expected + lengths while downloading the file. + + Yields: + A file object that points to the contents of 'url'. + """ + # 'url.replace('\\', '/')' is needed for compatibility with + # Windows-based systems, because they might use back-slashes in place + # of forward-slashes. This converts it to the common format. + # unquote() replaces %xx escapes in a url with their single-character + # equivalent. A back-slash may beencoded as %5c in the url, which + # should also be replaced with a forward slash. 
+ url = parse.unquote(url).replace("\\", "/") + logger.debug("Downloading: %s", url) + + number_of_bytes_received = 0 + + with tempfile.TemporaryFile() as temp_file: + chunks = self.fetch(url, required_length) + for chunk in chunks: + temp_file.write(chunk) + number_of_bytes_received += len(chunk) + if number_of_bytes_received > required_length: + raise exceptions.DownloadLengthMismatchError( + required_length, number_of_bytes_received + ) + temp_file.seek(0) + yield temp_file + + def download_bytes(self, url: str, required_length: int) -> bytes: + """Download bytes from given url + + Returns the downloaded bytes, otherwise like download_file() + """ + with self.download_file(url, required_length) as dl_file: + return dl_file.read() diff --git a/tuf/ngclient/updater.py b/tuf/ngclient/updater.py index 0e16149aad..de3af53cff 100644 --- a/tuf/ngclient/updater.py +++ b/tuf/ngclient/updater.py @@ -14,11 +14,7 @@ from securesystemslib import util as sslib_util from tuf import exceptions -from tuf.ngclient._internal import ( - download, - requests_fetcher, - trusted_metadata_set, -) +from tuf.ngclient._internal import requests_fetcher, trusted_metadata_set from tuf.ngclient.config import UpdaterConfig from tuf.ngclient.fetcher import FetcherInterface @@ -27,7 +23,7 @@ class Updater: """ - An implemetation of the TUF client workflow. + An implementation of the TUF client workflow. Provides a public API for integration in client applications. 
""" @@ -193,8 +189,8 @@ def download_target( target_fileinfo: "TargetFile" = targetinfo["fileinfo"] full_url = parse.urljoin(target_base_url, target_filepath) - with download.download_file( - full_url, target_fileinfo.length, self._fetcher + with self._fetcher.download_file( + full_url, target_fileinfo.length ) as target_file: try: target_fileinfo.verify_length_and_hashes(target_file) @@ -215,7 +211,7 @@ def _download_metadata( else: filename = f"{version}.{rolename}.json" url = parse.urljoin(self._metadata_base_url, filename) - return download.download_bytes(url, length, self._fetcher) + return self._fetcher.download_bytes(url, length) def _load_local_metadata(self, rolename: str) -> bytes: with open(os.path.join(self._dir, f"{rolename}.json"), "rb") as f: From 0eff004af58f997bc16b83242e58c0f35e88951c Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Tue, 6 Jul 2021 20:04:37 +0300 Subject: [PATCH 10/10] Replace required_length with max_length Rename required_length argument in fetcher to max_length to better match its purpose. Docstring improvements. Signed-off-by: Teodora Sechkova --- tuf/ngclient/_internal/requests_fetcher.py | 18 ++++++------- tuf/ngclient/fetcher.py | 31 +++++++++++----------- 2 files changed, 24 insertions(+), 25 deletions(-) diff --git a/tuf/ngclient/_internal/requests_fetcher.py b/tuf/ngclient/_internal/requests_fetcher.py index 2101f844d9..a26231c5bb 100644 --- a/tuf/ngclient/_internal/requests_fetcher.py +++ b/tuf/ngclient/_internal/requests_fetcher.py @@ -53,15 +53,15 @@ def __init__(self): self.chunk_size: int = 400000 # bytes self.sleep_before_round: Optional[int] = None - def fetch(self, url: str, required_length: int) -> Iterator[bytes]: + def fetch(self, url: str, max_length: int) -> Iterator[bytes]: """Fetches the contents of HTTP/HTTPS url from a remote server. - Ensures the length of the downloaded data is up to 'required_length'. + Ensures the length of the downloaded data is up to 'max_length'. 
Arguments: url: A URL string that represents a file location. - required_length: An integer value representing the file length in - bytes. + max_length: An integer value representing the maximum + number of bytes to be downloaded. Raises: exceptions.SlowRetrievalError: A timeout occurs while receiving @@ -90,10 +90,10 @@ def fetch(self, url: str, required_length: int) -> Iterator[bytes]: status = e.response.status_code raise exceptions.FetcherHTTPError(str(e), status) - return self._chunks(response, required_length) + return self._chunks(response, max_length) def _chunks( - self, response: "requests.Response", required_length: int + self, response: "requests.Response", max_length: int ) -> Iterator[bytes]: """A generator function to be returned by fetch. This way the caller of fetch can differentiate between connection and actual data @@ -113,7 +113,7 @@ def _chunks( read_amount = min( self.chunk_size, - required_length - bytes_received, + max_length - bytes_received, ) # NOTE: This may not handle some servers adding a @@ -131,13 +131,13 @@ def _chunks( yield data - if bytes_received >= required_length: + if bytes_received >= max_length: break logger.debug( "Downloaded %d out of %d bytes", bytes_received, - required_length, + max_length, ) except urllib3.exceptions.ReadTimeoutError as e: diff --git a/tuf/ngclient/fetcher.py b/tuf/ngclient/fetcher.py index 375e60d31b..89d5a98473 100644 --- a/tuf/ngclient/fetcher.py +++ b/tuf/ngclient/fetcher.py @@ -29,15 +29,15 @@ class FetcherInterface: __metaclass__ = abc.ABCMeta @abc.abstractmethod - def fetch(self, url: str, required_length: int) -> Iterator[bytes]: + def fetch(self, url: str, max_length: int) -> Iterator[bytes]: """Fetches the contents of HTTP/HTTPS url from a remote server. - Ensures the length of the downloaded data is up to 'required_length'. + Ensures the length of the downloaded data is up to 'max_length'. Arguments: url: A URL string that represents a file location. 
- required_length: An integer value representing the file length in - bytes. + max_length: An integer value representing the maximum + number of bytes to be downloaded. Raises: tuf.exceptions.SlowRetrievalError: A timeout occurs while receiving @@ -50,21 +50,20 @@ def fetch(self, url: str, required_length: int) -> Iterator[bytes]: raise NotImplementedError # pragma: no cover @contextmanager - def download_file(self, url: str, required_length: int) -> Iterator[IO]: + def download_file(self, url: str, max_length: int) -> Iterator[IO]: """Opens a connection to 'url' and downloads the content - up to 'required_length'. + up to 'max_length'. Args: url: a URL string that represents the location of the file. - required_length: an integer value representing the length of - the file or an upper boundary. + max_length: an integer value representing the length of + the file or an upper bound. Raises: - DownloadLengthMismatchError: a mismatch of observed vs expected - lengths while downloading the file. + DownloadLengthMismatchError: downloaded bytes exceed 'max_length'. Yields: - A file object that points to the contents of 'url'. + A TemporaryFile object that points to the contents of 'url'. 
""" # 'url.replace('\\', '/')' is needed for compatibility with # Windows-based systems, because they might use back-slashes in place @@ -78,21 +77,21 @@ def download_file(self, url: str, required_length: int) -> Iterator[IO]: number_of_bytes_received = 0 with tempfile.TemporaryFile() as temp_file: - chunks = self.fetch(url, required_length) + chunks = self.fetch(url, max_length) for chunk in chunks: temp_file.write(chunk) number_of_bytes_received += len(chunk) - if number_of_bytes_received > required_length: + if number_of_bytes_received > max_length: raise exceptions.DownloadLengthMismatchError( - required_length, number_of_bytes_received + max_length, number_of_bytes_received ) temp_file.seek(0) yield temp_file - def download_bytes(self, url: str, required_length: int) -> bytes: + def download_bytes(self, url: str, max_length: int) -> bytes: """Download bytes from given url Returns the downloaded bytes, otherwise like download_file() """ - with self.download_file(url, required_length) as dl_file: + with self.download_file(url, max_length) as dl_file: return dl_file.read()