Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Resume incomplete download #11180

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions news/11180.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add support for resuming incomplete downloads. The behavior can be controlled with the ``--incomplete-downloads`` and ``--incomplete-download-retries`` flags.
21 changes: 21 additions & 0 deletions src/pip/_internal/cli/cmdoptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1017,6 +1017,25 @@ def check_list_path_option(options: Values) -> None:
help=("Enable deprecated functionality, that will be removed in the future."),
)

# ``--incomplete-downloads``: policy for handling an incomplete download.
# The chosen value ("resume" or "discard") is stored as a string under
# ``resume_incomplete``.
incomplete_downloads: Callable[..., Option] = partial(
    Option,
    "--incomplete-downloads",
    metavar="policy",
    dest="resume_incomplete",
    default="discard",
    choices=["resume", "discard"],
    help=(
        "How to handle an incomplete download: "
        "resume, discard (default to %default)."
    ),
)

# ``--incomplete-download-retries``: integer cap on resumption retries for an
# incomplete download, stored under ``resume_attempts``.
incomplete_download_retries: Callable[..., Option] = partial(
    Option,
    "--incomplete-download-retries",
    type="int",
    default=5,
    dest="resume_attempts",
    help=(
        "Maximum number of resumption retries for incomplete download"
        " (default %default times)."
    ),
)

##########
# groups #
Expand Down Expand Up @@ -1048,6 +1067,8 @@ def check_list_path_option(options: Values) -> None:
no_python_version_warning,
use_new_feature,
use_deprecated_feature,
incomplete_downloads,
incomplete_download_retries,
],
}

Expand Down
12 changes: 10 additions & 2 deletions src/pip/_internal/cli/progress_bars.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ def _rich_progress_bar(
*,
bar_type: str,
size: int,
initial_progress: Optional[int] = None,
) -> Generator[bytes, None, None]:
assert bar_type == "on", "This should only be used in the default mode."

Expand All @@ -49,20 +50,27 @@ def _rich_progress_bar(

progress = Progress(*columns, refresh_per_second=30)
task_id = progress.add_task(" " * (get_indentation() + 2), total=total)
if initial_progress is not None:
progress.update(task_id, advance=initial_progress)
with progress:
for chunk in iterable:
yield chunk
progress.update(task_id, advance=len(chunk))


def get_download_progress_renderer(
*, bar_type: str, size: Optional[int] = None
*, bar_type: str, size: Optional[int] = None, initial_progress: Optional[int] = None
) -> DownloadProgressRenderer:
"""Get an object that can be used to render the download progress.
Returns a callable, that takes an iterable to "wrap".
"""
if bar_type == "on":
return functools.partial(_rich_progress_bar, bar_type=bar_type, size=size)
return functools.partial(
_rich_progress_bar,
bar_type=bar_type,
size=size,
initial_progress=initial_progress,
)
else:
return iter # no-op, when passed an iterator
4 changes: 4 additions & 0 deletions src/pip/_internal/cli/req_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,8 @@ def make_requirement_preparer(
"fast-deps has no effect when used with the legacy resolver."
)

resume_incomplete = options.resume_incomplete == "resume"

return RequirementPreparer(
build_dir=temp_build_dir_path,
src_dir=options.src_dir,
Expand All @@ -319,6 +321,8 @@ def make_requirement_preparer(
use_user_site=use_user_site,
lazy_wheel=lazy_wheel,
verbosity=verbosity,
resume_incomplete=resume_incomplete,
resume_attempts=options.resume_attempts,
)

@classmethod
Expand Down
34 changes: 34 additions & 0 deletions src/pip/_internal/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -656,3 +656,37 @@ def __str__(self) -> str:
assert self.error is not None
message_part = f".\n{self.error}\n"
return f"Configuration file {self.reason}{message_part}"


class IncompleteDownloadError(DiagnosticPipError):
    """Raised when the downloader receives fewer bytes than advertised
    in the Content-Length header.

    The diagnostic wording depends on whether resumption was enabled:

    * with resumption, the message reports the retry limit that was used and
      the hint points at ``--incomplete-download-retries``;
    * without it, the hint suggests ``--incomplete-downloads=resume``.

    :param link: Textual form of the download link, shown as ``File:``.
    :param resume_incomplete: Whether resuming incomplete downloads was enabled.
    :param resume_attempts: The configured resume retry limit.
    """

    reference = "incomplete-download-error"

    def __init__(
        self, link: str, resume_incomplete: bool, resume_attempts: int
    ) -> None:
        if not resume_incomplete:
            message = (
                "Download failed because not enough bytes are received."
                " The incomplete file has been cleaned up."
            )
            hint = (
                "Use --incomplete-downloads=resume to make pip retry failed download."
            )
        else:
            template = (
                "Download failed after {} attempts because not enough bytes are"
                " received. The incomplete file has been cleaned up."
            )
            message = template.format(resume_attempts)
            hint = "Use --incomplete-download-retries to configure resume retry limit."

        # The context always lists all three inputs, whichever branch ran.
        context = (
            f"File: {link}\n"
            f"Resume failed download: {resume_incomplete}\n"
            f"Resume retry limit: {resume_attempts}"
        )

        super().__init__(
            message=message,
            context=context,
            hint_stmt=hint,
            note_stmt="This is an issue with network connectivity, not pip.",
        )
148 changes: 110 additions & 38 deletions src/pip/_internal/network/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,13 @@
import logging
import mimetypes
import os
from http import HTTPStatus
from typing import Iterable, Optional, Tuple

from pip._vendor.requests.models import CONTENT_CHUNK_SIZE, Response

from pip._internal.cli.progress_bars import get_download_progress_renderer
from pip._internal.exceptions import NetworkConnectionError
from pip._internal.exceptions import IncompleteDownloadError, NetworkConnectionError
from pip._internal.models.index import PyPI
from pip._internal.models.link import Link
from pip._internal.network.cache import is_from_cache
Expand All @@ -27,13 +28,21 @@ def _get_http_response_size(resp: Response) -> Optional[int]:
return None


def _get_http_response_etag_or_date(resp: Response) -> Optional[str]:
    """
    Return a validator from the response for use in an If-Range header.

    The ETag header is preferred. When it is absent, the Last-Modified header
    is returned instead. None is returned if the response has neither.

    The Date header is deliberately not used: it records when the response
    was generated, not when the file last changed. A server comparing it with
    the file's modification date would not find a match and would reply with
    the full body rather than the requested range, defeating resumption.
    """
    headers = resp.headers
    return headers.get("etag", headers.get("last-modified"))


def _prepare_download(
resp: Response,
link: Link,
progress_bar: str,
total_length: Optional[int],
range_start: Optional[int] = None,
) -> Iterable[bytes]:
total_length = _get_http_response_size(resp)

if link.netloc == PyPI.file_storage_domain:
url = link.show_url
else:
Expand All @@ -42,10 +51,17 @@ def _prepare_download(
logged_url = redact_auth_from_url(url)

if total_length:
logged_url = "{} ({})".format(logged_url, format_size(total_length))
if range_start is not None:
logged_url = "{} ({}/{})".format(
logged_url, format_size(range_start), format_size(total_length)
)
else:
logged_url = "{} ({})".format(logged_url, format_size(total_length))

if is_from_cache(resp):
logger.info("Using cached %s", logged_url)
elif range_start is not None:
logger.info("Resume download %s", logged_url)
else:
logger.info("Downloading %s", logged_url)

Expand All @@ -65,7 +81,9 @@ def _prepare_download(
if not show_progress:
return chunks

renderer = get_download_progress_renderer(bar_type=progress_bar, size=total_length)
renderer = get_download_progress_renderer(
bar_type=progress_bar, size=total_length, initial_progress=range_start
)
return renderer(chunks)


Expand Down Expand Up @@ -112,10 +130,27 @@ def _get_http_response_filename(resp: Response, link: Link) -> str:
return filename


def _http_get_download(session: PipSession, link: Link) -> Response:
def _http_get_download(
session: PipSession,
link: Link,
range_start: Optional[int] = None,
if_range: Optional[str] = None,
) -> Response:
target_url = link.url.split("#", 1)[0]
resp = session.get(target_url, headers=HEADERS, stream=True)
raise_for_status(resp)
headers = {**HEADERS}
# request a partial download
if range_start is not None:
headers["Range"] = "bytes={}-".format(range_start)
# make sure the file hasn't changed
if if_range is not None:
headers["If-Range"] = if_range
try:
resp = session.get(target_url, headers=headers, stream=True)
raise_for_status(resp)
except NetworkConnectionError as e:
assert e.response is not None
logger.critical("HTTP error %s while getting %s", e.response.status_code, link)
raise
return resp


Expand All @@ -124,28 +159,80 @@ def __init__(
self,
session: PipSession,
progress_bar: str,
resume_incomplete: bool,
resume_attempts: int,
) -> None:
self._session = session
self._progress_bar = progress_bar
self._resume_incomplete = resume_incomplete
assert (
resume_attempts > 0
), "Number of max incomplete download retries must be positive"
self._resume_attempts = resume_attempts

def __call__(self, link: Link, location: str) -> Tuple[str, str]:
"""Download the file given by link into location."""
try:
resp = _http_get_download(self._session, link)
except NetworkConnectionError as e:
assert e.response is not None
logger.critical(
"HTTP error %s while getting %s", e.response.status_code, link
)
raise
resp = _http_get_download(self._session, link)
total_length = _get_http_response_size(resp)
etag_or_date = _get_http_response_etag_or_date(resp)

filename = _get_http_response_filename(resp, link)
filepath = os.path.join(location, filename)

chunks = _prepare_download(resp, link, self._progress_bar)
chunks = _prepare_download(resp, link, self._progress_bar, total_length)
bytes_received = 0

with open(filepath, "wb") as content_file:

# Process the initial response
for chunk in chunks:
bytes_received += len(chunk)
content_file.write(chunk)

if self._resume_incomplete:
attempts_left = self._resume_attempts

while total_length is not None and bytes_received < total_length:
if attempts_left <= 0:
break
attempts_left -= 1

# Attempt to resume download
resume_resp = _http_get_download(
self._session,
link,
range_start=bytes_received,
if_range=etag_or_date,
)

restart = resume_resp.status_code != HTTPStatus.PARTIAL_CONTENT
# If the server responded with 200 (e.g. when the file has been
# modified on the server or the server doesn't support range
# requests), reset the download to start from the beginning.
if restart:
content_file.seek(0)
content_file.truncate()
bytes_received = 0
total_length = _get_http_response_size(resume_resp)
etag_or_date = _get_http_response_etag_or_date(resume_resp)

chunks = _prepare_download(
resume_resp,
link,
self._progress_bar,
total_length,
range_start=bytes_received,
)
for chunk in chunks:
bytes_received += len(chunk)
content_file.write(chunk)

if total_length is not None and bytes_received < total_length:
os.remove(filepath)
raise IncompleteDownloadError(
str(link), self._resume_incomplete, self._resume_attempts
)

content_type = resp.headers.get("Content-Type", "")
return filepath, content_type

Expand All @@ -155,32 +242,17 @@ def __init__(
self,
session: PipSession,
progress_bar: str,
resume_incomplete: bool,
resume_attempts: int,
) -> None:
self._session = session
self._progress_bar = progress_bar
self._downloader = Downloader(
session, progress_bar, resume_incomplete, resume_attempts
)

def __call__(
self, links: Iterable[Link], location: str
) -> Iterable[Tuple[Link, Tuple[str, str]]]:
"""Download the files given by links into location."""
for link in links:
try:
resp = _http_get_download(self._session, link)
except NetworkConnectionError as e:
assert e.response is not None
logger.critical(
"HTTP error %s while getting %s",
e.response.status_code,
link,
)
raise

filename = _get_http_response_filename(resp, link)
filepath = os.path.join(location, filename)

chunks = _prepare_download(resp, link, self._progress_bar)
with open(filepath, "wb") as content_file:
for chunk in chunks:
content_file.write(chunk)
content_type = resp.headers.get("Content-Type", "")
filepath, content_type = self._downloader(link, location)
yield link, (filepath, content_type)
10 changes: 8 additions & 2 deletions src/pip/_internal/operations/prepare.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,15 +221,21 @@ def __init__(
use_user_site: bool,
lazy_wheel: bool,
verbosity: int,
resume_incomplete: bool,
resume_attempts: int,
) -> None:
super().__init__()

self.src_dir = src_dir
self.build_dir = build_dir
self.build_tracker = build_tracker
self._session = session
self._download = Downloader(session, progress_bar)
self._batch_download = BatchDownloader(session, progress_bar)
self._download = Downloader(
session, progress_bar, resume_incomplete, resume_attempts
)
self._batch_download = BatchDownloader(
session, progress_bar, resume_incomplete, resume_attempts
)
self.finder = finder

# Where still-packed archives should be written to. If None, they are
Expand Down
Loading