diff --git a/src/pip/_internal/index/collector.py b/src/pip/_internal/index/collector.py
index 2f957e69e90..ff722cfeb85 100644
--- a/src/pip/_internal/index/collector.py
+++ b/src/pip/_internal/index/collector.py
@@ -41,7 +41,6 @@
from pip._internal.models.search_scope import SearchScope
from pip._internal.network.session import PipSession
from pip._internal.network.utils import raise_for_status
-from pip._internal.utils.deprecation import deprecated
from pip._internal.utils.filetypes import is_archive_file
from pip._internal.utils.misc import pairwise, redact_auth_from_url
from pip._internal.vcs import vcs
@@ -346,34 +345,13 @@ def parse_links(page: "HTMLPage", use_deprecated_html5lib: bool) -> Iterable[Lin
"""
Parse an HTML document, and yield its anchor elements as Link objects.
"""
- encoding = page.encoding or "utf-8"
-
- # Check if the page starts with a valid doctype, to decide whether to use
- # http.parser or (deprecated) html5lib for parsing -- unless explicitly
- # requested to use html5lib.
- if not use_deprecated_html5lib:
- expected_doctype = "".encode(encoding)
- actual_start = page.content[: len(expected_doctype)]
- if actual_start.decode(encoding).lower() != "":
- deprecated(
- reason=(
- f"The HTML index page being used ({page.url}) is not a proper "
- "HTML 5 document. This is in violation of PEP 503 which requires "
- "these pages to be well-formed HTML 5 documents. Please reach out "
- "to the owners of this index page, and ask them to update this "
- "index page to a valid HTML 5 document."
- ),
- replacement=None,
- gone_in="22.2",
- issue=10825,
- )
- use_deprecated_html5lib = True
if use_deprecated_html5lib:
yield from _parse_links_html5lib(page)
return
- parser = HTMLLinkParser()
+ parser = HTMLLinkParser(page.url)
+ encoding = page.encoding or "utf-8"
parser.feed(page.content.decode(encoding))
url = page.url
diff --git a/tests/unit/test_collector.py b/tests/unit/test_collector.py
index 403da9c4005..219e0c0cc4c 100644
--- a/tests/unit/test_collector.py
+++ b/tests/unit/test_collector.py
@@ -551,21 +551,42 @@ def test_parse_link_handles_deprecated_usage_properly() -> None:
assert "pkg1-2.0" in parsed_links[1].url
-@mock.patch("pip._internal.index.collector.deprecated")
-def test_parse_links_presents_deprecation_warning_on_non_html5_page(
- mock_deprecated: mock.Mock,
+def test_parse_links_presents_warning_on_missing_doctype(
+ caplog: pytest.LogCaptureFixture,  # used below to inspect emitted log records
) -> None:
html = b''
url = "https://example.com/simple/"
page = HTMLPage(html, encoding=None, url=url, cache_link_parsing=False)
- parsed_links = list(parse_links(page, use_deprecated_html5lib=False))
+ with caplog.at_level(logging.WARN):  # parse with the capture level set to WARN
+ parsed_links = list(parse_links(page, use_deprecated_html5lib=False))
assert len(parsed_links) == 2, parsed_links
assert "pkg1-1.0" in parsed_links[0].url
assert "pkg1-2.0" in parsed_links[1].url
- mock_deprecated.assert_called_once()
+ assert len(caplog.records) == 1  # exactly one record captured while parsing
+
+
+def test_parse_links_presents_warning_on_html4_doctype(
+ caplog: pytest.LogCaptureFixture,  # used below to inspect emitted log records
+) -> None:
+ html = (  # NOTE(review): both literals are empty here yet 2 links are asserted — confirm
+ b''
+ b''
+ )
+ url = "https://example.com/simple/"
+ page = HTMLPage(html, encoding=None, url=url, cache_link_parsing=False)
+
+ with caplog.at_level(logging.WARN):  # parse with the capture level set to WARN
+ parsed_links = list(parse_links(page, use_deprecated_html5lib=False))
+
+ assert len(parsed_links) == 2, parsed_links
+ assert "pkg1-1.0" in parsed_links[0].url
+ assert "pkg1-2.0" in parsed_links[1].url
+
+ assert len(caplog.records) == 1  # exactly one record captured while parsing
@mock.patch("pip._internal.index.collector.raise_for_status")