From 80609e8c20a8db26c97037f252b29307ab44b0e2 Mon Sep 17 00:00:00 2001 From: Pradyun Gedam Date: Sun, 30 Jan 2022 16:13:59 +0000 Subject: [PATCH] Properly yield results from `html5lib` parsing The earlier variant _returned_ an iterable object from a generator. This did not properly handle the fallback, resulting in the html5lib code path not being executed. --- news/10846.bugfix.rst | 1 + src/pip/_internal/index/collector.py | 3 ++- tests/unit/test_collector.py | 12 ++++++++++++ 3 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 news/10846.bugfix.rst diff --git a/news/10846.bugfix.rst b/news/10846.bugfix.rst new file mode 100644 index 00000000000..63c35750d6a --- /dev/null +++ b/news/10846.bugfix.rst @@ -0,0 +1 @@ +Properly handle links parsed by html5lib, when using ``--use-deprecated=html5lib``. diff --git a/src/pip/_internal/index/collector.py b/src/pip/_internal/index/collector.py index c30c37661f5..ead5fcf7c8e 100644 --- a/src/pip/_internal/index/collector.py +++ b/src/pip/_internal/index/collector.py @@ -343,7 +343,8 @@ def parse_links(page: "HTMLPage", use_deprecated_html5lib: bool) -> Iterable[Lin Parse an HTML document, and yield its anchor elements as Link objects.
""" if use_deprecated_html5lib: - return _parse_links_html5lib(page) + yield from _parse_links_html5lib(page) + return parser = HTMLLinkParser() encoding = page.encoding or "utf-8" diff --git a/tests/unit/test_collector.py b/tests/unit/test_collector.py index 2225a32bf45..4b51d1cff98 100644 --- a/tests/unit/test_collector.py +++ b/tests/unit/test_collector.py @@ -539,6 +539,18 @@ def test_parse_links_caches_same_page_by_url() -> None: assert "pkg2" in parsed_links_3[0].url +def test_parse_link_handles_deprecated_usage_properly() -> None: + html = b'' + url = "https://example.com/simple/" + page = HTMLPage(html, encoding=None, url=url) + + parsed_links = list(parse_links(page, use_deprecated_html5lib=True)) + + assert len(parsed_links) == 2 + assert "pkg1-1.0" in parsed_links[0].url + assert "pkg1-2.0" in parsed_links[1].url + + @mock.patch("pip._internal.index.collector.raise_for_status") def test_request_http_error( mock_raise_for_status: mock.Mock, caplog: pytest.LogCaptureFixture