From 97fb2f722297ae4e12e36dab024e0acf8477b3c8 Mon Sep 17 00:00:00 2001 From: David Lord Date: Sun, 5 May 2024 08:55:42 -0700 Subject: [PATCH] remove _invalid_iri_to_uri workaround tell Python to handle itms-services scheme correctly --- CHANGES.rst | 3 +++ src/werkzeug/urls.py | 25 ++++++------------------- src/werkzeug/wrappers/response.py | 3 +-- tests/test_urls.py | 6 ++++++ tests/test_wrappers.py | 1 + 5 files changed, 17 insertions(+), 21 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 279fd3f7d..367cfb668 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -7,6 +7,9 @@ Unreleased - Make reloader more robust when ``""`` is in ``sys.path``. :pr:`2823` - Better TLS cert format with ``adhoc`` dev certs. :pr:`2891` +- Inform Python < 3.12 how to handle ``itms-services`` URIs correctly, rather + than using an overly-broad workaround in Werkzeug that caused some redirect + URIs to be passed on without encoding. :issue:`2828` Version 3.0.2 diff --git a/src/werkzeug/urls.py b/src/werkzeug/urls.py index 4d61e600b..5bffe3928 100644 --- a/src/werkzeug/urls.py +++ b/src/werkzeug/urls.py @@ -3,6 +3,7 @@ import codecs import re import typing as t +import urllib.parse from urllib.parse import quote from urllib.parse import unquote from urllib.parse import urlencode @@ -164,25 +165,11 @@ def iri_to_uri(iri: str) -> str: return urlunsplit((parts.scheme, netloc, path, query, fragment)) -def _invalid_iri_to_uri(iri: str) -> str: - """The URL scheme ``itms-services://`` must contain the ``//`` even though it does - not have a host component. There may be other invalid schemes as well. Currently, - responses will always call ``iri_to_uri`` on the redirect ``Location`` header, which - removes the ``//``. For now, if the IRI only contains ASCII and does not contain - spaces, pass it on as-is. In Werkzeug 3.0, this should become a - ``response.process_location`` flag. - - :meta private: - """ - try: - iri.encode("ascii") - except UnicodeError: - pass - else: - if len(iri.split(None, 1)) == 1: - return iri - - return iri_to_uri(iri) +# Python < 3.12 +# itms-services was worked around in previous iri_to_uri implementations, but +# we can tell Python directly that it needs to preserve the //. +if "itms-services" not in urllib.parse.uses_netloc: + urllib.parse.uses_netloc.append("itms-services") def _decode_idna(domain: str) -> str: diff --git a/src/werkzeug/wrappers/response.py b/src/werkzeug/wrappers/response.py index 7b666e3e8..7f01287c7 100644 --- a/src/werkzeug/wrappers/response.py +++ b/src/werkzeug/wrappers/response.py @@ -14,7 +14,6 @@ from ..http import parse_range_header from ..http import remove_entity_headers from ..sansio.response import Response as _SansIOResponse -from ..urls import _invalid_iri_to_uri from ..urls import iri_to_uri from ..utils import cached_property from ..wsgi import _RangeWrapper @@ -479,7 +478,7 @@ def get_wsgi_headers(self, environ: WSGIEnvironment) -> Headers: content_length = value if location is not None: - location = _invalid_iri_to_uri(location) + location = iri_to_uri(location) if self.autocorrect_location_header: # Make the location header an absolute URL. diff --git a/tests/test_urls.py b/tests/test_urls.py index fdaa913a6..101b886ec 100644 --- a/tests/test_urls.py +++ b/tests/test_urls.py @@ -98,3 +98,9 @@ def test_iri_to_uri_dont_quote_valid_code_points(): # [] are not valid URL code points according to WhatWG URL Standard # https://url.spec.whatwg.org/#url-code-points assert urls.iri_to_uri("/path[bracket]?(paren)") == "/path%5Bbracket%5D?(paren)" + + +# Python < 3.12 +def test_itms_services() -> None: + url = "itms-services://?action=download-manifest&url=https://test.example/path" + assert urls.iri_to_uri(url) == url diff --git a/tests/test_wrappers.py b/tests/test_wrappers.py index d7bc12b95..f75694459 100644 --- a/tests/test_wrappers.py +++ b/tests/test_wrappers.py @@ -1154,6 +1154,7 @@ class MyResponse(wrappers.Response): ("auto", "location", "expect"), ( (False, "/test", "/test"), + (False, "/\\\\test.example?q", "/%5C%5Ctest.example?q"), (True, "/test", "http://localhost/test"), (True, "test", "http://localhost/a/b/test"), (True, "./test", "http://localhost/a/b/test"),