From 40b9b506fb88c5cb6d36617015d74b2cf21da43d Mon Sep 17 00:00:00 2001 From: "Miss Islington (bot)" <31488909+miss-islington@users.noreply.github.com> Date: Wed, 20 Nov 2024 20:44:17 +0100 Subject: [PATCH 1/2] [3.13] Run `apt update` before `apt install git` in autoconf CI job (GH-127070) (cherry picked from commit 0af4ec30bd2e3a52350344d1011c0c125d6dcd71) Co-authored-by: Zachary Ware --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 19c59c53c99e14..870931e5cbbbac 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -96,7 +96,7 @@ jobs: steps: - name: Install Git run: | - apt install git -yq + apt update && apt install git -yq git config --global --add safe.directory "$GITHUB_WORKSPACE" - uses: actions/checkout@v4 with: From 746a0c5bc8979d6265e9669987eff52e7631222a Mon Sep 17 00:00:00 2001 From: "Miss Islington (bot)" <31488909+miss-islington@users.noreply.github.com> Date: Wed, 20 Nov 2024 21:10:29 +0100 Subject: [PATCH 2/2] [3.13] GH-85168: Use filesystem encoding when converting to/from `file` URIs (GH-126852) (#127039) GH-85168: Use filesystem encoding when converting to/from `file` URIs (GH-126852) Adjust `urllib.request.url2pathname()` and `pathname2url()` to use the filesystem encoding when quoting and unquoting file URIs, rather than forcing use of UTF-8. No changes are needed in the `nturl2path` module because Windows always uses UTF-8, per PEP 529. (cherry picked from commit c9b399fbdb01584dcfff0d7f6ad484644ff269c3) Co-authored-by: Barney Gale --- Lib/test/test_urllib.py | 20 +++++++++++++++---- Lib/test/test_urllib2.py | 4 ---- Lib/urllib/request.py | 8 ++++++-- ...4-11-15-01-50-36.gh-issue-85168.bP8VIN.rst | 4 ++++ 4 files changed, 26 insertions(+), 10 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-11-15-01-50-36.gh-issue-85168.bP8VIN.rst diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py index 2c53ce3f99e675..ab18e80663e3bc 100644 --- a/Lib/test/test_urllib.py +++ b/Lib/test/test_urllib.py @@ -709,10 +709,6 @@ def tearDown(self): def constructLocalFileUrl(self, filePath): filePath = os.path.abspath(filePath) - try: - filePath.encode("utf-8") - except UnicodeEncodeError: - raise unittest.SkipTest("filePath is not encodable to utf8") return "file://%s" % urllib.request.pathname2url(filePath) def createNewTempFile(self, data=b""): @@ -1562,6 +1558,13 @@ def test_pathname2url_posix(self): self.assertEqual(fn('/a/b.c'), '/a/b.c') self.assertEqual(fn('/a/b%#c'), '/a/b%25%23c') + @unittest.skipUnless(os_helper.FS_NONASCII, 'need os_helper.FS_NONASCII') + def test_pathname2url_nonascii(self): + encoding = sys.getfilesystemencoding() + errors = sys.getfilesystemencodeerrors() + url = urllib.parse.quote(os_helper.FS_NONASCII, encoding=encoding, errors=errors) + self.assertEqual(urllib.request.pathname2url(os_helper.FS_NONASCII), url) + @unittest.skipUnless(sys.platform == 'win32', 'test specific to Windows pathnames.') def test_url2pathname_win(self): @@ -1612,6 +1615,15 @@ def test_url2pathname_posix(self): self.assertEqual(fn('////foo/bar'), '//foo/bar') self.assertEqual(fn('//localhost/foo/bar'), '//localhost/foo/bar') + @unittest.skipUnless(os_helper.FS_NONASCII, 'need os_helper.FS_NONASCII') + def test_url2pathname_nonascii(self): + encoding = sys.getfilesystemencoding() + errors = sys.getfilesystemencodeerrors() + url = os_helper.FS_NONASCII + self.assertEqual(urllib.request.url2pathname(url), os_helper.FS_NONASCII) + url = urllib.parse.quote(url, encoding=encoding, errors=errors) + self.assertEqual(urllib.request.url2pathname(url), os_helper.FS_NONASCII) + class Utility_Tests(unittest.TestCase): """Testcase to test the various utility functions in the urllib.""" diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py index eed0599642edfb..068dd859f27220 100644 --- a/Lib/test/test_urllib2.py +++ b/Lib/test/test_urllib2.py @@ -717,10 +717,6 @@ def test_processors(self): def sanepathname2url(path): - try: - path.encode("utf-8") - except UnicodeEncodeError: - raise unittest.SkipTest("path is not encodable to utf8") urlpath = urllib.request.pathname2url(path) if os.name == "nt" and urlpath.startswith("///"): urlpath = urlpath[2:] diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index 9eb819ca53229f..f0321814c69509 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -1660,12 +1660,16 @@ def url2pathname(pathname): # URL has an empty authority section, so the path begins on the # third character. pathname = pathname[2:] - return unquote(pathname) + encoding = sys.getfilesystemencoding() + errors = sys.getfilesystemencodeerrors() + return unquote(pathname, encoding=encoding, errors=errors) def pathname2url(pathname): """OS-specific conversion from a file system path to a relative URL of the 'file' scheme; not recommended for general use.""" - return quote(pathname) + encoding = sys.getfilesystemencoding() + errors = sys.getfilesystemencodeerrors() + return quote(pathname, encoding=encoding, errors=errors) ftpcache = {} diff --git a/Misc/NEWS.d/next/Library/2024-11-15-01-50-36.gh-issue-85168.bP8VIN.rst b/Misc/NEWS.d/next/Library/2024-11-15-01-50-36.gh-issue-85168.bP8VIN.rst new file mode 100644 index 00000000000000..abceda8f6fd707 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-15-01-50-36.gh-issue-85168.bP8VIN.rst @@ -0,0 +1,4 @@ +Fix issue where :func:`urllib.request.url2pathname` and +:func:`~urllib.request.pathname2url` always used UTF-8 when quoting and +unquoting file URIs. They now use the :term:`filesystem encoding and error +handler`.