[3.13] GH-85168: Use filesystem encoding when converting to/from `file` URIs (GH-126852) (#127039)

GH-85168: Use filesystem encoding when converting to/from `file` URIs (GH-126852)

Adjust `urllib.request.url2pathname()` and `pathname2url()` to use the
filesystem encoding when quoting and unquoting file URIs, rather than
forcing use of UTF-8.

No changes are needed in the `nturl2path` module because Windows always
uses UTF-8, per PEP 529.
(cherry picked from commit c9b399fbdb)

Co-authored-by: Barney Gale <barney.gale@gmail.com>
This commit is contained in:
Miss Islington (bot) 2024-11-20 21:10:29 +01:00 committed by GitHub
parent 40b9b506fb
commit 746a0c5bc8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 26 additions and 10 deletions

View File

@ -709,10 +709,6 @@ def tearDown(self):
def constructLocalFileUrl(self, filePath):
    # Return a "file://" URL for filePath (made absolute first).
    #
    # There is deliberately no UTF-8 encodability pre-check here:
    # pathname2url() is expected to quote using the filesystem encoding
    # and error handler, so a path that is not encodable to UTF-8 is no
    # longer a reason to skip the test.
    filePath = os.path.abspath(filePath)
    return "file://%s" % urllib.request.pathname2url(filePath)
def createNewTempFile(self, data=b""):
@ -1562,6 +1558,13 @@ def test_pathname2url_posix(self):
self.assertEqual(fn('/a/b.c'), '/a/b.c')
self.assertEqual(fn('/a/b%#c'), '/a/b%25%23c')
@unittest.skipUnless(os_helper.FS_NONASCII, 'need os_helper.FS_NONASCII')
def test_pathname2url_nonascii(self):
    # The expected URL is the sample non-ASCII file name percent-encoded
    # with the filesystem encoding and error handler.
    expected = urllib.parse.quote(
        os_helper.FS_NONASCII,
        encoding=sys.getfilesystemencoding(),
        errors=sys.getfilesystemencodeerrors(),
    )
    actual = urllib.request.pathname2url(os_helper.FS_NONASCII)
    self.assertEqual(actual, expected)
@unittest.skipUnless(sys.platform == 'win32',
'test specific to Windows pathnames.')
def test_url2pathname_win(self):
@ -1612,6 +1615,15 @@ def test_url2pathname_posix(self):
self.assertEqual(fn('////foo/bar'), '//foo/bar')
self.assertEqual(fn('//localhost/foo/bar'), '//localhost/foo/bar')
@unittest.skipUnless(os_helper.FS_NONASCII, 'need os_helper.FS_NONASCII')
def test_url2pathname_nonascii(self):
    expected = os_helper.FS_NONASCII

    # Case 1: the non-ASCII name is passed as-is (not percent-encoded).
    self.assertEqual(urllib.request.url2pathname(expected), expected)

    # Case 2: the name is percent-encoded with the filesystem encoding
    # and error handler; url2pathname() must decode it back.
    quoted = urllib.parse.quote(
        expected,
        encoding=sys.getfilesystemencoding(),
        errors=sys.getfilesystemencodeerrors(),
    )
    self.assertEqual(urllib.request.url2pathname(quoted), expected)
class Utility_Tests(unittest.TestCase):
"""Testcase to test the various utility functions in the urllib."""

View File

@ -717,10 +717,6 @@ def test_processors(self):
def sanepathname2url(path):
try:
path.encode("utf-8")
except UnicodeEncodeError:
raise unittest.SkipTest("path is not encodable to utf8")
urlpath = urllib.request.pathname2url(path)
if os.name == "nt" and urlpath.startswith("///"):
urlpath = urlpath[2:]

View File

@ -1660,12 +1660,16 @@ def url2pathname(pathname):
# URL has an empty authority section, so the path begins on the
# third character.
pathname = pathname[2:]
return unquote(pathname)
encoding = sys.getfilesystemencoding()
errors = sys.getfilesystemencodeerrors()
return unquote(pathname, encoding=encoding, errors=errors)
def pathname2url(pathname):
    """OS-specific conversion from a file system path to a relative URL
    of the 'file' scheme; not recommended for general use.

    The path is percent-encoded using the filesystem encoding and error
    handler (sys.getfilesystemencoding() / sys.getfilesystemencodeerrors())
    instead of quote()'s UTF-8 default.
    """
    # The former unconditional ``return quote(pathname)`` is gone: it made
    # the filesystem-encoding code below unreachable and forced UTF-8.
    encoding = sys.getfilesystemencoding()
    errors = sys.getfilesystemencodeerrors()
    return quote(pathname, encoding=encoding, errors=errors)
ftpcache = {}

View File

@ -0,0 +1,4 @@
Fix issue where :func:`urllib.request.url2pathname` and
:func:`~urllib.request.pathname2url` always used UTF-8 when quoting and
unquoting file URIs. They now use the :term:`filesystem encoding and error
handler`.