mirror of https://github.com/python/cpython.git
GH-127236: `pathname2url()`: generate RFC 1738 URL for absolute POSIX path (#127194)
When handed an absolute Windows path such as `C:\foo` or `//server/share`, the `urllib.request.pathname2url()` function returns a URL with an authority section, such as `///C:/foo` or `//server/share` (or, before GH-126205, `////server/share`). Only the `file:` prefix is omitted. However, when handed an absolute POSIX path such as `/etc/hosts`, or a Windows path of the same form (rooted but lacking a drive), the function returns a URL without an authority section, such as `/etc/hosts`. This patch corrects the discrepancy by adding a `//` prefix before drive-less, rooted paths when generating URLs, so that `/etc/hosts` is now converted to `///etc/hosts`.
This commit is contained in:
parent
a2ee899682
commit
5bb059fe60
|
@ -159,12 +159,14 @@ The :mod:`urllib.request` module defines the following functions:
|
|||
'file:///C:/Program%20Files'
|
||||
|
||||
.. versionchanged:: 3.14
|
||||
Windows drive letters are no longer converted to uppercase.
|
||||
Paths beginning with a slash are converted to URLs with authority
|
||||
sections. For example, the path ``/etc/hosts`` is converted to
|
||||
the URL ``///etc/hosts``.
|
||||
|
||||
.. versionchanged:: 3.14
|
||||
On Windows, ``:`` characters not following a drive letter are quoted. In
|
||||
previous versions, :exc:`OSError` was raised if a colon character was
|
||||
found in any position other than the second character.
|
||||
Windows drive letters are no longer converted to uppercase, and ``:``
|
||||
characters not following a drive letter no longer cause an
|
||||
:exc:`OSError` exception to be raised on Windows.
|
||||
|
||||
|
||||
.. function:: url2pathname(url)
|
||||
|
|
|
@ -55,13 +55,17 @@ def pathname2url(p):
|
|||
p = p[4:]
|
||||
if p[:4].upper() == 'UNC/':
|
||||
p = '//' + p[4:]
|
||||
drive, tail = ntpath.splitdrive(p)
|
||||
if drive[1:] == ':':
|
||||
# DOS drive specified. Add three slashes to the start, producing
|
||||
# an authority section with a zero-length authority, and a path
|
||||
# section starting with a single slash.
|
||||
drive = f'///{drive}'
|
||||
drive, root, tail = ntpath.splitroot(p)
|
||||
if drive:
|
||||
if drive[1:] == ':':
|
||||
# DOS drive specified. Add three slashes to the start, producing
|
||||
# an authority section with a zero-length authority, and a path
|
||||
# section starting with a single slash.
|
||||
drive = f'///{drive}'
|
||||
drive = urllib.parse.quote(drive, safe='/:')
|
||||
elif root:
|
||||
# Add explicitly empty authority to path beginning with one slash.
|
||||
root = f'//{root}'
|
||||
|
||||
drive = urllib.parse.quote(drive, safe='/:')
|
||||
tail = urllib.parse.quote(tail)
|
||||
return drive + tail
|
||||
return drive + root + tail
|
||||
|
|
|
@ -1434,7 +1434,7 @@ def test_pathname2url_win(self):
|
|||
self.assertEqual(fn('C:\\foo:bar'), '///C:/foo%3Abar')
|
||||
self.assertEqual(fn('foo:bar'), 'foo%3Abar')
|
||||
# No drive letter
|
||||
self.assertEqual(fn("\\folder\\test\\"), '/folder/test/')
|
||||
self.assertEqual(fn("\\folder\\test\\"), '///folder/test/')
|
||||
self.assertEqual(fn("\\\\folder\\test\\"), '//folder/test/')
|
||||
self.assertEqual(fn("\\\\\\folder\\test\\"), '///folder/test/')
|
||||
self.assertEqual(fn('\\\\some\\share\\'), '//some/share/')
|
||||
|
@ -1447,7 +1447,7 @@ def test_pathname2url_win(self):
|
|||
self.assertEqual(fn('//?/unc/server/share/dir'), '//server/share/dir')
|
||||
# Round-tripping
|
||||
urls = ['///C:',
|
||||
'/folder/test/',
|
||||
'///folder/test/',
|
||||
'///C:/foo/bar/spam.foo']
|
||||
for url in urls:
|
||||
self.assertEqual(fn(urllib.request.url2pathname(url)), url)
|
||||
|
@ -1456,12 +1456,12 @@ def test_pathname2url_win(self):
|
|||
'test specific to POSIX pathnames')
|
||||
def test_pathname2url_posix(self):
|
||||
fn = urllib.request.pathname2url
|
||||
self.assertEqual(fn('/'), '/')
|
||||
self.assertEqual(fn('/a/b.c'), '/a/b.c')
|
||||
self.assertEqual(fn('/'), '///')
|
||||
self.assertEqual(fn('/a/b.c'), '///a/b.c')
|
||||
self.assertEqual(fn('//a/b.c'), '////a/b.c')
|
||||
self.assertEqual(fn('///a/b.c'), '/////a/b.c')
|
||||
self.assertEqual(fn('////a/b.c'), '//////a/b.c')
|
||||
self.assertEqual(fn('/a/b%#c'), '/a/b%25%23c')
|
||||
self.assertEqual(fn('/a/b%#c'), '///a/b%25%23c')
|
||||
|
||||
@unittest.skipUnless(os_helper.FS_NONASCII, 'need os_helper.FS_NONASCII')
|
||||
def test_pathname2url_nonascii(self):
|
||||
|
|
|
@ -1667,9 +1667,11 @@ def url2pathname(pathname):
|
|||
def pathname2url(pathname):
|
||||
"""OS-specific conversion from a file system path to a relative URL
|
||||
of the 'file' scheme; not recommended for general use."""
|
||||
if pathname[:2] == '//':
|
||||
# Add explicitly empty authority to avoid interpreting the path
|
||||
# as authority.
|
||||
if pathname[:1] == '/':
|
||||
# Add explicitly empty authority to absolute path. If the path
|
||||
# starts with exactly one slash then this change is mostly
|
||||
# cosmetic, but if it begins with two or more slashes then this
|
||||
# avoids interpreting the path as a URL authority.
|
||||
pathname = '//' + pathname
|
||||
encoding = sys.getfilesystemencoding()
|
||||
errors = sys.getfilesystemencodeerrors()
|
||||
|
|
|
@ -0,0 +1,5 @@
|
|||
:func:`urllib.request.pathname2url` now adds an empty authority when
|
||||
generating a URL for a path that begins with exactly one slash. For example,
|
||||
the path ``/etc/hosts`` is converted to the scheme-less URL ``///etc/hosts``.
|
||||
As a result of this change, URLs without authorities are only generated for
|
||||
relative paths.
|
Loading…
Reference in New Issue