From a959ea1b0a026ff118975b9b539513b06dde3190 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Sun, 22 Dec 2024 01:17:59 +0000 Subject: [PATCH] GH-127807: pathlib ABCs: remove `PurePathBase._raw_paths` (#127883) Remove the `PurePathBase` initializer, and make `with_segments()` and `__str__()` abstract. This allows us to drop the `_raw_paths` attribute, and also the `Parser.join()` protocol method. --- Lib/pathlib/_abc.py | 40 +++-------- Lib/pathlib/_local.py | 37 ++++++++-- Lib/pathlib/_types.py | 1 - Lib/test/test_pathlib/test_pathlib.py | 25 +++++++ Lib/test/test_pathlib/test_pathlib_abc.py | 85 ++++++++--------------- 5 files changed, 92 insertions(+), 96 deletions(-) diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index b4560295300..4402efe3a02 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -44,49 +44,25 @@ class PurePathBase: """Base class for pure path objects. This class *does not* provide several magic methods that are defined in - its subclass PurePath. They are: __fspath__, __bytes__, __reduce__, - __hash__, __eq__, __lt__, __le__, __gt__, __ge__. Its initializer and path - joining methods accept only strings, not os.PathLike objects more broadly. + its subclass PurePath. They are: __init__, __fspath__, __bytes__, + __reduce__, __hash__, __eq__, __lt__, __le__, __gt__, __ge__. """ - __slots__ = ( - # The `_raw_paths` slot stores unjoined string paths. This is set in - # the `__init__()` method. - '_raw_paths', - ) + __slots__ = () parser = posixpath _globber = PathGlobber - def __init__(self, *args): - for arg in args: - if not isinstance(arg, str): - raise TypeError( - f"argument should be a str, not {type(arg).__name__!r}") - self._raw_paths = list(args) - def with_segments(self, *pathsegments): """Construct a new path object from any number of path-like objects. Subclasses may override this method to customize how new path objects are created from methods like `iterdir()`. """ - return type(self)(*pathsegments) + raise NotImplementedError def __str__(self): """Return the string representation of the path, suitable for passing to system calls.""" - paths = self._raw_paths - if len(paths) == 1: - return paths[0] - elif paths: - # Join path segments from the initializer. - path = self.parser.join(*paths) - # Cache the joined path. - paths.clear() - paths.append(path) - return path - else: - paths.append('') - return '' + raise NotImplementedError def as_posix(self): """Return the string representation of the path with forward (/) @@ -234,17 +210,17 @@ def joinpath(self, *pathsegments): paths) or a totally different path (if one of the arguments is anchored). """ - return self.with_segments(*self._raw_paths, *pathsegments) + return self.with_segments(str(self), *pathsegments) def __truediv__(self, key): try: - return self.with_segments(*self._raw_paths, key) + return self.with_segments(str(self), key) except TypeError: return NotImplemented def __rtruediv__(self, key): try: - return self.with_segments(key, *self._raw_paths) + return self.with_segments(key, str(self)) except TypeError: return NotImplemented diff --git a/Lib/pathlib/_local.py b/Lib/pathlib/_local.py index b933dd512ee..7689c10604d 100644 --- a/Lib/pathlib/_local.py +++ b/Lib/pathlib/_local.py @@ -77,6 +77,10 @@ class PurePath(PurePathBase): """ __slots__ = ( + # The `_raw_paths` slot stores unjoined string paths. This is set in + # the `__init__()` method. + '_raw_paths', + # The `_drv`, `_root` and `_tail_cached` slots store parsed and # normalized parts of the path. They are set when any of the `drive`, # `root` or `_tail` properties are accessed for the first time. The @@ -140,9 +144,15 @@ def __init__(self, *args): "object where __fspath__ returns a str, " f"not {type(path).__name__!r}") paths.append(path) - # Avoid calling super().__init__, as an optimisation self._raw_paths = paths + def with_segments(self, *pathsegments): + """Construct a new path object from any number of path-like objects. + Subclasses may override this method to customize how new path objects + are created from methods like `iterdir()`. + """ + return type(self)(*pathsegments) + def joinpath(self, *pathsegments): """Combine this path with one or several arguments, and return a new path representing either a subpath (if all arguments are relative @@ -304,14 +314,29 @@ def _parse_pattern(cls, pattern): parts.append('') return parts + @property + def _raw_path(self): + paths = self._raw_paths + if len(paths) == 1: + return paths[0] + elif paths: + # Join path segments from the initializer. + path = self.parser.join(*paths) + # Cache the joined path. + paths.clear() + paths.append(path) + return path + else: + paths.append('') + return '' + @property def drive(self): """The drive prefix (letter or UNC path), if any.""" try: return self._drv except AttributeError: - raw_path = PurePathBase.__str__(self) - self._drv, self._root, self._tail_cached = self._parse_path(raw_path) + self._drv, self._root, self._tail_cached = self._parse_path(self._raw_path) return self._drv @property @@ -320,8 +345,7 @@ def root(self): try: return self._root except AttributeError: - raw_path = PurePathBase.__str__(self) - self._drv, self._root, self._tail_cached = self._parse_path(raw_path) + self._drv, self._root, self._tail_cached = self._parse_path(self._raw_path) return self._root @property @@ -329,8 +353,7 @@ def _tail(self): try: return self._tail_cached except AttributeError: - raw_path = PurePathBase.__str__(self) - self._drv, self._root, self._tail_cached = self._parse_path(raw_path) + self._drv, self._root, self._tail_cached = self._parse_path(self._raw_path) return self._tail_cached @property diff --git a/Lib/pathlib/_types.py b/Lib/pathlib/_types.py index 60df94d0b46..baa4a7e2af5 100644 --- a/Lib/pathlib/_types.py +++ b/Lib/pathlib/_types.py @@ -14,7 +14,6 @@ class Parser(Protocol): """ sep: str - def join(self, path: str, *paths: str) -> str: ... def split(self, path: str) -> tuple[str, str]: ... def splitdrive(self, path: str) -> tuple[str, str]: ... def splitext(self, path: str) -> tuple[str, str]: ... diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py index ac3a3b4f15c..ef482c31154 100644 --- a/Lib/test/test_pathlib/test_pathlib.py +++ b/Lib/test/test_pathlib/test_pathlib.py @@ -229,6 +229,31 @@ def test_fspath_common(self): self._check_str(p.__fspath__(), ('a/b',)) self._check_str(os.fspath(p), ('a/b',)) + def test_bytes(self): + P = self.cls + with self.assertRaises(TypeError): + P(b'a') + with self.assertRaises(TypeError): + P(b'a', 'b') + with self.assertRaises(TypeError): + P('a', b'b') + with self.assertRaises(TypeError): + P('a').joinpath(b'b') + with self.assertRaises(TypeError): + P('a') / b'b' + with self.assertRaises(TypeError): + b'a' / P('b') + with self.assertRaises(TypeError): + P('a').match(b'b') + with self.assertRaises(TypeError): + P('a').relative_to(b'b') + with self.assertRaises(TypeError): + P('a').with_name(b'b') + with self.assertRaises(TypeError): + P('a').with_stem(b'b') + with self.assertRaises(TypeError): + P('a').with_suffix(b'b') + def test_bytes_exc_message(self): P = self.cls message = (r"argument should be a str or an os\.PathLike object " diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index e230dd18879..9198a0cbc45 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -53,7 +53,15 @@ def test_parser(self): class DummyPurePath(PurePathBase): - __slots__ = () + __slots__ = ('_segments',) + + def __init__(self, *segments): + self._segments = segments + + def __str__(self): + if self._segments: + return self.parser.join(*self._segments) + return '' def __eq__(self, other): if not isinstance(other, DummyPurePath): @@ -66,6 +74,9 @@ def __hash__(self): def __repr__(self): return "{}({!r})".format(self.__class__.__name__, self.as_posix()) + def with_segments(self, *pathsegments): + return type(self)(*pathsegments) + class DummyPurePathTest(unittest.TestCase): cls = DummyPurePath @@ -97,30 +108,11 @@ def test_constructor_common(self): P('a/b/c') P('/a/b/c') - def test_bytes(self): - P = self.cls - with self.assertRaises(TypeError): - P(b'a') - with self.assertRaises(TypeError): - P(b'a', 'b') - with self.assertRaises(TypeError): - P('a', b'b') - with self.assertRaises(TypeError): - P('a').joinpath(b'b') - with self.assertRaises(TypeError): - P('a') / b'b' - with self.assertRaises(TypeError): - b'a' / P('b') - with self.assertRaises(TypeError): - P('a').match(b'b') - with self.assertRaises(TypeError): - P('a').relative_to(b'b') - with self.assertRaises(TypeError): - P('a').with_name(b'b') - with self.assertRaises(TypeError): - P('a').with_stem(b'b') - with self.assertRaises(TypeError): - P('a').with_suffix(b'b') + def test_fspath_common(self): + self.assertRaises(TypeError, os.fspath, self.cls('')) + + def test_as_bytes_common(self): + self.assertRaises(TypeError, bytes, self.cls('')) def _check_str_subclass(self, *args): # Issue #21127: it should be possible to construct a PurePath object @@ -1286,36 +1278,6 @@ def test_is_absolute_windows(self): # Tests for the virtual classes. # -class PathBaseTest(PurePathBaseTest): - cls = PathBase - - def test_not_implemented_error(self): - p = self.cls('') - e = NotImplementedError - self.assertRaises(e, p.stat) - self.assertRaises(e, p.exists) - self.assertRaises(e, p.is_dir) - self.assertRaises(e, p.is_file) - self.assertRaises(e, p.is_symlink) - self.assertRaises(e, p.open) - self.assertRaises(e, p.read_bytes) - self.assertRaises(e, p.read_text) - self.assertRaises(e, p.write_bytes, b'foo') - self.assertRaises(e, p.write_text, 'foo') - self.assertRaises(e, p.iterdir) - self.assertRaises(e, lambda: list(p.glob('*'))) - self.assertRaises(e, lambda: list(p.rglob('*'))) - self.assertRaises(e, lambda: list(p.walk())) - self.assertRaises(e, p.readlink) - self.assertRaises(e, p.symlink_to, 'foo') - self.assertRaises(e, p.mkdir) - - def test_fspath_common(self): - self.assertRaises(TypeError, os.fspath, self.cls('')) - - def test_as_bytes_common(self): - self.assertRaises(TypeError, bytes, self.cls('')) - class DummyPathIO(io.BytesIO): """ @@ -1342,11 +1304,19 @@ class DummyPath(PathBase): Simple implementation of PathBase that keeps files and directories in memory. """ - __slots__ = () + __slots__ = ('_segments') _files = {} _directories = {} + def __init__(self, *segments): + self._segments = segments + + def __str__(self): + if self._segments: + return self.parser.join(*self._segments) + return '' + def __eq__(self, other): if not isinstance(other, DummyPath): return NotImplemented @@ -1358,6 +1328,9 @@ def __hash__(self): def __repr__(self): return "{}({!r})".format(self.__class__.__name__, self.as_posix()) + def with_segments(self, *pathsegments): + return type(self)(*pathsegments) + def stat(self, *, follow_symlinks=True): path = str(self).rstrip('/') if path in self._files: