From 0b62e96401f44b3009a7bcabbde9a2b3fd9a206f Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Fri, 5 May 2023 20:04:53 +0100 Subject: [PATCH] GH-100479: Add `pathlib.PurePath.with_segments()` (GH-103975) Add `pathlib.PurePath.with_segments()`, which creates a path object from arguments. This method is called whenever a derivative path is created, such as from `pathlib.PurePath.parent`. Subclasses may override this method to share information between path objects. Co-authored-by: Alex Waygood --- Doc/library/pathlib.rst | 28 +++++++- Doc/whatsnew/3.12.rst | 5 ++ Lib/pathlib.py | 66 ++++++++++--------- Lib/test/test_pathlib.py | 52 ++++++++++----- ...-04-03-22-02-35.gh-issue-100479.kNBjQm.rst | 4 ++ 5 files changed, 108 insertions(+), 47 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-04-03-22-02-35.gh-issue-100479.kNBjQm.rst diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index 14118127835bbe..5ffa33d4e61f19 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -530,10 +530,10 @@ Pure paths provide the following methods and properties: unintended effects. -.. method:: PurePath.joinpath(*other) +.. method:: PurePath.joinpath(*pathsegments) Calling this method is equivalent to combining the path with each of - the *other* arguments in turn:: + the given *pathsegments* in turn:: >>> PurePosixPath('/etc').joinpath('passwd') PurePosixPath('/etc/passwd') @@ -680,6 +680,30 @@ Pure paths provide the following methods and properties: PureWindowsPath('README') +.. method:: PurePath.with_segments(*pathsegments) + + Create a new path object of the same type by combining the given + *pathsegments*. This method is called whenever a derivative path is created, + such as from :attr:`parent` and :meth:`relative_to`. Subclasses may + override this method to pass information to derivative paths, for example:: + + from pathlib import PurePosixPath + + class MyPath(PurePosixPath): + def __init__(self, *pathsegments, session_id): + super().__init__(*pathsegments) + self.session_id = session_id + + def with_segments(self, *pathsegments): + return type(self)(*pathsegments, session_id=self.session_id) + + etc = MyPath('/etc', session_id=42) + hosts = etc / 'hosts' + print(hosts.session_id) # 42 + + .. versionadded:: 3.12 + + .. _concrete-paths: diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst index 4f952e2a37ef2f..ccddc8bd832f29 100644 --- a/Doc/whatsnew/3.12.rst +++ b/Doc/whatsnew/3.12.rst @@ -348,6 +348,11 @@ inspect pathlib ------- +* Add support for subclassing :class:`pathlib.PurePath` and + :class:`~pathlib.Path`, plus their Posix- and Windows-specific variants. + Subclasses may override the :meth:`~pathlib.PurePath.with_segments` method + to pass information between path instances. + * Add :meth:`~pathlib.Path.walk` for walking the directory trees and generating all file or directory names within them, similar to :func:`os.walk`. (Contributed by Stanislav Zmiev in :gh:`90385`.) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index f32e1e2d822834..9aa3c1e52447d1 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -204,11 +204,10 @@ def _select_from(self, parent_path, scandir): class _PathParents(Sequence): """This object provides sequence-like access to the logical ancestors of a path. Don't try to construct it yourself.""" - __slots__ = ('_pathcls', '_drv', '_root', '_tail') + __slots__ = ('_path', '_drv', '_root', '_tail') def __init__(self, path): - # We don't store the instance to avoid reference cycles - self._pathcls = type(path) + self._path = path self._drv = path.drive self._root = path.root self._tail = path._tail @@ -224,11 +223,11 @@ def __getitem__(self, idx): raise IndexError(idx) if idx < 0: idx += len(self) - return self._pathcls._from_parsed_parts(self._drv, self._root, - self._tail[:-idx - 1]) + return self._path._from_parsed_parts(self._drv, self._root, + self._tail[:-idx - 1]) def __repr__(self): - return "<{}.parents>".format(self._pathcls.__name__) + return "<{}.parents>".format(type(self._path).__name__) class PurePath(object): @@ -316,6 +315,13 @@ def __init__(self, *args): else: self._raw_path = self._flavour.join(*paths) + def with_segments(self, *pathsegments): + """Construct a new path object from any number of path-like objects. + Subclasses may override this method to customize how new path objects + are created from methods like `iterdir()`. + """ + return type(self)(*pathsegments) + @classmethod def _parse_path(cls, path): if not path: @@ -342,15 +348,14 @@ def _load_parts(self): self._root = root self._tail_cached = tail - @classmethod - def _from_parsed_parts(cls, drv, root, tail): - path = cls._format_parsed_parts(drv, root, tail) - self = cls(path) - self._str = path or '.' - self._drv = drv - self._root = root - self._tail_cached = tail - return self + def _from_parsed_parts(self, drv, root, tail): + path_str = self._format_parsed_parts(drv, root, tail) + path = self.with_segments(path_str) + path._str = path_str or '.' + path._drv = drv + path._root = root + path._tail_cached = tail + return path @classmethod def _format_parsed_parts(cls, drv, root, tail): @@ -584,8 +589,7 @@ def relative_to(self, other, /, *_deprecated, walk_up=False): "scheduled for removal in Python {remove}") warnings._deprecated("pathlib.PurePath.relative_to(*args)", msg, remove=(3, 14)) - path_cls = type(self) - other = path_cls(other, *_deprecated) + other = self.with_segments(other, *_deprecated) for step, path in enumerate([other] + list(other.parents)): if self.is_relative_to(path): break @@ -594,7 +598,7 @@ def relative_to(self, other, /, *_deprecated, walk_up=False): if step and not walk_up: raise ValueError(f"{str(self)!r} is not in the subpath of {str(other)!r}") parts = ['..'] * step + self._tail[len(path._tail):] - return path_cls(*parts) + return self.with_segments(*parts) def is_relative_to(self, other, /, *_deprecated): """Return True if the path is relative to another path or False. @@ -605,7 +609,7 @@ def is_relative_to(self, other, /, *_deprecated): "scheduled for removal in Python {remove}") warnings._deprecated("pathlib.PurePath.is_relative_to(*args)", msg, remove=(3, 14)) - other = type(self)(other, *_deprecated) + other = self.with_segments(other, *_deprecated) return other == self or other in self.parents @property @@ -617,13 +621,13 @@ def parts(self): else: return tuple(self._tail) - def joinpath(self, *args): + def joinpath(self, *pathsegments): """Combine this path with one or several arguments, and return a new path representing either a subpath (if all arguments are relative paths) or a totally different path (if one of the arguments is anchored). """ - return self.__class__(self, *args) + return self.with_segments(self, *pathsegments) def __truediv__(self, key): try: @@ -633,7 +637,7 @@ def __truediv__(self, key): def __rtruediv__(self, key): try: - return type(self)(key, self) + return self.with_segments(key, self) except TypeError: return NotImplemented @@ -650,6 +654,8 @@ def parent(self): @property def parents(self): """A sequence of this path's logical parents.""" + # The value of this property should not be cached on the path object, + # as doing so would introduce a reference cycle. return _PathParents(self) def is_absolute(self): @@ -680,7 +686,7 @@ def match(self, path_pattern): """ Return True if this path matches the given pattern. """ - pat = type(self)(path_pattern) + pat = self.with_segments(path_pattern) if not pat.parts: raise ValueError("empty pattern") pat_parts = pat._parts_normcase @@ -755,7 +761,7 @@ def _make_child_relpath(self, name): path_str = f'{path_str}{name}' else: path_str = name - path = type(self)(path_str) + path = self.with_segments(path_str) path._str = path_str path._drv = self.drive path._root = self.root @@ -805,7 +811,7 @@ def samefile(self, other_path): try: other_st = other_path.stat() except AttributeError: - other_st = self.__class__(other_path).stat() + other_st = self.with_segments(other_path).stat() return self._flavour.samestat(st, other_st) def iterdir(self): @@ -867,7 +873,7 @@ def absolute(self): cwd = self._flavour.abspath(self.drive) else: cwd = os.getcwd() - return type(self)(cwd, self) + return self.with_segments(cwd, self) def resolve(self, strict=False): """ @@ -885,7 +891,7 @@ def check_eloop(e): except OSError as e: check_eloop(e) raise - p = type(self)(s) + p = self.with_segments(s) # In non-strict mode, realpath() doesn't raise on symlink loops. # Ensure we get an exception by calling stat() @@ -975,7 +981,7 @@ def readlink(self): """ if not hasattr(os, "readlink"): raise NotImplementedError("os.readlink() not available on this system") - return type(self)(os.readlink(self)) + return self.with_segments(os.readlink(self)) def touch(self, mode=0o666, exist_ok=True): """ @@ -1064,7 +1070,7 @@ def rename(self, target): Returns the new Path instance pointing to the target path. """ os.rename(self, target) - return self.__class__(target) + return self.with_segments(target) def replace(self, target): """ @@ -1077,7 +1083,7 @@ def replace(self, target): Returns the new Path instance pointing to the target path. """ os.replace(self, target) - return self.__class__(target) + return self.with_segments(target) def symlink_to(self, target, target_is_directory=False): """ diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index a932e03df4236d..7586610833b063 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -29,11 +29,12 @@ # class _BasePurePathSubclass(object): - init_called = False + def __init__(self, *pathsegments, session_id): + super().__init__(*pathsegments) + self.session_id = session_id - def __init__(self, *args): - super().__init__(*args) - self.init_called = True + def with_segments(self, *pathsegments): + return type(self)(*pathsegments, session_id=self.session_id) class _BasePurePathTest(object): @@ -121,20 +122,21 @@ def test_str_subclass_common(self): self._check_str_subclass('a/b.txt') self._check_str_subclass('/a/b.txt') - def test_init_called_common(self): + def test_with_segments_common(self): class P(_BasePurePathSubclass, self.cls): pass - p = P('foo', 'bar') - self.assertTrue((p / 'foo').init_called) - self.assertTrue(('foo' / p).init_called) - self.assertTrue(p.joinpath('foo').init_called) - self.assertTrue(p.with_name('foo').init_called) - self.assertTrue(p.with_stem('foo').init_called) - self.assertTrue(p.with_suffix('.foo').init_called) - self.assertTrue(p.relative_to('foo').init_called) - self.assertTrue(p.parent.init_called) + p = P('foo', 'bar', session_id=42) + self.assertEqual(42, (p / 'foo').session_id) + self.assertEqual(42, ('foo' / p).session_id) + self.assertEqual(42, p.joinpath('foo').session_id) + self.assertEqual(42, p.with_name('foo').session_id) + self.assertEqual(42, p.with_stem('foo').session_id) + self.assertEqual(42, p.with_suffix('.foo').session_id) + self.assertEqual(42, p.with_segments('foo').session_id) + self.assertEqual(42, p.relative_to('foo').session_id) + self.assertEqual(42, p.parent.session_id) for parent in p.parents: - self.assertTrue(parent.init_called) + self.assertEqual(42, parent.session_id) def _get_drive_root_parts(self, parts): path = self.cls(*parts) @@ -1647,6 +1649,26 @@ def test_home(self): env['HOME'] = os.path.join(BASE, 'home') self._test_home(self.cls.home()) + def test_with_segments(self): + class P(_BasePurePathSubclass, self.cls): + pass + p = P(BASE, session_id=42) + self.assertEqual(42, p.absolute().session_id) + self.assertEqual(42, p.resolve().session_id) + self.assertEqual(42, p.with_segments('~').expanduser().session_id) + self.assertEqual(42, (p / 'fileA').rename(p / 'fileB').session_id) + self.assertEqual(42, (p / 'fileB').replace(p / 'fileA').session_id) + if os_helper.can_symlink(): + self.assertEqual(42, (p / 'linkA').readlink().session_id) + for path in p.iterdir(): + self.assertEqual(42, path.session_id) + for path in p.glob('*'): + self.assertEqual(42, path.session_id) + for path in p.rglob('*'): + self.assertEqual(42, path.session_id) + for dirpath, dirnames, filenames in p.walk(): + self.assertEqual(42, dirpath.session_id) + def test_samefile(self): fileA_path = os.path.join(BASE, 'fileA') fileB_path = os.path.join(BASE, 'dirB', 'fileB') diff --git a/Misc/NEWS.d/next/Library/2023-04-03-22-02-35.gh-issue-100479.kNBjQm.rst b/Misc/NEWS.d/next/Library/2023-04-03-22-02-35.gh-issue-100479.kNBjQm.rst new file mode 100644 index 00000000000000..58db90480d2ff0 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-04-03-22-02-35.gh-issue-100479.kNBjQm.rst @@ -0,0 +1,4 @@ +Add :meth:`pathlib.PurePath.with_segments`, which creates a path object from +arguments. This method is called whenever a derivative path is created, such +as from :attr:`pathlib.PurePath.parent`. Subclasses may override this method +to share information between path objects.