From 713df2c53489ce8012d0ede10b70950e6b0d8372 Mon Sep 17 00:00:00 2001 From: Stanislav Zmiev Date: Wed, 22 Mar 2023 18:45:25 +0400 Subject: [PATCH] GH-89727: Fix pathlib.Path.walk RecursionError on deep trees (GH-100282) Use a stack to implement `pathlib.Path.walk()` iteratively instead of recursively to avoid hitting recursion limits on deeply nested trees. Co-authored-by: Barney Gale Co-authored-by: Brett Cannon --- Lib/pathlib.py | 78 ++++++++++--------- Lib/test/test_pathlib.py | 13 ++++ ...2-12-16-10-27-58.gh-issue-89727.y64ZLM.rst | 1 + 3 files changed, 54 insertions(+), 38 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2022-12-16-10-27-58.gh-issue-89727.y64ZLM.rst diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 55c44f12e5a2fb..a126bf2fe5570a 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -1197,45 +1197,47 @@ def expanduser(self): def walk(self, top_down=True, on_error=None, follow_symlinks=False): """Walk the directory tree from this directory, similar to os.walk().""" sys.audit("pathlib.Path.walk", self, on_error, follow_symlinks) - return self._walk(top_down, on_error, follow_symlinks) - - def _walk(self, top_down, on_error, follow_symlinks): - # We may not have read permission for self, in which case we can't - # get a list of the files the directory contains. os.walk - # always suppressed the exception then, rather than blow up for a - # minor reason when (say) a thousand readable directories are still - # left to visit. That logic is copied here. - try: - scandir_it = self._scandir() - except OSError as error: - if on_error is not None: - on_error(error) - return - - with scandir_it: - dirnames = [] - filenames = [] - for entry in scandir_it: - try: - is_dir = entry.is_dir(follow_symlinks=follow_symlinks) - except OSError: - # Carried over from os.path.isdir(). - is_dir = False - - if is_dir: - dirnames.append(entry.name) - else: - filenames.append(entry.name) - - if top_down: - yield self, dirnames, filenames - - for dirname in dirnames: - dirpath = self._make_child_relpath(dirname) - yield from dirpath._walk(top_down, on_error, follow_symlinks) + paths = [self] + + while paths: + path = paths.pop() + if isinstance(path, tuple): + yield path + continue + + # We may not have read permission for self, in which case we can't + # get a list of the files the directory contains. os.walk() + # always suppressed the exception in that instance, rather than + # blow up for a minor reason when (say) a thousand readable + # directories are still left to visit. That logic is copied here. + try: + scandir_it = path._scandir() + except OSError as error: + if on_error is not None: + on_error(error) + continue + + with scandir_it: + dirnames = [] + filenames = [] + for entry in scandir_it: + try: + is_dir = entry.is_dir(follow_symlinks=follow_symlinks) + except OSError: + # Carried over from os.path.isdir(). + is_dir = False + + if is_dir: + dirnames.append(entry.name) + else: + filenames.append(entry.name) + + if top_down: + yield path, dirnames, filenames + else: + paths.append((path, dirnames, filenames)) - if not top_down: - yield self, dirnames, filenames + paths += [path._make_child_relpath(d) for d in reversed(dirnames)] class PosixPath(Path, PurePosixPath): diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index f05dead5886743..3041630da67899 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -13,6 +13,7 @@ from unittest import mock from test.support import import_helper +from test.support import set_recursion_limit from test.support import is_emscripten, is_wasi from test.support import os_helper from test.support.os_helper import TESTFN, FakePath @@ -2793,6 +2794,18 @@ def test_walk_many_open_files(self): self.assertEqual(next(it), expected) path = path / 'd' + def test_walk_above_recursion_limit(self): + recursion_limit = 40 + # directory_depth > recursion_limit + directory_depth = recursion_limit + 10 + base = pathlib.Path(os_helper.TESTFN, 'deep') + path = pathlib.Path(base, *(['d'] * directory_depth)) + path.mkdir(parents=True) + + with set_recursion_limit(recursion_limit): + list(base.walk()) + list(base.walk(top_down=False)) + class PathTest(_BasePathTest, unittest.TestCase): cls = pathlib.Path diff --git a/Misc/NEWS.d/next/Library/2022-12-16-10-27-58.gh-issue-89727.y64ZLM.rst b/Misc/NEWS.d/next/Library/2022-12-16-10-27-58.gh-issue-89727.y64ZLM.rst new file mode 100644 index 00000000000000..f9ac1475dceb00 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-12-16-10-27-58.gh-issue-89727.y64ZLM.rst @@ -0,0 +1 @@ +Fix pathlib.Path.walk RecursionError on deep directory trees by rewriting it using iteration instead of recursion.