From 6bb91859f27c5939f007ee5424e7ed0489fe5e36 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Thu, 30 May 2024 04:05:36 +0100 Subject: [PATCH] GH-89727: Fix `os.fwalk()` recursion error on deep trees (#119638) Implement `os.fwalk()` using a list as a stack to avoid emitting recursion errors on deeply nested trees. --- Lib/os.py | 91 +++++++++++-------- Lib/test/test_os.py | 2 - ...4-05-28-00-56-59.gh-issue-89727._bxoL3.rst | 3 + 3 files changed, 56 insertions(+), 40 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-05-28-00-56-59.gh-issue-89727._bxoL3.rst diff --git a/Lib/os.py b/Lib/os.py index ae9e646361e82c8..cef5d90a8c23dfc 100644 --- a/Lib/os.py +++ b/Lib/os.py @@ -478,24 +478,52 @@ def fwalk(top=".", topdown=True, onerror=None, *, follow_symlinks=False, dir_fd= """ sys.audit("os.fwalk", top, topdown, onerror, follow_symlinks, dir_fd) top = fspath(top) - # Note: To guard against symlink races, we use the standard - # lstat()/open()/fstat() trick. - if not follow_symlinks: - orig_st = stat(top, follow_symlinks=False, dir_fd=dir_fd) - topfd = open(top, O_RDONLY | O_NONBLOCK, dir_fd=dir_fd) - try: - if (follow_symlinks or (st.S_ISDIR(orig_st.st_mode) and - path.samestat(orig_st, stat(topfd)))): - yield from _fwalk(topfd, top, isinstance(top, bytes), - topdown, onerror, follow_symlinks) - finally: - close(topfd) - - def _fwalk(topfd, toppath, isbytes, topdown, onerror, follow_symlinks): + stack = [(_fwalk_walk, (True, dir_fd, top, top, None))] + isbytes = isinstance(top, bytes) + while stack: + yield from _fwalk(stack, isbytes, topdown, onerror, follow_symlinks) + + # Each item in the _fwalk() stack is a pair (action, args). + _fwalk_walk = 0 # args: (isroot, dirfd, toppath, topname, entry) + _fwalk_yield = 1 # args: (toppath, dirnames, filenames, topfd) + _fwalk_close = 2 # args: dirfd + + def _fwalk(stack, isbytes, topdown, onerror, follow_symlinks): # Note: This uses O(depth of the directory tree) file descriptors: if # necessary, it can be adapted to only require O(1) FDs, see issue # #13734. + action, value = stack.pop() + if action == _fwalk_close: + close(value) + return + elif action == _fwalk_yield: + yield value + return + assert action == _fwalk_walk + isroot, dirfd, toppath, topname, entry = value + try: + if not follow_symlinks: + # Note: To guard against symlink races, we use the standard + # lstat()/open()/fstat() trick. + if entry is None: + orig_st = stat(topname, follow_symlinks=False, dir_fd=dirfd) + else: + orig_st = entry.stat(follow_symlinks=False) + topfd = open(topname, O_RDONLY | O_NONBLOCK, dir_fd=dirfd) + except OSError as err: + if isroot: + raise + if onerror is not None: + onerror(err) + return + stack.append((_fwalk_close, topfd)) + if not follow_symlinks: + if isroot and not st.S_ISDIR(orig_st.st_mode): + return + if not path.samestat(orig_st, stat(topfd)): + return + scandir_it = scandir(topfd) dirs = [] nondirs = [] @@ -521,31 +549,18 @@ def _fwalk(topfd, toppath, isbytes, topdown, onerror, follow_symlinks): if topdown: yield toppath, dirs, nondirs, topfd + else: + stack.append((_fwalk_yield, (toppath, dirs, nondirs, topfd))) - for name in dirs if entries is None else zip(dirs, entries): - try: - if not follow_symlinks: - if topdown: - orig_st = stat(name, dir_fd=topfd, follow_symlinks=False) - else: - assert entries is not None - name, entry = name - orig_st = entry.stat(follow_symlinks=False) - dirfd = open(name, O_RDONLY | O_NONBLOCK, dir_fd=topfd) - except OSError as err: - if onerror is not None: - onerror(err) - continue - try: - if follow_symlinks or path.samestat(orig_st, stat(dirfd)): - dirpath = path.join(toppath, name) - yield from _fwalk(dirfd, dirpath, isbytes, - topdown, onerror, follow_symlinks) - finally: - close(dirfd) - - if not topdown: - yield toppath, dirs, nondirs, topfd + toppath = path.join(toppath, toppath[:0]) # Add trailing slash. + if entries is None: + stack.extend( + (_fwalk_walk, (False, topfd, toppath + name, name, None)) + for name in dirs[::-1]) + else: + stack.extend( + (_fwalk_walk, (False, topfd, toppath + name, name, entry)) + for name, entry in zip(dirs[::-1], entries[::-1])) __all__.append("fwalk") diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py index 941fa2b2c5c87f3..7dc5784820e8476 100644 --- a/Lib/test/test_os.py +++ b/Lib/test/test_os.py @@ -1687,8 +1687,6 @@ def test_fd_leak(self): # fwalk() keeps file descriptors open test_walk_many_open_files = None - # fwalk() still uses recursion - test_walk_above_recursion_limit = None class BytesWalkTests(WalkTests): diff --git a/Misc/NEWS.d/next/Library/2024-05-28-00-56-59.gh-issue-89727._bxoL3.rst b/Misc/NEWS.d/next/Library/2024-05-28-00-56-59.gh-issue-89727._bxoL3.rst new file mode 100644 index 000000000000000..92222bc673350f2 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-05-28-00-56-59.gh-issue-89727._bxoL3.rst @@ -0,0 +1,3 @@ +Fix issue with :func:`os.fwalk` where a :exc:`RecursionError` was raised on +deep directory trees by adjusting the implementation to be iterative instead +of recursive.