From 99f4691399229be2daacfacdba09ab4d4805f5c4 Mon Sep 17 00:00:00 2001 From: Jon Burdo Date: Sat, 26 Nov 2022 18:52:03 -0500 Subject: [PATCH] gh-89727: Fix os.walk RecursionError on deep trees Use a stack to implement os.walk iteratively instead of recursively to avoid hitting recursion limits on deeply nested trees. --- Lib/os.py | 158 ++++++++++-------- ...2-11-29-20-44-54.gh-issue-89727.UJZjkk.rst | 3 + 2 files changed, 87 insertions(+), 74 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2022-11-29-20-44-54.gh-issue-89727.UJZjkk.rst diff --git a/Lib/os.py b/Lib/os.py index fd1e774fdcbcfa..5dff593260fe30 100644 --- a/Lib/os.py +++ b/Lib/os.py @@ -343,86 +343,96 @@ def walk(top, topdown=True, onerror=None, followlinks=False): return _walk(fspath(top), topdown, onerror, followlinks) def _walk(top, topdown, onerror, followlinks): - dirs = [] - nondirs = [] - walk_dirs = [] - - # We may not have read permission for top, in which case we can't - # get a list of the files the directory contains. os.walk - # always suppressed the exception then, rather than blow up for a - # minor reason when (say) a thousand readable directories are still - # left to visit. That logic is copied here. - try: - # Note that scandir is global in this module due - # to earlier import-*. - scandir_it = scandir(top) - except OSError as error: - if onerror is not None: - onerror(error) - return + stack = [(False, top)] + while stack: + is_result, top = stack.pop() + if is_result: + yield top + continue - with scandir_it: - while True: - try: + dirs = [] + nondirs = [] + walk_dirs = [] + + # We may not have read permission for top, in which case we can't + # get a list of the files the directory contains. os.walk + # always suppressed the exception then, rather than blow up for a + # minor reason when (say) a thousand readable directories are still + # left to visit. That logic is copied here. + try: + # Note that scandir is global in this module due + # to earlier import-*. + scandir_it = scandir(top) + except OSError as error: + if onerror is not None: + onerror(error) + continue + + cont = False + with scandir_it: + while True: try: - entry = next(scandir_it) - except StopIteration: + try: + entry = next(scandir_it) + except StopIteration: + break + except OSError as error: + if onerror is not None: + onerror(error) + cont = True break - except OSError as error: - if onerror is not None: - onerror(error) - return - try: - is_dir = entry.is_dir() - except OSError: - # If is_dir() raises an OSError, consider that the entry is not - # a directory, same behaviour than os.path.isdir(). - is_dir = False - - if is_dir: - dirs.append(entry.name) - else: - nondirs.append(entry.name) + try: + is_dir = entry.is_dir() + except OSError: + # If is_dir() raises an OSError, consider that the entry is not + # a directory, same behaviour than os.path.isdir(). + is_dir = False - if not topdown and is_dir: - # Bottom-up: recurse into sub-directory, but exclude symlinks to - # directories if followlinks is False - if followlinks: - walk_into = True + if is_dir: + dirs.append(entry.name) else: - try: - is_symlink = entry.is_symlink() - except OSError: - # If is_symlink() raises an OSError, consider that the - # entry is not a symbolic link, same behaviour than - # os.path.islink(). - is_symlink = False - walk_into = not is_symlink - - if walk_into: - walk_dirs.append(entry.path) - - # Yield before recursion if going top down - if topdown: - yield top, dirs, nondirs - - # Recurse into sub-directories - islink, join = path.islink, path.join - for dirname in dirs: - new_path = join(top, dirname) - # Issue #23605: os.path.islink() is used instead of caching - # entry.is_symlink() result during the loop on os.scandir() because - # the caller can replace the directory entry during the "yield" - # above. - if followlinks or not islink(new_path): - yield from _walk(new_path, topdown, onerror, followlinks) - else: - # Recurse into sub-directories - for new_path in walk_dirs: - yield from _walk(new_path, topdown, onerror, followlinks) - # Yield after recursion if going bottom up - yield top, dirs, nondirs + nondirs.append(entry.name) + + if not topdown and is_dir: + # Bottom-up: traverse into sub-directory, but exclude symlinks to + # directories if followlinks is False + if followlinks: + walk_into = True + else: + try: + is_symlink = entry.is_symlink() + except OSError: + # If is_symlink() raises an OSError, consider that the + # entry is not a symbolic link, same behaviour than + # os.path.islink(). + is_symlink = False + walk_into = not is_symlink + + if walk_into: + walk_dirs.append(entry.path) + if cont: + continue + + # Yield before sub-directory traversal if going top down + if topdown: + yield top, dirs, nondirs + # Traverse into sub-directories + islink, join = path.islink, path.join + for dirname in reversed(dirs): + new_path = join(top, dirname) + # Issue #23605: os.path.islink() is used instead of caching + # entry.is_symlink() result during the loop on os.scandir() because + # the caller can replace the directory entry during the "yield" + # above. + if followlinks or not islink(new_path): + stack.append((False, new_path)) + else: + # Yield after sub-directory traversal if going bottom up + stack.append((True, (top, dirs, nondirs))) + # Traverse into sub-directories + for new_path in reversed(walk_dirs): + stack.append((False, new_path)) __all__.append("walk") diff --git a/Misc/NEWS.d/next/Library/2022-11-29-20-44-54.gh-issue-89727.UJZjkk.rst b/Misc/NEWS.d/next/Library/2022-11-29-20-44-54.gh-issue-89727.UJZjkk.rst new file mode 100644 index 00000000000000..9b86f0cbaec2ad --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-11-29-20-44-54.gh-issue-89727.UJZjkk.rst @@ -0,0 +1,3 @@ +Fix issue with :func:`os.walk` where a :exc:`RecursionError` would occur on +deep directory structures by adjusting the implementation of +:func:`os._walk` to be iterative instead of recursive.