Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gh-89727: Fix os.fwalk RecursionError on deep trees #100347

Closed
wants to merge 31 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
d9f6f44
fix os.walk RecursionError on deep trees
jonburdo Dec 19, 2022
61b8078
use _WalkAction enum
jonburdo Dec 19, 2022
d5e2042
Merge branch 'main' into iterative-os-fwalk
jonburdo Dec 19, 2022
90c123b
fix formatting
jonburdo Dec 20, 2022
570818b
remove enum from os
jonburdo Dec 20, 2022
30aeed7
add blurb
jonburdo Dec 20, 2022
55e17b8
gh-89727: Fix os.walk RecursionError on deep trees (#99803)
jonburdo Dec 19, 2022
f41cef6
gh-69929: re docs: Add more specific definition of \w (#92015)
slateny Dec 20, 2022
33ba6a5
gh-89051: Add ssl.OP_LEGACY_SERVER_CONNECT (#93927)
graingert Dec 20, 2022
f9b6796
gh-88211: Change lower-case and upper-case to match recommendations i…
tbwolfe Dec 20, 2022
77d160f
gh-100348: Fix ref cycle in `asyncio._SelectorSocketTransport` with `…
rkojedzinszky Dec 20, 2022
db820a2
gh-99925: Fix inconsistency in `json.dumps()` error messages (GH-99926)
fnesveda Dec 20, 2022
c39ce63
Clarify that every thread has its own default context in contextvars …
pablogsal Dec 20, 2022
8d2befb
run test_walk_above_recursion_limit for fwalk
jonburdo Dec 20, 2022
d29188e
set stack outside try-except in fwalk
jonburdo Dec 20, 2022
2cf7550
fix comments
jonburdo Dec 20, 2022
f2cca94
Merge branch 'main' into iterative-os-fwalk
jonburdo Dec 20, 2022
af18a1d
change ValueError to AssertionError
jonburdo Dec 20, 2022
9eedf9a
Merge branch 'main' into iterative-os-fwalk
jonburdo Mar 23, 2023
5ee50c6
add more reliable file descriptor closing logic in os.fwalk
jonburdo Mar 23, 2023
f0f9330
use a separate fd_stack for simpler cleanup and error handling
jonburdo Mar 23, 2023
ccfb955
Merge branch 'main' into iterative-os-fwalk
jonburdo Mar 23, 2023
598bdf9
run test_walk_above_recursion_limit with fwalk tests
jonburdo Mar 23, 2023
c158be3
Merge branch 'main' into iterative-os-fwalk
jonburdo Mar 24, 2023
6608a6a
make sure we don't close the same fd twice
jonburdo Mar 26, 2023
d026146
revert to using a single stack instead of a separate ffd stack
jonburdo Mar 26, 2023
762c03a
remove unused variable
jonburdo Mar 26, 2023
045fd88
get rid of unnecessary os._fwalk
jonburdo Apr 1, 2023
f3f793a
change except clause to finally clause in os.fwalk
jonburdo Apr 1, 2023
dfb9685
Merge branch 'main' into iterative-os-fwalk
AlexWaygood Apr 25, 2023
0ebc475
Merge branch 'main' into iterative-os-fwalk
erlend-aasland Jan 5, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
142 changes: 84 additions & 58 deletions Lib/os.py
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,10 @@ def walk(top, topdown=True, onerror=None, followlinks=False):
__all__.append("walk")

if {open, stat} <= supports_dir_fd and {scandir, stat} <= supports_fd:
class _WalkAction:
YIELD = object()
CLOSE = object()
WALK = object()

def fwalk(top=".", topdown=True, onerror=None, *, follow_symlinks=False, dir_fd=None):
"""Directory tree generator.
Expand All @@ -450,8 +454,8 @@ def fwalk(top=".", topdown=True, onerror=None, *, follow_symlinks=False, dir_fd=
races (when follow_symlinks is False).

If dir_fd is not None, it should be a file descriptor open to a directory,
and top should be relative; top will then be relative to that directory.
(dir_fd is always supported for fwalk.)
and top should be relative; top will then be relative to that directory.
(dir_fd is always supported for fwalk.)

Caution:
Since fwalk() yields file descriptors, those are only valid until the
Expand All @@ -469,76 +473,98 @@ def fwalk(top=".", topdown=True, onerror=None, *, follow_symlinks=False, dir_fd=
if 'CVS' in dirs:
dirs.remove('CVS') # don't visit CVS directories
"""
# Note: This uses O(depth of the directory tree) file descriptors: if
# necessary, it can be adapted to only require O(1) FDs, see issue
# bpo-13734 (gh-57943).
sys.audit("os.fwalk", top, topdown, onerror, follow_symlinks, dir_fd)
top = fspath(top)
isbytes = isinstance(top, bytes)
# Note: To guard against symlink races, we use the standard
# lstat()/open()/fstat() trick.
if not follow_symlinks:
orig_st = stat(top, follow_symlinks=False, dir_fd=dir_fd)
topfd = open(top, O_RDONLY, dir_fd=dir_fd)
stack = [(_WalkAction.CLOSE, topfd)]
try:
if (follow_symlinks or (st.S_ISDIR(orig_st.st_mode) and
path.samestat(orig_st, stat(topfd)))):
yield from _fwalk(topfd, top, isinstance(top, bytes),
topdown, onerror, follow_symlinks)
finally:
close(topfd)

def _fwalk(topfd, toppath, isbytes, topdown, onerror, follow_symlinks):
# Note: This uses O(depth of the directory tree) file descriptors: if
# necessary, it can be adapted to only require O(1) FDs, see issue
# #13734.
stack.append((_WalkAction.WALK, (topfd, top)))

scandir_it = scandir(topfd)
dirs = []
nondirs = []
entries = None if topdown or follow_symlinks else []
for entry in scandir_it:
name = entry.name
if isbytes:
name = fsencode(name)
try:
if entry.is_dir():
dirs.append(name)
if entries is not None:
entries.append(entry)
while stack:
action, value = stack.pop()
if action is _WalkAction.YIELD:
yield value
continue
elif action is _WalkAction.CLOSE:
close(value)
continue
elif action is _WalkAction.WALK:
topfd, top = value
else:
nondirs.append(name)
except OSError:
try:
# Add dangling symlinks, ignore disappeared files
if entry.is_symlink():
nondirs.append(name)
except OSError:
pass
raise AssertionError(f"invalid walk action: {action!r}")

scandir_it = scandir(topfd)
dirs = []
nondirs = []
entries = None if topdown or follow_symlinks else []
for entry in scandir_it:
name = entry.name
if isbytes:
name = fsencode(name)
try:
if entry.is_dir():
dirs.append(name)
if entries is not None:
entries.append(entry)
else:
nondirs.append(name)
except OSError:
try:
# Add dangling symlinks, ignore disappeared files
if entry.is_symlink():
nondirs.append(name)
except OSError:
pass

if topdown:
yield toppath, dirs, nondirs, topfd
if topdown:
# Yield top immediately, before walking subdirs
yield top, dirs, nondirs, topfd
else:
# Yield top after walking subdirs
stack.append(
(_WalkAction.YIELD, (top, dirs, nondirs, topfd)))

for name in dirs if entries is None else zip(dirs, entries):
try:
if not follow_symlinks:
if topdown:
orig_st = stat(name, dir_fd=topfd, follow_symlinks=False)
else:
assert entries is not None
name, entry = name
orig_st = entry.stat(follow_symlinks=False)
dirfd = open(name, O_RDONLY, dir_fd=topfd)
except OSError as err:
if onerror is not None:
onerror(err)
continue
try:
if follow_symlinks or path.samestat(orig_st, stat(dirfd)):
dirpath = path.join(toppath, name)
yield from _fwalk(dirfd, dirpath, isbytes,
topdown, onerror, follow_symlinks)
finally:
close(dirfd)

if not topdown:
yield toppath, dirs, nondirs, topfd
for name in (reversed(dirs) if entries is None
else zip(reversed(dirs), reversed(entries))):
try:
if not follow_symlinks:
if topdown:
orig_st = stat(name, dir_fd=topfd,
follow_symlinks=False)
else:
assert entries is not None
name, entry = name
orig_st = entry.stat(follow_symlinks=False)
dirfd = open(name, O_RDONLY, dir_fd=topfd)
except OSError as err:
if onerror is not None:
onerror(err)
continue
# Close dirfd right after all subdirs have been traversed.
# Note that we use a stack, so actions appended first are
# executed last.
stack.append((_WalkAction.CLOSE, dirfd))
# Walk all subdirs
if follow_symlinks or path.samestat(orig_st, stat(dirfd)):
dirpath = path.join(top, name)
stack.append((_WalkAction.WALK, (dirfd, dirpath)))
finally:
for action, value in reversed(stack):
if action is _WalkAction.CLOSE:
try:
close(value)
except OSError:
pass

__all__.append("fwalk")

Expand Down
2 changes: 0 additions & 2 deletions Lib/test/test_os.py
Original file line number Diff line number Diff line change
Expand Up @@ -1607,8 +1607,6 @@ def test_fd_leak(self):

# fwalk() keeps file descriptors open
test_walk_many_open_files = None
# fwalk() still uses recursion
test_walk_above_recursion_limit = None


class BytesWalkTests(WalkTests):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Fix issue with :func:`os.fwalk` where a :exc:`RecursionError` would occur on
deep directory structures by adjusting the implementation to be iterative
instead of recursive.
Loading