python · jonburdo · Jan 3, 2023 · Jan 3, 2023 · Jan 3, 2023 · Jan 3, 2023
diff --git a/Lib/os.py b/Lib/os.py
@@ -302,7 +302,7 @@ def walk(top, topdown=True, onerror=None, followlinks=False):
     subdirectories (directories are generated bottom up).
 
     When topdown is true, the caller can modify the dirnames list in-place
-    (e.g., via del or slice assignment), and walk will only recurse into the
+    (e.g., via del or slice assignment), and walk will only traverse into the
     subdirectories whose names remain in dirnames; this can be used to prune the
     search, or to impose a specific order of visiting.  Modifying dirnames when
     topdown is false has no effect on the behavior of os.walk(), since the
@@ -341,31 +341,27 @@ def walk(top, topdown=True, onerror=None, followlinks=False):
     """
     sys.audit("os.walk", top, topdown, onerror, followlinks)
 
-    stack = [fspath(top)]
-    islink, join = path.islink, path.join
-    while stack:
-        top = stack.pop()
-        if isinstance(top, tuple):
-            yield top
-            continue
+    top = fspath(top)
+    try:
+        scandir_it = scandir(top)
+    except OSError as error:
+        if onerror is not None:
+            onerror(error)
+        return
 
+    stack = []
+    islink, join = path.islink, path.join
+    while True:
         dirs = []
         nondirs = []
         walk_dirs = []
+        use_entry = True
 
         # We may not have read permission for top, in which case we can't
         # get a list of the files the directory contains.
         # We suppress the exception here, rather than blow up for a
         # minor reason when (say) a thousand readable directories are still
         # left to visit.
-        try:
-            scandir_it = scandir(top)
-        except OSError as error:
-            if onerror is not None:
-                onerror(error)
-            continue
-
-        cont = False
         with scandir_it:
             while True:
                 try:
@@ -376,7 +372,7 @@ def walk(top, topdown=True, onerror=None, followlinks=False):
                 except OSError as error:
                     if onerror is not None:
                         onerror(error)
-                    cont = True
+                    use_entry = False
                     break
 
                 try:
@@ -408,27 +404,61 @@ def walk(top, topdown=True, onerror=None, followlinks=False):
 
                     if walk_into:
                         walk_dirs.append(entry.path)
-        if cont:
-            continue
-
         if topdown:
-            # Yield before sub-directory traversal if going top down
-            yield top, dirs, nondirs
-            # Traverse into sub-directories
-            for dirname in reversed(dirs):
-                new_path = join(top, dirname)
-                # bpo-23605: os.path.islink() is used instead of caching
-                # entry.is_symlink() result during the loop on os.scandir() because
-                # the caller can replace the directory entry during the "yield"
-                # above.
-                if followlinks or not islink(new_path):
-                    stack.append(new_path)
+            if use_entry:
+                # Yield before sub-directory traversal if going top down
+                yield top, dirs, nondirs
+                # Push top together with a lazy iterator over its sub-directory names
+                stack.append([top, iter(dirs)])
+
+            # Set top and scandir_it for the next iteration
+            while stack:
+                root, dirs = stack[-1]
+                for dirname in dirs:
+                    top = join(root, dirname)
+                    # bpo-23605: os.path.islink() is used instead of caching
+                    # entry.is_symlink() result during the loop on os.scandir() because
+                    # the caller can replace the directory entry during the previous
+                    # "yield"
+                    if followlinks or not islink(top):
+                        try:
+                            scandir_it = scandir(top)
+                        except OSError as error:
+                            if onerror is not None:
+                                onerror(error)
+                        else:
+                            break
+                else:
+                    stack.pop()
+                    continue
+                break
+            else:
+                return
         else:
-            # Yield after sub-directory traversal if going bottom up
-            stack.append((top, dirs, nondirs))
-            # Traverse into sub-directories
-            for new_path in reversed(walk_dirs):
-                stack.append(new_path)
+            if use_entry:
+                # Defer the yield of this entry: push (entry, dirs) so that
+                # the sub-directories in dirs are walked first, and entry is
+                # yielded only once that iterator is exhausted
+                stack.append(((top, dirs, nondirs), iter(walk_dirs)))
+
+            # Set top and scandir_it for the next iteration
+            while stack:
+                entry, dirs = stack[-1]
+                for top in dirs:
+                    try:
+                        scandir_it = scandir(top)
+                    except OSError as error:
+                        if onerror is not None:
+                            onerror(error)
+                    else:
+                        break
+                else:
+                    stack.pop()
+                    yield entry
+                    continue
+                break
+            else:
+                return
 
 __all__.append("walk")
 

diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py
@@ -1512,6 +1512,57 @@ def test_walk_above_recursion_limit(self):
         self.assertEqual(sorted(dirs), ["SUB1", "SUB2", "d"])
         self.assertEqual(all, expected)
 
+    def test_walk_late_dirs_modification(self):
+        extra_parts = [
+            ("SUB3", "SUB31"),
+            ("SUB4", "SUB41"),
+            ("SUB5", "SUB51"),
+        ]
+        for dir_parts in extra_parts:
+            os.makedirs(os.path.join(self.walk_path, *dir_parts))
+
+        all = self.walk(self.walk_path)
+        result = []
+        result.append(next(all))
+        dirs = result[0][1]
+        dirs.sort()
+
+        # SUB1
+        # SUB1/SUB11
+        result.append(next(all))
+        result.append(next(all))
+        # SUB2 is removed
+        dirs.pop(1)
+        # swap SUB3 and SUB4
+        dirs[1], dirs[2] = dirs[2], dirs[1]
+        # SUB4
+        # SUB4/SUB41
+        # SUB3
+        result.append(next(all))
+        result.append(next(all))
+        result.append(next(all))
+        # clear dirs after SUB3 is yielded but before SUB3/SUB31 is
+        dirs.clear()
+        result.extend(all)
+
+        expected_parts = [
+            ([], [], ["tmp1"]),
+            (["SUB1"], ["SUB11"], ["tmp2"]),
+            (["SUB1", "SUB11"], [], []),
+            # SUB2 skipped
+            (["SUB4"], ["SUB41"], []),
+            (["SUB4", "SUB41"], [], []),
+            (["SUB3"], ["SUB31"], []),
+            (["SUB3", "SUB31"], [], []),
+            # SUB5 skipped
+            # SUB5/SUB51 skipped
+        ]
+        expected = [
+            (os.path.join(self.walk_path, *r), d, f)
+            for r, d, f in expected_parts
+        ]
+        self.assertEqual(result, expected)
+
 
 @unittest.skipUnless(hasattr(os, 'fwalk'), "Test needs os.fwalk()")
 class FwalkTests(WalkTests):
@@ -1603,6 +1654,10 @@ def walk(self, top, **kwargs):
             bdirs[:] = list(map(os.fsencode, dirs))
             bfiles[:] = list(map(os.fsencode, files))
 
+    # modifying bdirs in self.walk above prevents late modification
+    test_walk_late_dirs_modification = None
+
+
 @unittest.skipUnless(hasattr(os, 'fwalk'), "Test needs os.fwalk()")
 class BytesFwalkTests(FwalkTests):
     """Tests for os.walk() with bytes."""
@@ -1615,6 +1670,8 @@ def fwalk(self, top='.', *args, **kwargs):
             bdirs[:] = list(map(os.fsencode, dirs))
             bfiles[:] = list(map(os.fsencode, files))
 
+    test_walk_late_dirs_modification = None
+
 
 class MakedirTests(unittest.TestCase):
     def setUp(self):