From e1be3cd29396d61d3392bdcabd015f51da760bd1 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sun, 28 Jan 2024 18:47:38 +0000 Subject: [PATCH 1/2] GH-70303: Make `pathlib.Path.glob('**')` return both files and directories Return files and directories from `pathlib.Path.glob()` if the pattern ends with `**`. This is more compatible with `PurePath.full_match()` and with other glob implementations such as bash and `glob.glob()`. Users can add a trailing slash to match only directories. In my previous patch I added a `FutureWarning` with the intention of fixing this in Python 3.15. Upon further reflection I think this was an unnecessarily cautious remedy to a clear bug. --- Doc/library/pathlib.rst | 5 ++--- Lib/pathlib/__init__.py | 8 ------- Lib/test/test_pathlib/test_pathlib.py | 12 ----------- Lib/test/test_pathlib/test_pathlib_abc.py | 21 +++++++++++++++++++ ...4-01-28-18-38-18.gh-issue-70303._Lt_pj.rst | 2 ++ 5 files changed, 25 insertions(+), 23 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-01-28-18-38-18.gh-issue-70303._Lt_pj.rst diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index f1aba793fda03e..f94b6fb3805684 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -1038,9 +1038,8 @@ call fails (for example because the path doesn't exist). The *follow_symlinks* parameter was added. .. versionchanged:: 3.13 - Emits :exc:`FutureWarning` if the pattern ends with "``**``". In a - future Python release, patterns with this ending will match both files - and directories. Add a trailing slash to match only directories. + Return files and directories if *pattern* ends with "``**``". In + previous versions, only directories were returned. .. versionchanged:: 3.13 The *pattern* parameter accepts a :term:`path-like object`. 
diff --git a/Lib/pathlib/__init__.py b/Lib/pathlib/__init__.py index cc159edab5796f..4447f98e3e8689 100644 --- a/Lib/pathlib/__init__.py +++ b/Lib/pathlib/__init__.py @@ -465,14 +465,6 @@ def _pattern_stack(self): elif pattern[-1] in (self.pathmod.sep, self.pathmod.altsep): # GH-65238: pathlib doesn't preserve trailing slash. Add it back. parts.append('') - elif parts[-1] == '**': - # GH-70303: '**' only matches directories. Add trailing slash. - warnings.warn( - "Pattern ending '**' will match files and directories in a " - "future Python release. Add a trailing slash to match only " - "directories and remove this warning.", - FutureWarning, 4) - parts.append('') parts.reverse() return parts diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py index 5ce3b605c58e63..4a8b95b1f75938 100644 --- a/Lib/test/test_pathlib/test_pathlib.py +++ b/Lib/test/test_pathlib/test_pathlib.py @@ -1265,18 +1265,6 @@ def test_glob_above_recursion_limit(self): with set_recursion_limit(recursion_limit): list(base.glob('**/')) - def test_glob_recursive_no_trailing_slash(self): - P = self.cls - p = P(self.base) - with self.assertWarns(FutureWarning): - p.glob('**') - with self.assertWarns(FutureWarning): - p.glob('*/**') - with self.assertWarns(FutureWarning): - p.rglob('**') - with self.assertWarns(FutureWarning): - p.rglob('*/**') - def test_glob_pathlike(self): P = self.cls p = P(self.base) diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index ab989cb5503f99..b37ae62c614a5e 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -1766,16 +1766,26 @@ def _check(path, glob, expected): _check(p, "*/fileB", ["dirB/fileB", "linkB/fileB"]) _check(p, "*/", ["dirA/", "dirB/", "dirC/", "dirE/", "linkB/"]) _check(p, "dir*/*/..", ["dirC/dirD/..", "dirA/linkC/..", "dirB/linkD/.."]) + _check(p, "dir*/**", [ + "dirA", "dirA/linkC", "dirA/linkC/fileB", 
"dirA/linkC/linkD", "dirA/linkC/linkD/fileB", + "dirB", "dirB/fileB", "dirB/linkD", "dirB/linkD/fileB", + "dirC", "dirC/fileC", "dirC/dirD", "dirC/dirD/fileD", "dirC/novel.txt", + "dirE"]) _check(p, "dir*/**/", ["dirA/", "dirA/linkC/", "dirA/linkC/linkD/", "dirB/", "dirB/linkD/", "dirC/", "dirC/dirD/", "dirE/"]) _check(p, "dir*/**/..", ["dirA/..", "dirA/linkC/..", "dirB/..", "dirB/linkD/..", "dirA/linkC/linkD/..", "dirC/..", "dirC/dirD/..", "dirE/.."]) + _check(p, "dir*/*/**", [ + "dirA/linkC", "dirA/linkC/linkD", "dirA/linkC/fileB", "dirA/linkC/linkD/fileB", + "dirB/linkD", "dirB/linkD/fileB", + "dirC/dirD", "dirC/dirD/fileD"]) _check(p, "dir*/*/**/", ["dirA/linkC/", "dirA/linkC/linkD/", "dirB/linkD/", "dirC/dirD/"]) _check(p, "dir*/*/**/..", ["dirA/linkC/..", "dirA/linkC/linkD/..", "dirB/linkD/..", "dirC/dirD/.."]) _check(p, "dir*/**/fileC", ["dirC/fileC"]) _check(p, "dir*/*/../dirD/**/", ["dirC/dirD/../dirD/"]) + _check(p, "*/dirD/**", ["dirC/dirD", "dirC/dirD/fileD"]) _check(p, "*/dirD/**/", ["dirC/dirD/"]) @needs_symlinks @@ -1792,12 +1802,20 @@ def _check(path, glob, expected): _check(p, "*/fileB", ["dirB/fileB"]) _check(p, "*/", ["dirA/", "dirB/", "dirC/", "dirE/"]) _check(p, "dir*/*/..", ["dirC/dirD/.."]) + _check(p, "dir*/**", [ + "dirA", "dirA/linkC", + "dirB", "dirB/fileB", "dirB/linkD", + "dirC", "dirC/fileC", "dirC/dirD", "dirC/dirD/fileD", "dirC/novel.txt", + "dirE"]) _check(p, "dir*/**/", ["dirA/", "dirB/", "dirC/", "dirC/dirD/", "dirE/"]) _check(p, "dir*/**/..", ["dirA/..", "dirB/..", "dirC/..", "dirC/dirD/..", "dirE/.."]) + _check(p, "dir*/*/**", ["dirC/dirD", "dirC/dirD/fileD"]) _check(p, "dir*/*/**/", ["dirC/dirD/"]) _check(p, "dir*/*/**/..", ["dirC/dirD/.."]) _check(p, "dir*/**/fileC", ["dirC/fileC"]) + _check(p, "dir*/*/../dirD/**", ["dirC/dirD/../dirD", "dirC/dirD/../dirD/fileD"]) _check(p, "dir*/*/../dirD/**/", ["dirC/dirD/../dirD/"]) + _check(p, "*/dirD/**", ["dirC/dirD", "dirC/dirD/fileD"]) _check(p, "*/dirD/**/", ["dirC/dirD/"]) def 
test_rglob_common(self): @@ -1834,10 +1852,13 @@ def _check(glob, expected): "dirC/dirD", "dirC/dirD/fileD"]) _check(p.rglob("file*"), ["dirC/fileC", "dirC/dirD/fileD"]) _check(p.rglob("**/file*"), ["dirC/fileC", "dirC/dirD/fileD"]) + _check(p.rglob("dir*/**"), ["dirC/dirD", "dirC/dirD/fileD"]) _check(p.rglob("dir*/**/"), ["dirC/dirD/"]) _check(p.rglob("*/*"), ["dirC/dirD/fileD"]) _check(p.rglob("*/"), ["dirC/dirD/"]) _check(p.rglob(""), ["dirC/", "dirC/dirD/"]) + _check(p.rglob("**"), [ + "dirC", "dirC/fileC", "dirC/dirD", "dirC/dirD/fileD", "dirC/novel.txt"]) _check(p.rglob("**/"), ["dirC/", "dirC/dirD/"]) # gh-91616, a re module regression _check(p.rglob("*.txt"), ["dirC/novel.txt"]) diff --git a/Misc/NEWS.d/next/Library/2024-01-28-18-38-18.gh-issue-70303._Lt_pj.rst b/Misc/NEWS.d/next/Library/2024-01-28-18-38-18.gh-issue-70303._Lt_pj.rst new file mode 100644 index 00000000000000..dedda24b481241 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-01-28-18-38-18.gh-issue-70303._Lt_pj.rst @@ -0,0 +1,2 @@ +Return both files and directories from :meth:`pathlib.Path.glob` if a +pattern ends with "``**``". Previously only directories were returned. From 54aadc906d028978714c555f2b88be1d95a08d48 Mon Sep 17 00:00:00 2001 From: barneygale Date: Mon, 29 Jan 2024 18:19:15 +0000 Subject: [PATCH 2/2] whatsnew --- Doc/whatsnew/3.13.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 985e34b453f63a..cd2dc750a6e4e6 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -350,6 +350,11 @@ pathlib (Contributed by Barney Gale in :gh:`77609` and :gh:`105793`, and Kamil Turek in :gh:`107962`). +* Return files and directories from :meth:`pathlib.Path.glob` and + :meth:`~pathlib.Path.rglob` when given a pattern that ends with "``**``". In + earlier versions, only directories were returned. + (Contributed by Barney Gale in :gh:`70303`). 
+ pdb --- @@ -1213,6 +1218,11 @@ Changes in the Python API * :class:`mailbox.Maildir` now ignores files with a leading dot. (Contributed by Zackery Spytz in :gh:`65559`.) +* :meth:`pathlib.Path.glob` and :meth:`~pathlib.Path.rglob` now return both + files and directories when the pattern ends with "``**``", rather than + directories only. Add a trailing slash (for example, "``**/``") to match + only directories, as in earlier versions. + Build Changes =============