Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-81079: Add case_sensitive argument to pathlib.Path.glob() #102710

Merged
merged 13 commits into from
May 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 18 additions & 2 deletions Doc/library/pathlib.rst
Original file line number Diff line number Diff line change
Expand Up @@ -855,7 +855,7 @@ call fails (for example because the path doesn't exist).
.. versionadded:: 3.5


.. method:: Path.glob(pattern)
.. method:: Path.glob(pattern, *, case_sensitive=None)

Glob the given relative *pattern* in the directory represented by this path,
yielding all matching files (of any kind)::
Expand All @@ -876,6 +876,11 @@ call fails (for example because the path doesn't exist).
PosixPath('setup.py'),
PosixPath('test_pathlib.py')]

By default, or when the *case_sensitive* keyword-only argument is set to
``None``, this method matches paths using platform-specific casing rules:
typically, case-sensitive on POSIX, and case-insensitive on Windows.
Set *case_sensitive* to ``True`` or ``False`` to override this behavior.

.. note::
Using the "``**``" pattern in large directory trees may consume
an inordinate amount of time.
Expand All @@ -886,6 +891,9 @@ call fails (for example because the path doesn't exist).
Return only directories if *pattern* ends with a pathname component
separator (:data:`~os.sep` or :data:`~os.altsep`).

.. versionchanged:: 3.12
The *case_sensitive* parameter was added.

.. method:: Path.group()

Return the name of the group owning the file. :exc:`KeyError` is raised
Expand Down Expand Up @@ -1271,7 +1279,7 @@ call fails (for example because the path doesn't exist).
.. versionadded:: 3.6
The *strict* argument (pre-3.6 behavior is strict).

.. method:: Path.rglob(pattern)
.. method:: Path.rglob(pattern, *, case_sensitive=None)

Glob the given relative *pattern* recursively. This is like calling
:meth:`Path.glob` with "``**/``" added in front of the *pattern*, where
Expand All @@ -1284,12 +1292,20 @@ call fails (for example because the path doesn't exist).
PosixPath('setup.py'),
PosixPath('test_pathlib.py')]

By default, or when the *case_sensitive* keyword-only argument is set to
``None``, this method matches paths using platform-specific casing rules:
typically, case-sensitive on POSIX, and case-insensitive on Windows.
Set *case_sensitive* to ``True`` or ``False`` to override this behavior.

.. audit-event:: pathlib.Path.rglob self,pattern pathlib.Path.rglob

.. versionchanged:: 3.11
Return only directories if *pattern* ends with a pathname component
separator (:data:`~os.sep` or :data:`~os.altsep`).

.. versionchanged:: 3.12
The *case_sensitive* parameter was added.

.. method:: Path.rmdir()

Remove this directory. The directory must be empty.
Expand Down
34 changes: 19 additions & 15 deletions Lib/pathlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def _is_case_sensitive(flavour):
#

@functools.lru_cache()
def _make_selector(pattern_parts, flavour):
def _make_selector(pattern_parts, flavour, case_sensitive):
pat = pattern_parts[0]
child_parts = pattern_parts[1:]
if not pat:
Expand All @@ -75,17 +75,17 @@ def _make_selector(pattern_parts, flavour):
raise ValueError("Invalid pattern: '**' can only be an entire path component")
else:
cls = _WildcardSelector
return cls(pat, child_parts, flavour)
return cls(pat, child_parts, flavour, case_sensitive)


class _Selector:
"""A selector matches a specific glob pattern part against the children
of a given path."""

def __init__(self, child_parts, flavour):
def __init__(self, child_parts, flavour, case_sensitive):
self.child_parts = child_parts
if child_parts:
self.successor = _make_selector(child_parts, flavour)
self.successor = _make_selector(child_parts, flavour, case_sensitive)
self.dironly = True
else:
self.successor = _TerminatingSelector()
Expand All @@ -108,8 +108,9 @@ def _select_from(self, parent_path, scandir):


class _ParentSelector(_Selector):
def __init__(self, name, child_parts, flavour):
_Selector.__init__(self, child_parts, flavour)

def __init__(self, name, child_parts, flavour, case_sensitive):
_Selector.__init__(self, child_parts, flavour, case_sensitive)

def _select_from(self, parent_path, scandir):
path = parent_path._make_child_relpath('..')
Expand All @@ -119,10 +120,13 @@ def _select_from(self, parent_path, scandir):

class _WildcardSelector(_Selector):

def __init__(self, pat, child_parts, flavour):
flags = re.NOFLAG if _is_case_sensitive(flavour) else re.IGNORECASE
def __init__(self, pat, child_parts, flavour, case_sensitive):
_Selector.__init__(self, child_parts, flavour, case_sensitive)
if case_sensitive is None:
# TODO: evaluate case-sensitivity of each directory in _select_from()
case_sensitive = _is_case_sensitive(flavour)
flags = re.NOFLAG if case_sensitive else re.IGNORECASE
zooba marked this conversation as resolved.
Show resolved Hide resolved
self.match = re.compile(fnmatch.translate(pat), flags=flags).fullmatch
_Selector.__init__(self, child_parts, flavour)

def _select_from(self, parent_path, scandir):
try:
Expand Down Expand Up @@ -153,8 +157,8 @@ def _select_from(self, parent_path, scandir):

class _RecursiveWildcardSelector(_Selector):

def __init__(self, pat, child_parts, flavour):
_Selector.__init__(self, child_parts, flavour)
def __init__(self, pat, child_parts, flavour, case_sensitive):
_Selector.__init__(self, child_parts, flavour, case_sensitive)

def _iterate_directories(self, parent_path, scandir):
yield parent_path
Expand Down Expand Up @@ -819,7 +823,7 @@ def _scandir(self):
# includes scandir(), which is used to implement glob().
return os.scandir(self)

def glob(self, pattern):
def glob(self, pattern, *, case_sensitive=None):
"""Iterate over this subtree and yield all existing files (of any
kind, including directories) matching the given relative pattern.
"""
Expand All @@ -831,11 +835,11 @@ def glob(self, pattern):
raise NotImplementedError("Non-relative patterns are unsupported")
if pattern[-1] in (self._flavour.sep, self._flavour.altsep):
pattern_parts.append('')
selector = _make_selector(tuple(pattern_parts), self._flavour)
selector = _make_selector(tuple(pattern_parts), self._flavour, case_sensitive)
for p in selector.select_from(self):
yield p

def rglob(self, pattern):
def rglob(self, pattern, *, case_sensitive=None):
"""Recursively yield all existing files (of any kind, including
directories) matching the given relative pattern, anywhere in
this subtree.
Expand All @@ -846,7 +850,7 @@ def rglob(self, pattern):
raise NotImplementedError("Non-relative patterns are unsupported")
if pattern and pattern[-1] in (self._flavour.sep, self._flavour.altsep):
pattern_parts.append('')
selector = _make_selector(("**",) + tuple(pattern_parts), self._flavour)
selector = _make_selector(("**",) + tuple(pattern_parts), self._flavour, case_sensitive)
for p in selector.select_from(self):
yield p

Expand Down
12 changes: 12 additions & 0 deletions Lib/test/test_pathlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -1816,6 +1816,18 @@ def _check(glob, expected):
else:
_check(p.glob("*/"), ["dirA", "dirB", "dirC", "dirE", "linkB"])

def test_glob_case_sensitive(self):
    """Check that glob() honours an explicit *case_sensitive* argument."""
    P = self.cls
    base = P(BASE)

    def _check(path, pattern, case_sensitive, expected):
        # Compare the results as sets of path strings.
        matches = path.glob(pattern, case_sensitive=case_sensitive)
        actual = set(map(str, matches))
        wanted = {str(P(BASE, name)) for name in expected}
        self.assertEqual(actual, wanted)

    # Neither pattern matches the casing of "dirB/fileB", so each should
    # yield a result only when case-insensitive matching is requested.
    for pattern in ("DIRB/FILE*", "dirb/file*"):
        _check(base, pattern, True, [])
        _check(base, pattern, False, ["dirB/fileB"])

def test_rglob_common(self):
def _check(glob, expected):
self.assertEqual(set(glob), { P(BASE, q) for q in expected })
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Add *case_sensitive* keyword-only argument to :meth:`pathlib.Path.glob` and
:meth:`~pathlib.Path.rglob`.