Skip to content

Commit

Permalink
pythongh-73435 - Implement recursive wildcards in pathlib.PurePath.match()
Browse files Browse the repository at this point in the history

Add a new *recursive* argument to `pathlib.PurePath.match()`, defaulting
to `False`. If set to true, `match()` handles the `**` wildcard as in
`Path.glob()`, i.e. it matches any number of path segments.

We now compile a single `re.Pattern` object for the entire pattern. This is
complicated by the fact that `fnmatch` does not treat directory separators
specially when evaluating wildcards (`*`, `?`, etc.), so we arrange the path
parts onto separate *lines* of a string and take care not to set `re.DOTALL`;
that way `.` cannot match the newline that separates two parts.
  • Loading branch information
barneygale committed Jan 28, 2023
1 parent b5c4d60 commit 2cd4ab7
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 22 deletions.
8 changes: 7 additions & 1 deletion Doc/library/pathlib.rst
Original file line number Diff line number Diff line change
Expand Up @@ -544,11 +544,14 @@ Pure paths provide the following methods and properties:
PureWindowsPath('c:/Program Files')


.. method:: PurePath.match(pattern)
.. method:: PurePath.match(pattern, recursive=False)

Match this path against the provided glob-style pattern. Return ``True``
if matching is successful, ``False`` otherwise.

If *recursive* is true, the pattern "``**``" will match any number of file
or directory segments.

If *pattern* is relative, the path can be either relative or absolute,
and matching is done from the right::

Expand All @@ -574,6 +577,9 @@ Pure paths provide the following methods and properties:
>>> PureWindowsPath('b.py').match('*.PY')
True

.. versionchanged:: 3.12
The *recursive* argument was added.


.. method:: PurePath.relative_to(other, walk_up=False)

Expand Down
7 changes: 6 additions & 1 deletion Lib/fnmatch.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,11 @@ def translate(pat):
There is no way to quote meta-characters.
"""

res = _translate(pat)
return fr'(?s:{res})\Z'


def _translate(pat):
STAR = object()
res = []
add = res.append
Expand Down Expand Up @@ -182,4 +187,4 @@ def translate(pat):
add(f"(?>.*?{fixed})")
assert i == n
res = "".join(res)
return fr'(?s:{res})\Z'
return res
43 changes: 23 additions & 20 deletions Lib/pathlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,25 @@ def _is_wildcard_pattern(pat):
# Globbing helpers
#

@functools.lru_cache()
def _make_matcher(path_cls, pattern, recursive):
    """Compile *pattern* into a regex that is searched against path parts.

    *pattern* is parsed with *path_cls*; each of its normalised parts
    becomes one "line" of the regex (the part's translation followed by
    a newline).  The caller is expected to search the result against the
    path's own parts joined the same way, one part per line.

    A pattern with a drive or root is anchored at the very start of the
    text with ``\\A``; a relative pattern starts with ``^`` which, under
    ``re.MULTILINE``, may match at the start of any line.  Both kinds end
    with ``\\Z``, so a relative pattern is matched from the right.

    If *recursive* is true, a part equal to ``**`` matches any number of
    whole lines, and a part that merely contains ``**`` is rejected.

    Raises ValueError if the parsed pattern has no parts, or (in
    recursive mode) if ``**`` appears inside a longer part.
    """
    # NOTE(review): this scheme relies on '.' not matching '\n' (re.DOTALL
    # is deliberately left unset).  A path part that itself contains '\n',
    # or a translated negated set that can match '\n', would presumably
    # defeat it -- confirm against fnmatch._translate and the caller.
    parsed = path_cls(pattern)
    if not parsed._parts:
        raise ValueError("empty pattern")

    is_anchored = parsed._drv or parsed._root
    pieces = [r'\A'] if is_anchored else ['^']

    for segment in parsed._parts_normcase:
        if recursive and segment == '**':
            # Zero or more complete parts, each on its own line.
            pieces.append('(.+\n)*')
        elif recursive and '**' in segment:
            raise ValueError("Invalid pattern: '**' can only be an entire path component")
        else:
            translated = fnmatch._translate(segment)
            pieces.append(f'{translated}\n')

    pieces.append(r'\Z')
    return re.compile(''.join(pieces), flags=re.MULTILINE)


@functools.lru_cache()
def _make_selector(pattern_parts, flavour):
pat = pattern_parts[0]
Expand Down Expand Up @@ -639,29 +658,13 @@ def is_reserved(self):
name = self._parts[-1].partition('.')[0].partition(':')[0].rstrip(' ')
return name.upper() in _WIN_RESERVED_NAMES

def match(self, path_pattern):
def match(self, path_pattern, recursive=False):
"""
Return True if this path matches the given pattern.
"""
path_pattern = self._flavour.normcase(path_pattern)
drv, root, pat_parts = self._parse_parts((path_pattern,))
if not pat_parts:
raise ValueError("empty pattern")
elif drv and drv != self._flavour.normcase(self._drv):
return False
elif root and root != self._root:
return False
parts = self._parts_normcase
if drv or root:
if len(pat_parts) != len(parts):
return False
pat_parts = pat_parts[1:]
elif len(pat_parts) > len(parts):
return False
for part, pat in zip(reversed(parts), reversed(pat_parts)):
if not fnmatch.fnmatchcase(part, pat):
return False
return True
matcher = _make_matcher(type(self), path_pattern, recursive)
lines = ''.join(f'{part}\n' for part in self._parts_normcase)
return matcher.search(lines) is not None

# Can't subclass os.PathLike from PurePath and keep the constructor
# optimizations in PurePath._parse_args().
Expand Down

0 comments on commit 2cd4ab7

Please sign in to comment.