Skip to content

Commit

Permalink
find/glob: Don't use $. as a non-matching regex
Browse files Browse the repository at this point in the history
The oniguruma version built by onig_sys does not include the commit
6f7b0aa ("add ONIG_SYN_BRE_ANCHOR_AT_EDGE_OF_SUBEXP into
ONIG_SYNTAX_POSIX_BASIC and ONIG_SYNTAX_GREP"), causing `$.` to actually
match a literal newline.

Using `$.` is not portable anyway; POSIX says

> A <dollar-sign> ( '$' ) shall be an anchor when used as the last
> character of an entire BRE.
  • Loading branch information
tavianator committed Aug 21, 2024
1 parent abf851d commit 4be8be2
Showing 1 changed file with 31 additions and 21 deletions.
52 changes: 31 additions & 21 deletions src/find/matchers/glob.rs
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ fn extract_bracket_expr(pattern: &str) -> Option<(String, &str)> {
}

/// Converts a POSIX glob into a POSIX Basic Regular Expression
fn glob_to_regex(pattern: &str) -> String {
fn glob_to_regex(pattern: &str) -> Option<String> {
let mut regex = String::new();

let mut chars = pattern.chars();
Expand All @@ -130,9 +130,9 @@ fn glob_to_regex(pattern: &str) -> String {
// If pattern ends with an unescaped <backslash>, fnmatch() shall return a
// non-zero value (indicating either no match or an error).
//
// Most implementations return FNM_NOMATCH in this case, so return a regex that
// Most implementations return FNM_NOMATCH in this case, so create a pattern that
// never matches.
return "$.".to_string();
return None;
}
}
'[' => {
Expand All @@ -147,12 +147,12 @@ fn glob_to_regex(pattern: &str) -> String {
}
}

regex
Some(regex)
}

/// An fnmatch()-style glob matcher.
pub struct Pattern {
regex: Regex,
regex: Option<Regex>,
}

impl Pattern {
Expand All @@ -165,61 +165,66 @@ impl Pattern {
};

// As long as glob_to_regex() is correct, this should never fail
let regex = parse_bre(&glob_to_regex(pattern), options).unwrap();
let regex = glob_to_regex(pattern).map(|r| parse_bre(&r, options).unwrap());
Self { regex }
}

/// Test if this pattern matches a string.
pub fn matches(&self, string: &str) -> bool {
self.regex.is_match(string)
self.regex.as_ref().is_some_and(|r| r.is_match(string))
}
}

#[cfg(test)]
mod tests {
use super::*;

/// Asserts that `glob_to_regex()` translates `glob` into exactly `regex`.
///
/// Marked `#[track_caller]` so a failure is reported at the calling test's line.
#[track_caller]
fn assert_glob_regex(glob: &str, regex: &str) {
    let translated = glob_to_regex(glob);
    assert_eq!(translated.as_deref(), Some(regex));
}

#[test]
fn literals() {
assert_eq!(glob_to_regex(r"foo.bar"), r"foo\.bar");
assert_glob_regex(r"foo.bar", r"foo\.bar");
}

#[test]
fn regex_special() {
assert_eq!(glob_to_regex(r"^foo.bar$"), r"\^foo\.bar\$");
assert_glob_regex(r"^foo.bar$", r"\^foo\.bar\$");
}

#[test]
fn wildcards() {
assert_eq!(glob_to_regex(r"foo?bar*baz"), r"foo.bar.*baz");
assert_glob_regex(r"foo?bar*baz", r"foo.bar.*baz");
}

#[test]
fn escapes() {
assert_eq!(glob_to_regex(r"fo\o\?bar\*baz\\"), r"foo?bar\*baz\\");
}

#[test]
fn incomplete_escape() {
assert_eq!(glob_to_regex(r"foo\"), r"$.");
assert_glob_regex(r"fo\o\?bar\*baz\\", r"foo?bar\*baz\\");
}

#[test]
fn valid_brackets() {
assert_eq!(glob_to_regex(r"foo[bar][!baz]"), r"foo[bar][^baz]");
assert_glob_regex(r"foo[bar][!baz]", r"foo[bar][^baz]");
}

#[test]
fn complex_brackets() {
assert_eq!(
glob_to_regex(r"[!]!.*[\[.].][=]=][:space:]-]"),
r"[^]!.*[\[.].][=]=][:space:]-]"
assert_glob_regex(
r"[!]!.*[\[.].][=]=][:space:]-]",
r"[^]!.*[\[.].][=]=][:space:]-]",
);
}

#[test]
fn invalid_brackets() {
assert_eq!(glob_to_regex(r"foo[bar[!baz"), r"foo\[bar\[!baz");
assert_glob_regex(r"foo[bar[!baz", r"foo\[bar\[!baz");
}

#[test]
fn incomplete_escape() {
    // A glob ending in an unescaped backslash has no regex translation.
    let translated = glob_to_regex(r"foo\");
    assert_eq!(translated, None);
}

#[test]
Expand All @@ -235,4 +240,9 @@ mod tests {

assert!(!Pattern::new(r"foo*BAR", true).matches("BAR--foo"));
}

#[test]
fn incomplete_escape_matches() {
// A glob with a trailing unescaped backslash must match nothing, not even a lone newline.
assert!(!Pattern::new(r"foo\", false).matches("\n"));
}
}

0 comments on commit 4be8be2

Please sign in to comment.