diff --git a/src/Glob.jl b/src/Glob.jl index 2286ff6..25dba6e 100644 --- a/src/Glob.jl +++ b/src/Glob.jl @@ -40,7 +40,7 @@ Returns a `Glob.FilenameMatch` object, which can be used with `ismatch()` or `oc * `i` = `CASELESS` : Performs case-insensitive matching * `p` = `PERIOD` : A leading period (`.`) character must be exactly matched by a period (`.`) character (not a `?`, `*`, or `[]`). A leading period is a period at the beginning of a string, or a period after a slash if PATHNAME is true. * `e` = `NOESCAPE` : Do not treat backslash (`\`) as a special character (in extended mode, this only outside of `[]`) -* `d` = `PATHNAME` : A slash (`/`) character must be exactly matched by a slash (`/`) character (not a `?`, `*`, or `[]`) +* `d` = `PATHNAME` : A slash (`/`) character must be exactly matched by a slash (`/`) character (not a `?`, `*`, or `[]`), "**/" matches zero or more directories (globstar) * `x` = `EXTENDED` : Additional features borrowed from newer shells, such as `bash` and `tcsh` * Backslash (`\`) characters in `[]` groups escape the next character """ @@ -64,74 +64,117 @@ function occursin(fn::FilenameMatch, s::AbstractString) noescape = (fn.options & NOESCAPE) != 0 pathname = (fn.options & PATHNAME) != 0 extended = (fn.options & EXTENDED) != 0 + + # if pattern ends with "**", append "/*" to allow matching of all files + pathname && endswith(pattern, "**") && (pattern *= "/*") + mi = firstindex(pattern) # current index into pattern i = firstindex(s) # current index into s starmatch = i - star = 0 + + # globstar_index = 1 + globstar_mi = 0 + globstarmatch = 0 period = periodfl + globstar_period = false # leading period detected during globstar match while true - matchnext = iterate(s, i) - matchnext === nothing && break - patnext = iterate(pattern, mi) - if patnext === nothing - match = false # string characters left to match, but no pattern left - else - mc, mi = patnext - if mc == '*' - starmatch = i # backup the current search index - star = mi - c, 
_ = matchnext # peek-ahead - if period & (c == '.') - return false # * does not match leading . - end - match = true + star = 0 + match_fails = false + globstar_mi > 0 && (mi = globstar_mi[end]) # reset pattern index of the latest globstar pattern, if it exists + while true + matchnext = iterate(s, i) + matchnext === nothing && break + patnext = iterate(pattern, mi) + if patnext === nothing + match = false # string characters left to match, but no pattern left else - c, i = matchnext - if mc == '[' - mi, valid, match = _match(pattern, mi, c, caseless, extended) - if pathname & valid & match & (c == '/') - match = false - end - if period & valid & match & (c == '.') - match = false - end - elseif mc == '?' - if pathname & (c == '/') - return false # ? does not match / - end + mc, mi = patnext + @debug s[i:end], pattern[mi:end] + if mc == '*' && pathname && length(pattern) > mi && pattern[mi:nextind(pattern, mi)] == "*/" + star = 0 + mi += 2 + globstarmatch = i + globstar_mi = mi + c = '/' # fake previous character to indicate end of directory + match = true + elseif mc == '*' + starmatch = i # backup the current search index + star = mi + c, _ = matchnext # peek-ahead if period & (c == '.') - return false # ? does not match leading . + globstar_period = globstarmatch > 0 + (match_fails = true) && break end match = true else - if (!noescape) & (mc == '\\') # escape the next character after backslash, unless it is the last character - patnext = iterate(pattern, mi) - if patnext !== nothing - mc, mi = patnext + c, i = matchnext + if mc == '[' + mi, valid, match = _match(pattern, mi, c, caseless, extended) + if pathname & valid & match & (c == '/') + match = false + end + if period & valid & match & (c == '.') + match = false + end + elseif mc == '?' 
+ if pathname & (c == '/') + (match_fails = true) && break end + if period & (c == '.') + globstar_period = globstarmatch > 0 + (match_fails = true) && break + end + match = true + else + if (!noescape) & (mc == '\\') # escape the next character after backslash, unless it is the last character + patnext = iterate(pattern, mi) + if patnext !== nothing + mc, mi = patnext + end + end + match = ((c == mc) || (caseless && uppercase(c)==uppercase(mc))) end - match = ((c == mc) || (caseless && uppercase(c)==uppercase(mc))) + globstar_period = period && globstarmatch > 0 && (c == '.') end end + if !match # try to backtrack and add another character to the last * + (star == 0) && (match_fails = true) && break + c, i = something(iterate(s, starmatch)) # starmatch is strictly <= i, so it is known that it must be a valid index + if pathname & (c == '/') + (match_fails = true) && break # return false # * does not match / + end + mi = star + starmatch = i + end + period = (periodfl & pathname & (c == '/')) end - if !match # try to backtrack and add another character to the last * - star == 0 && return false - c, i = something(iterate(s, starmatch)) # starmatch is strictly <= i, so it is known that it must be a valid index - if pathname & (c == '/') - return false # * does not match / + while true # allow trailing *'s + patnext = iterate(pattern, mi) + patnext === nothing && break + mc, mi = patnext + if mc != '*' + # pattern characters left to match, but no string left + match_fails = true end - mi = star - starmatch = i end - period = (periodfl & pathname & (c == '/')) - end - while true # allow trailing *'s - patnext = iterate(pattern, mi) - patnext === nothing && break - mc, mi = patnext - mc == '*' || return false # pattern characters left to match, but no string left + if match_fails + # if in a globstar move to next directory, otherwise return false + if globstarmatch > 0 + x = findnext('/', s, globstarmatch[end]) + if x === nothing || globstar_period + globstarmatch = 0 + 
globstar_mi = 0 + globstar_period = false + else + globstarmatch = i = x + 1 + period = periodfl + end + end + globstarmatch == 0 && return false + else + return true + end end - return true end @deprecate ismatch(fn::FilenameMatch, s::AbstractString) occursin(fn, s) @@ -377,40 +420,135 @@ function show(io::IO, gm::GlobMatch) end """ - readdir(pattern::GlobMatch, [directory::AbstractString]) - -Alias for [`glob()`](@ref). -""" -readdir(pattern::GlobMatch, prefix::AbstractString="") = glob(pattern, prefix) - -""" - glob(pattern, [directory::AbstractString]) + glob(pattern, rootdir = ""; + relative::Union{Bool, Nothing} = nothing, + topdown::Bool = true, + follow_symlinks::Bool = true, + onerror::Union{Function, Nothing} = nothing + ) Returns a list of all files matching `pattern` in `directory`. -* If directory is not specified, it defaults to the current working directory. +* If rootdir is not specified, it defaults to the current working directory. * Pattern can be any of: - 1. A `Glob.GlobMatch` object: + 1. A `Glob.FilenameMatch` object: - glob"a/?/c" + `fn"a/?/c"dp` - 2. A string, which will be converted into a GlobMatch expression: + 2. A string, which will be converted into a FilenameMatch expression: - "a/?/c" # equivalent to 1, above + `"a/?/c" # equivalent to 1, above` 3. A vector of strings and/or objects which implement `occursin`, including `Regex` and `Glob.FilenameMatch` objects - ["a", r".", fn"c"] # again, equivalent to 1, above + `["a", r".", fn"c"] # almost equivalent to 1, above` but matching also files with leading '.' characters` * Each element of the vector will be used to match another level in the file hierarchy * no conversion of strings to `Glob.FilenameMatch` objects or directory splitting on `/` will occur. + 4. 
A `Glob.GlobMatch` object: + + `glob"a/?/c/*/**/*.png"` + `glob"**"` + + * `glob(glob"<...>")` requires exact matching of leading periods and supports globstar (`**`) matching + + Currently no options other than `rootdir` are supported for `Glob.GlobMatch` objects. + +* If `relative` is `true`, the returned paths will be relative to `rootdir`. +* If `filesonly` is `true`, only files will be returned. +* If `topdown` is `true`, the returned paths will be in top-down order. +* If `follow_symlinks` is `true`, symbolic links will be followed. +* `onerror` is a callback function that will be called in case of an error. + A trailing `/` (or equivalently, a trailing empty string in the vector) will cause glob to only match directories. -Attempting to use a pattern with a leading `/` or the empty string is an error; use the `directory` argument to specify the absolute path to the directory in such a case. +Attempting to use a pattern with a leading `/` or the empty string is an error; use the `rootdir` argument to specify the absolute path to the directory in such a case. """ -function glob(pattern, prefix::AbstractString="") - matches = String[prefix] +function glob(fn::FilenameMatch, rootdir::AbstractString = ""; + relative::Union{Bool, Nothing} = nothing, + filesonly::Bool = false, + topdown::Bool = true, + follow_symlinks::Bool = true, + onerror::Union{Function, Nothing} = nothing +) + if isempty(fn.pattern) || first(fn.pattern) == '/' + error("Glob pattern cannot be empty or start with a '/' character") + end + + onerror === nothing && (onerror = (error -> @warn(error))) + + dirmode = endswith(fn.pattern, '/') + dirmode && filesonly && return String[] + dirmode && (fn = FilenameMatch(fn.pattern[1:end-1], fn.options)) + + relative === nothing && (relative = isempty(rootdir)) + isempty(rootdir) && (rootdir = pwd()) + + matches = String[] + firstlevel = true + for (root, dirs, files) in @static(VERSION < v"1.1" ? 
walkdir(rootdir) : walkdir(rootdir; follow_symlinks = follow_symlinks, onerror = onerror)) + if !dirmode & !filesonly + prepend!(files, dirs) + # don't add rootdir + firstlevel || pushfirst!(files, root) + firstlevel = false + end + for file in (dirmode ? dirs : files) + sleep(0) # needed for proper handling of `onerror` in `walkdir`, seems to be a bug + file = joinpath(root, file) + relfile = relpath(file, rootdir) + relpattern = Sys.iswindows() ? replace(relfile, '\\' => '/') : relfile + + occursin(fn, relpattern) && push!(matches, relative ? relfile : file) + end + end + + @static VERSION < v"1.4" && sort!(matches) + + if !dirmode & !filesonly + unique!(sort!(matches)) + end + topdown || reverse!(matches) + + return matches +end + +function glob(s::AbstractString, rootdir::AbstractString = ""; + relative::Union{Bool, Nothing} = nothing, + filesonly::Bool = false, + topdown::Bool = true, + follow_symlinks::Bool = true, + onerror::Union{Function, Nothing} = nothing +) + fn = FilenameMatch(s, PATHNAME | PERIOD) + glob(fn, rootdir; relative = relative, filesonly = filesonly, topdown = topdown, follow_symlinks = follow_symlinks, onerror = onerror) +end + +function glob(g::GlobMatch, rootdir::AbstractString = ""; + relative::Union{Bool, Nothing} = nothing, + filesonly::Bool = false, + topdown::Bool = true, + follow_symlinks::Bool = true, + onerror::Union{Function, Nothing} = nothing +) + any(isa.(g.pattern, Regex)) && return _glob(g, rootdir) + + fn = FilenameMatch(join([fn isa AbstractString ? fn : fn.pattern for fn in g.pattern], "/"), PATHNAME | PERIOD) + glob(fn, rootdir; relative = relative, filesonly = filesonly, topdown = topdown, follow_symlinks = follow_symlinks, onerror = onerror) +end + +glob(pattern, rootdir::AbstractString="") = _glob(pattern, rootdir) + +""" + readdir(pattern::GlobMatch, [directory::AbstractString]) + +Alias for [`glob()`](@ref). 
+""" +readdir(pattern::GlobMatch, rootdir::AbstractString="") = glob(pattern, rootdir) + +function _glob(pattern, rootdir::AbstractString="") + matches = String[rootdir] for pat in GlobMatch(pattern).pattern matches = _glob!(matches, pat) end diff --git a/test/runtests.jl b/test/runtests.jl index cb1112e..6c9fce6 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -180,6 +180,55 @@ end @test glob([".", r".*"]) == map(x->joinpath(".",x), readdir()) @test all([!startswith(x,'.') for x in Glob.glob("*.*")]) +@test occursin(fn"**/*.png"d, "c.png") +@test occursin(fn"**/*.png"d, "a/c.png") +@test occursin(fn"**/*.png"d, "a/b/c.png") + +@test occursin(fn"/**/*.png"d, "/c.png") +@test occursin(fn"/**/*.png"d, "/a/c.png") +@test occursin(fn"/**/*.png"d, "/a/b/c.png") + +@test occursin(fn"**/*.png"d, "/c.png") +@test occursin(fn"**/*.png"d, "/a/c.png") +@test occursin(fn"**/*.png"d, "/a/b/c.png") + +@test !occursin(fn"/**/*.png"d, "c.png") +@test !occursin(fn"/**/*.png"d, "a/c.png") +@test !occursin(fn"/**/*.png"d, "a/b/c.png") + +@test occursin(fn"**.png"d, "c.png") +@test !occursin(fn"**.png"d, "a/b/c.png") + +@test occursin(fn"**"d, "c.png") +@test occursin(fn"**"d, "a/c.png") +@test occursin(fn"**"d, "/a/c.png") +@test occursin(fn"/**"d, "/a/c.png") +@test occursin(fn"/a/**"d, "/a/c.png") +@test !occursin(fn"/b/**"d, "/a/c.png") +@test !occursin(fn"/**"d, "a/c.png") + +@test occursin(fn"**/c/**/*"d, "a/b/c/d/e/test.png") +@test !occursin(fn"**/c/*/*"d, "a/b/c/d/e/test.png") +@test occursin(fn"**/c/**/*.png"d, "a/b/c/d/e/test.png") +@test !occursin(fn"**/c/**/*.png"d, "a/b/c/d/e/test.gif") + +@test occursin(fn"**/c/**/*.png"d, "a/b/c/d/e/.png") +@test occursin(fn"**/c/**/*png"d, "a/b/c/d/e/.png") +@test occursin(fn"**/c/**/?png"d, "a/b/c/d/e/.png") + +@test !occursin(fn"**/c/**/?png"dp, "a/b/c/d/e/.png") +@test !occursin(fn"**/c/**/*png"dp, "a/b/c/d/e/.png") + +@test !occursin(fn"**/c/**/?png"dp, "a/.b/c/d/e/apng") +@test !occursin(fn"**/c/**/?png"dp, 
".a/b/c/d/e/apng") +@test !occursin(fn"**/c/**/?png"dp, "a/b/c/d/e/.png") +@test !occursin(fn"*/**/*.png"d, "c.png") +@test !occursin(fn"**/*/*.png"d, "c.png") + +@test occursin(fn"**/c/**/*png"dp, "a/b/c/d/e/*png") +@test occursin(fn"**/c/**/*png"d, "a/b/c/d/e/.png") +@test !occursin(fn"**/c/**/*png"dp, "a/b/c/d/e/.png") + function test_string(x1) x2 = string(eval(Meta.parse(x1))) x1 == x2 ? nothing : error(string(