Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Hh globstar #38

Closed
wants to merge 9 commits into from
274 changes: 206 additions & 68 deletions src/Glob.jl
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ Returns a `Glob.FilenameMatch` object, which can be used with `ismatch()` or `oc
* `i` = `CASELESS` : Performs case-insensitive matching
* `p` = `PERIOD` : A leading period (`.`) character must be exactly matched by a period (`.`) character (not a `?`, `*`, or `[]`). A leading period is a period at the beginning of a string, or a period after a slash if PATHNAME is true.
* `e` = `NOESCAPE` : Do not treat backslash (`\`) as a special character (in extended mode, this applies only outside of `[]`)
* `d` = `PATHNAME` : A slash (`/`) character must be exactly matched by a slash (`/`) character (not a `?`, `*`, or `[]`)
* `d` = `PATHNAME` : A slash (`/`) character must be exactly matched by a slash (`/`) character (not a `?`, `*`, or `[]`); `**/` matches zero or more directories (globstar)
* `x` = `EXTENDED` : Additional features borrowed from newer shells, such as `bash` and `tcsh`
* Backslash (`\`) characters in `[]` groups escape the next character
"""
Expand All @@ -64,74 +64,117 @@ function occursin(fn::FilenameMatch, s::AbstractString)
noescape = (fn.options & NOESCAPE) != 0
pathname = (fn.options & PATHNAME) != 0
extended = (fn.options & EXTENDED) != 0

# if pattern ends with "**", append "/*" to allow matching of all files
pathname && endswith(pattern, "**") && (pattern *= "/*")

mi = firstindex(pattern) # current index into pattern
i = firstindex(s) # current index into s
starmatch = i
star = 0

# globstar_index = 1
globstar_mi = 0
globstarmatch = 0
period = periodfl
globstar_period = false # leading period detected during globstar match
while true
matchnext = iterate(s, i)
matchnext === nothing && break
patnext = iterate(pattern, mi)
if patnext === nothing
match = false # string characters left to match, but no pattern left
else
mc, mi = patnext
if mc == '*'
starmatch = i # backup the current search index
star = mi
c, _ = matchnext # peek-ahead
if period & (c == '.')
return false # * does not match leading .
end
match = true
star = 0
match_fails = false
globstar_mi > 0 && (mi = globstar_mi[end]) # reset pattern index of the latest globstar pattern, if it exists
while true
matchnext = iterate(s, i)
matchnext === nothing && break
patnext = iterate(pattern, mi)
if patnext === nothing
match = false # string characters left to match, but no pattern left
else
c, i = matchnext
if mc == '['
mi, valid, match = _match(pattern, mi, c, caseless, extended)
if pathname & valid & match & (c == '/')
match = false
end
if period & valid & match & (c == '.')
match = false
end
elseif mc == '?'
if pathname & (c == '/')
return false # ? does not match /
end
mc, mi = patnext
@debug s[i:end], pattern[mi:end]
if mc == '*' && pathname && length(pattern) > mi && pattern[mi:nextind(pattern, mi)] == "*/"
star = 0
mi += 2
globstarmatch = i
globstar_mi = mi
c = '/' # fake previous character to indicate end of directory
match = true
elseif mc == '*'
starmatch = i # backup the current search index
star = mi
c, _ = matchnext # peek-ahead
if period & (c == '.')
return false # ? does not match leading .
globstar_period = globstarmatch > 0
(match_fails = true) && break
end
match = true
else
if (!noescape) & (mc == '\\') # escape the next character after backslash, unless it is the last character
patnext = iterate(pattern, mi)
if patnext !== nothing
mc, mi = patnext
c, i = matchnext
if mc == '['
mi, valid, match = _match(pattern, mi, c, caseless, extended)
if pathname & valid & match & (c == '/')
match = false
end
if period & valid & match & (c == '.')
match = false
end
elseif mc == '?'
if pathname & (c == '/')
(match_fails = true) && break
end
if period & (c == '.')
globstar_period = globstarmatch > 0
(match_fails = true) && break
end
match = true
else
if (!noescape) & (mc == '\\') # escape the next character after backslash, unless it is the last character
patnext = iterate(pattern, mi)
if patnext !== nothing
mc, mi = patnext
end
end
match = ((c == mc) || (caseless && uppercase(c)==uppercase(mc)))
end
match = ((c == mc) || (caseless && uppercase(c)==uppercase(mc)))
globstar_period = period && globstarmatch > 0 && (c == '.')
end
end
if !match # try to backtrack and add another character to the last *
(star == 0) && (match_fails = true) && break
c, i = something(iterate(s, starmatch)) # starmatch is strictly <= i, so it is known that it must be a valid index
if pathname & (c == '/')
(match_fails = true) && break # return false # * does not match /
end
mi = star
starmatch = i
end
period = (periodfl & pathname & (c == '/'))
end
if !match # try to backtrack and add another character to the last *
star == 0 && return false
c, i = something(iterate(s, starmatch)) # starmatch is strictly <= i, so it is known that it must be a valid index
if pathname & (c == '/')
return false # * does not match /
while true # allow trailing *'s
patnext = iterate(pattern, mi)
patnext === nothing && break
mc, mi = patnext
if mc != '*'
# pattern characters left to match, but no string left
match_fails = true
end
mi = star
starmatch = i
end
period = (periodfl & pathname & (c == '/'))
end
while true # allow trailing *'s
patnext = iterate(pattern, mi)
patnext === nothing && break
mc, mi = patnext
mc == '*' || return false # pattern characters left to match, but no string left
if match_fails
# if in a globstar move to next directory, otherwise return false
if globstarmatch > 0
x = findnext('/', s, globstarmatch[end])
if x === nothing || globstar_period
globstarmatch = 0
globstar_mi = 0
globstar_period = false
else
globstarmatch = i = x + 1
period = periodfl
end
end
globstarmatch == 0 && return false
else
return true
end
end
return true
end

@deprecate ismatch(fn::FilenameMatch, s::AbstractString) occursin(fn, s)
Expand Down Expand Up @@ -377,40 +420,135 @@ function show(io::IO, gm::GlobMatch)
end

"""
readdir(pattern::GlobMatch, [directory::AbstractString])

Alias for [`glob()`](@ref).
"""
function readdir(pattern::GlobMatch, prefix::AbstractString="")
    # Thin alias: hand both arguments to `glob` unchanged and return its result.
    return glob(pattern, prefix)
end

"""
glob(pattern, [directory::AbstractString])
glob(pattern, rootdir = "";
relative::Union{Bool, Nothing} = nothing,
filesonly::Bool = false,
topdown::Bool = true,
follow_symlinks::Bool = true,
onerror::Union{Function, Nothing} = nothing
)

Returns a list of all files matching `pattern` in `directory`.

* If directory is not specified, it defaults to the current working directory.
* If rootdir is not specified, it defaults to the current working directory.
* Pattern can be any of:
1. A `Glob.GlobMatch` object:
1. A `Glob.FilenameMatch` object:

glob"a/?/c"
`fn"a/?/c"dp`

2. A string, which will be converted into a GlobMatch expression:
2. A string, which will be converted into a FilenameMatch expression:

"a/?/c" # equivalent to 1, above
`"a/?/c" # equivalent to 1, above`

3. A vector of strings and/or objects which implement `occursin`, including `Regex` and `Glob.FilenameMatch` objects

["a", r".", fn"c"] # again, equivalent to 1, above
`["a", r".", fn"c"] # almost equivalent to 1, above`, but also matching files with leading '.' characters

* Each element of the vector will be used to match another level in the file hierarchy
* no conversion of strings to `Glob.FilenameMatch` objects or directory splitting on `/` will occur.

4. A `Glob.GlobMatch` object:

`glob"a/?/c/*/**/*.png"`
`glob"**"`

* `glob(glob"<...>")` requires exact matching of leading periods and supports globstar (`**`) matching

Currently no options other than `rootdir` are supported for `Glob.GlobMatch` objects.

* If `relative` is `true`, the returned paths will be relative to `rootdir`.
* If `filesonly` is `true`, only files will be returned.
* If `topdown` is `true`, the returned paths will be in top-down order.
* If `follow_symlinks` is `true`, symbolic links will be followed.
* `onerror` is a callback function that will be called in case of an error.

A trailing `/` (or equivalently, a trailing empty string in the vector) will cause glob to only match directories.

Attempting to use a pattern with a leading `/` or the empty string is an error; use the `directory` argument to specify the absolute path to the directory in such a case.
Attempting to use a pattern with a leading `/` or the empty string is an error; use the `rootdir` argument to specify the absolute path to the directory in such a case.
"""
function glob(pattern, prefix::AbstractString="")
matches = String[prefix]
# Walk `rootdir` recursively and collect every entry whose path (relative to
# `rootdir`, always spelled with `/` separators) matches `fn`.
#
# Keywords:
# * `relative`        - return paths relative to `rootdir`; when `nothing`, it
#                       defaults to `true` exactly when `rootdir` is empty.
# * `filesonly`       - match plain files only (directories are never returned).
# * `topdown`         - when `false`, the sorted result is reversed.
# * `follow_symlinks` - forwarded to `walkdir` (Julia >= 1.1 only).
# * `onerror`         - forwarded to `walkdir` (Julia >= 1.1 only); defaults to
#                       logging the error with `@warn`.
#
# A pattern ending in `/` selects directories only. Combined with
# `filesonly = true` this can never match, so an empty vector is returned.
# Throws (via `error`) when the pattern is empty or starts with `/`.
function glob(fn::FilenameMatch, rootdir::AbstractString = "";
        relative::Union{Bool, Nothing} = nothing,
        filesonly::Bool = false,
        topdown::Bool = true,
        follow_symlinks::Bool = true,
        onerror::Union{Function, Nothing} = nothing
    )
    if isempty(fn.pattern) || first(fn.pattern) == '/'
        error("Glob pattern cannot be empty or start with a '/' character")
    end

    onerror === nothing && (onerror = (error -> @warn(error)))

    dirmode = endswith(fn.pattern, '/')
    dirmode && filesonly && return String[]
    if dirmode
        # Drop the trailing '/'. `prevind` is used instead of `end-1` because
        # string indices are byte offsets: `end-1` is not a valid index when
        # the character before the slash is multi-byte.
        pat = fn.pattern
        fn = FilenameMatch(pat[firstindex(pat):prevind(pat, lastindex(pat))], fn.options)
    end

    relative === nothing && (relative = isempty(rootdir))
    isempty(rootdir) && (rootdir = pwd())

    matches = String[]
    for (root, dirs, files) in @static(VERSION < v"1.1" ? walkdir(rootdir) : walkdir(rootdir; follow_symlinks = follow_symlinks, onerror = onerror))
        if !dirmode & !filesonly
            # Directories are candidates too. Every sub-directory is already
            # listed in its parent's `dirs`, so `root` itself is not added:
            # joining `root` with itself produces a bogus path whenever
            # `rootdir` is relative.
            prepend!(files, dirs)
        end
        for file in (dirmode ? dirs : files)
            sleep(0) # needed for proper handling of `onerror` in `walkdir`, seems to be a bug
            file = joinpath(root, file)
            relfile = relpath(file, rootdir)
            # Patterns always use '/', so normalise Windows separators before matching.
            relpattern = Sys.iswindows() ? replace(relfile, '\\' => '/') : relfile

            occursin(fn, relpattern) && push!(matches, relative ? relfile : file)
        end
    end

    @static VERSION < v"1.4" && sort!(matches)

    if !dirmode & !filesonly
        unique!(sort!(matches))
    end
    topdown || reverse!(matches)

    return matches
end

# String patterns: compile `s` into a `FilenameMatch` with the
# `PATHNAME | PERIOD` options and delegate to the `FilenameMatch` method.
# Every keyword, including `filesonly`, is forwarded unchanged.
function glob(s::AbstractString, rootdir::AbstractString = "";
        relative::Union{Bool, Nothing} = nothing,
        filesonly::Bool = false,
        topdown::Bool = true,
        follow_symlinks::Bool = true,
        onerror::Union{Function, Nothing} = nothing
    )
    fn = FilenameMatch(s, PATHNAME | PERIOD)
    return glob(fn, rootdir;
        relative = relative,
        filesonly = filesonly,
        topdown = topdown,
        follow_symlinks = follow_symlinks,
        onerror = onerror,
    )
end

# `GlobMatch` patterns: if any path component is a `Regex`, fall back to the
# level-by-level `_glob` implementation (the keyword options are not applied on
# that path). Otherwise the components are joined with `/` into one
# `FilenameMatch` compiled with `PATHNAME | PERIOD`, and every keyword,
# including `filesonly`, is forwarded to the `FilenameMatch` method.
function glob(g::GlobMatch, rootdir::AbstractString = "";
        relative::Union{Bool, Nothing} = nothing,
        filesonly::Bool = false,
        topdown::Bool = true,
        follow_symlinks::Bool = true,
        onerror::Union{Function, Nothing} = nothing
    )
    any(part -> part isa Regex, g.pattern) && return _glob(g, rootdir)

    # String components are used verbatim; other components contribute their `.pattern`.
    parts = [part isa AbstractString ? part : part.pattern for part in g.pattern]
    fn = FilenameMatch(join(parts, "/"), PATHNAME | PERIOD)
    return glob(fn, rootdir;
        relative = relative,
        filesonly = filesonly,
        topdown = topdown,
        follow_symlinks = follow_symlinks,
        onerror = onerror,
    )
end

# Catch-all method: any pattern type without a more specific `glob` method is
# handed to `_glob` together with `rootdir`.
function glob(pattern, rootdir::AbstractString="")
    return _glob(pattern, rootdir)
end

"""
readdir(pattern::GlobMatch, [rootdir::AbstractString])

Alias for [`glob()`](@ref).
"""
function readdir(pattern::GlobMatch, rootdir::AbstractString="")
    # Thin alias: hand both arguments to `glob` unchanged and return its result.
    return glob(pattern, rootdir)
end

function _glob(pattern, rootdir::AbstractString="")
matches = String[rootdir]
for pat in GlobMatch(pattern).pattern
matches = _glob!(matches, pat)
end
Expand Down
49 changes: 49 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,55 @@ end
@test glob([".", r".*"]) == map(x->joinpath(".",x), readdir())
@test all([!startswith(x,'.') for x in Glob.glob("*.*")])

# --- globstar (`**/`) tests; flag `d` = PATHNAME, flag `p` = PERIOD ---

# A leading `**/` matches zero, one, or several directory levels.
@test occursin(fn"**/*.png"d, "c.png")
@test occursin(fn"**/*.png"d, "a/c.png")
@test occursin(fn"**/*.png"d, "a/b/c.png")

# Same, with the pattern and the path both anchored by a leading `/`.
@test occursin(fn"/**/*.png"d, "/c.png")
@test occursin(fn"/**/*.png"d, "/a/c.png")
@test occursin(fn"/**/*.png"d, "/a/b/c.png")

# An unanchored `**/` pattern is also expected to match paths that start with `/`.
@test occursin(fn"**/*.png"d, "/c.png")
@test occursin(fn"**/*.png"d, "/a/c.png")
@test occursin(fn"**/*.png"d, "/a/b/c.png")

# A pattern anchored with `/` must not match paths lacking the leading `/`.
@test !occursin(fn"/**/*.png"d, "c.png")
@test !occursin(fn"/**/*.png"d, "a/c.png")
@test !occursin(fn"/**/*.png"d, "a/b/c.png")

# `**` not followed by `/` is expected not to match across `/`.
@test occursin(fn"**.png"d, "c.png")
@test !occursin(fn"**.png"d, "a/b/c.png")

# A pattern ending in `**` is expected to match everything below its prefix.
@test occursin(fn"**"d, "c.png")
@test occursin(fn"**"d, "a/c.png")
@test occursin(fn"**"d, "/a/c.png")
@test occursin(fn"/**"d, "/a/c.png")
@test occursin(fn"/a/**"d, "/a/c.png")
@test !occursin(fn"/b/**"d, "/a/c.png")
@test !occursin(fn"/**"d, "a/c.png")

# Several globstars in one pattern; a single `*` still covers exactly one level.
@test occursin(fn"**/c/**/*"d, "a/b/c/d/e/test.png")
@test !occursin(fn"**/c/*/*"d, "a/b/c/d/e/test.png")
@test occursin(fn"**/c/**/*.png"d, "a/b/c/d/e/test.png")
@test !occursin(fn"**/c/**/*.png"d, "a/b/c/d/e/test.gif")

# Without the PERIOD flag, `*` and `?` may match a leading `.` in a file name.
@test occursin(fn"**/c/**/*.png"d, "a/b/c/d/e/.png")
@test occursin(fn"**/c/**/*png"d, "a/b/c/d/e/.png")
@test occursin(fn"**/c/**/?png"d, "a/b/c/d/e/.png")

# With the PERIOD flag, `?` and `*` must not match a leading `.`.
@test !occursin(fn"**/c/**/?png"dp, "a/b/c/d/e/.png")
@test !occursin(fn"**/c/**/*png"dp, "a/b/c/d/e/.png")

# With the PERIOD flag, a dot-directory on the way (`.b`, `.a`) prevents a match.
@test !occursin(fn"**/c/**/?png"dp, "a/.b/c/d/e/apng")
@test !occursin(fn"**/c/**/?png"dp, ".a/b/c/d/e/apng")
@test !occursin(fn"**/c/**/?png"dp, "a/b/c/d/e/.png")
# Patterns that require at least one directory level do not match a bare file name.
@test !occursin(fn"*/**/*.png"d, "c.png")
@test !occursin(fn"**/*/*.png"d, "c.png")

# A file name that literally starts with `*` is matched; a leading `.` depends on the PERIOD flag.
@test occursin(fn"**/c/**/*png"dp, "a/b/c/d/e/*png")
@test occursin(fn"**/c/**/*png"d, "a/b/c/d/e/.png")
@test !occursin(fn"**/c/**/*png"dp, "a/b/c/d/e/.png")

function test_string(x1)
x2 = string(eval(Meta.parse(x1)))
x1 == x2 ? nothing : error(string(
Expand Down