Skip to content

Commit

Permalink
pkg: forbid ** in path.Match and tool/file.Glob
Browse files Browse the repository at this point in the history
Currently the double-star pattern term `**` behaves like a star `*`,
as one star matches any sequence of non-separator characters
and the second star matches nothing.

This is confusing, as one may write the pattern `**/*.json` and it can
match strings like `foo/bar.json`, making it seem like `**` works as
a double star that would also match `foo.json` or `foo/bar/baz.json`
when in fact it does not.

If we choose to support `**` patterns in the future to match separators,
this would mean changing the behavior for existing users in a subtle way
which could lead to unintended behavior or latent bugs.
It is clearer for existing users to treat `**` as an invalid pattern,
which would allow us to add the feature in the future without concern
about existing users already using such patterns without error.

We also mirror Go's path.Match behavior to consume the entire pattern
to validate that it is always syntactically valid, so that we may
use it to check for `**` even when the matching has otherwise ended.
See https://go.dev/issue/28614 for details; we largely copied these
semantics from Go 1.15 already, but we omitted a bit of code that was
necessary for trailing double-stars to be caught as an error.

This change is driven by the embed proposal, where recursively embedding
entire directory trees is a relatively common use case that we want
to support somehow, and users might be misled by `**` silently working
in a way that doesn't match the user's expectation. We want path.Match,
tool/file.Glob, and embed patterns to behave consistently, so for that
reason, start consistently treating `**` as an error.

Note that this is a breaking change for any users whose pattern
includes `**` as two consecutive wildcards, and this is on purpose
for the reasons outlined above. Users who run into this problem
may not have been aware that it behaved like a single wildcard,
and in the majority of cases they should be able to use `*` instead.

For #1919.
Fixes #3315.

Signed-off-by: Daniel Martí <[email protected]>
Change-Id: Ifeb81f4818a863cd0e4c5c13bc0fc2ac54a0f22f
Reviewed-on: https://review.gerrithub.io/c/cue-lang/cue/+/1198636
Unity-Result: CUE porcuepine <[email protected]>
TryBot-Result: CUEcueckoo <[email protected]>
Reviewed-by: Roger Peppe <[email protected]>
  • Loading branch information
mvdan committed Jul 31, 2024
1 parent 25bb3d4 commit 987a85e
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 14 deletions.
28 changes: 24 additions & 4 deletions pkg/path/match.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ import (
// ErrBadPattern indicates a pattern was malformed.
var ErrBadPattern = errors.New("syntax error in pattern")

var errStarStarDisallowed = errors.New("'**' is not supported in patterns as of yet")

// Match reports whether name matches the shell file name pattern.
// The pattern syntax is:
//
Expand All @@ -51,13 +53,19 @@ var ErrBadPattern = errors.New("syntax error in pattern")
//
// On Windows, escaping is disabled. Instead, '\\' is treated as
// path separator.
//
// A pattern may not contain '**', as a wildcard matching separator characters
// is not supported at this time.
func Match(pattern, name string, o OS) (matched bool, err error) {
os := getOS(o)
Pattern:
for len(pattern) > 0 {
var star bool
var chunk string
star, chunk, pattern = scanChunk(pattern, os)
star, chunk, pattern, err = scanChunk(pattern, os)
if err != nil {
return false, err
}
if star && chunk == "" {
// Trailing * matches rest of string unless it has a /.
return !strings.Contains(name, string(os.Separator)), nil
Expand Down Expand Up @@ -92,17 +100,29 @@ Pattern:
}
}
}
// Before returning false with no error,
// check that the remainder of the pattern is syntactically valid.
for len(pattern) > 0 {
_, chunk, pattern, err = scanChunk(pattern, os)
if err != nil {
return false, err
}
}
return false, nil
}
return len(name) == 0, nil
}

// scanChunk gets the next segment of pattern, which is a non-star string
// possibly preceded by a star.
func scanChunk(pattern string, os os) (star bool, chunk, rest string) {
for len(pattern) > 0 && pattern[0] == '*' {
func scanChunk(pattern string, os os) (star bool, chunk, rest string, _ error) {
if len(pattern) > 0 && pattern[0] == '*' {
pattern = pattern[1:]
star = true
if len(pattern) > 0 && pattern[0] == '*' {
// ** is disallowed to allow for future functionality.
return false, "", "", errStarStarDisallowed
}
}
inrange := false
var i int
Expand All @@ -126,7 +146,7 @@ Scan:
}
}
}
return star, pattern[0:i], pattern[i:]
return star, pattern[0:i], pattern[i:], nil
}

// matchChunk checks whether chunk matches the beginning of s.
Expand Down
7 changes: 3 additions & 4 deletions pkg/path/match_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,10 +86,9 @@ var matchTests = []MatchTest{
{"a[", "x", false, ErrBadPattern},
{"a/b[", "x", false, ErrBadPattern},
{"*x", "xxx", true, nil},
// TODO(mvdan): this should fail; right now "**" happens to behave like "*".
{"**", "ab/c", false, nil},
{"**/c", "ab/c", true, nil},
{"a/b/**", "", false, nil},
{"**", "ab/c", false, errStarStarDisallowed},
{"**/c", "ab/c", false, errStarStarDisallowed},
{"a/b/**", "", false, errStarStarDisallowed},
{"\\**", "*ab", true, nil},
{"[x**y]", "*", true, nil},
}
Expand Down
12 changes: 12 additions & 0 deletions pkg/tool/file/file.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,12 @@ package file
import (
"os"
"path/filepath"
"runtime"

"cuelang.org/go/cue"
"cuelang.org/go/cue/errors"
"cuelang.org/go/internal/task"
pkgpath "cuelang.org/go/pkg/path"
)

func init() {
Expand Down Expand Up @@ -110,6 +112,16 @@ func (c *cmdGlob) Run(ctx *task.Context) (res interface{}, err error) {
if ctx.Err != nil {
return nil, ctx.Err
}
// Validate that the glob pattern is valid per [pkgpath.Match].
// Note that we use the current OS to match the semantics of [filepath.Glob],
// and since the APIs in this package are meant to support native paths.
os := pkgpath.Unix
if runtime.GOOS == "windows" {
os = pkgpath.Windows
}
if _, err := pkgpath.Match(glob, "", os); err != nil {
return nil, err
}
m, err := filepath.Glob(glob)
for i, s := range m {
m[i] = filepath.ToSlash(s)
Expand Down
10 changes: 4 additions & 6 deletions pkg/tool/file/file_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -125,13 +125,12 @@ func TestGlob(t *testing.T) {
qt.Assert(t, qt.DeepEquals(got, any(map[string]any{"files": []string{"testdata/input.foo"}})))

// globstar or recursive globbing is not supported.
// TODO(mvdan): this should fail; right now "**" happens to behave like "*".
v = parse(t, "tool/file.Glob", `{
glob: "testdata/**/glob.leaf"
}`)
got, err = (*cmdGlob).Run(nil, &task.Context{Obj: v})
qt.Assert(t, qt.IsNil(err))
qt.Assert(t, qt.DeepEquals(got, any(map[string]any{"files": []string{"testdata/glob1/glob.leaf"}})))
qt.Assert(t, qt.IsNotNil(err))
qt.Assert(t, qt.IsNil(got))
}

func TestGlobEscapeStar(t *testing.T) {
Expand All @@ -151,9 +150,8 @@ func TestGlobEscapeStar(t *testing.T) {
}`)
got, err := (*cmdGlob).Run(nil, &task.Context{Obj: v})
if runtime.GOOS == "windows" {
// TODO(mvdan): this should fail; right now "**" happens to behave like "*".
qt.Assert(t, qt.IsNil(err))
qt.Assert(t, qt.DeepEquals(got, any(map[string]any{"files": []string(nil)})))
qt.Assert(t, qt.IsNotNil(err))
qt.Assert(t, qt.Equals(got, nil))
} else {
qt.Assert(t, qt.IsNil(err))
qt.Assert(t, qt.DeepEquals(got, any(map[string]any{"files": []string{leafFile}})))
Expand Down

0 comments on commit 987a85e

Please sign in to comment.