Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Follow links in includedPaths to resolve incorrect caching when source path is behind symlink #2318

Merged
merged 4 commits into from
Sep 8, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
119 changes: 110 additions & 9 deletions cache/contenthash/checksum.go
Original file line number Diff line number Diff line change
Expand Up @@ -506,21 +506,77 @@ func (cc *cacheContext) includedPaths(ctx context.Context, m *mount, p string, o
txn := cc.tree.Txn()
root = txn.Root()
var (
updated bool
iter *iradix.Iterator
k []byte
kOk bool
updated bool
iter *iradix.Iterator
k []byte
kOk bool
origPrefix string
resolvedPrefix string
)

iter = root.Iterator()

if opts.Wildcard {
k, _, kOk = iter.Next()
// For consistency with what the copy implementation in fsutil
// does: split pattern into non-wildcard prefix and rest of
// pattern, then follow symlinks when resolving the non-wildcard
// prefix.

d1, d2 := splitWildcards(p)
if d1 != "/" {
origPrefix = d1
k = convertPathToKey([]byte(d1))
linksWalked := 0
if d2 != "" {
// getFollowLinks only handles symlinks in path
// components before the last component, so
// handle last component in d1 specially.
for {
v, ok := root.Get(k)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure I get this. Isn't this supposed to walk per component? see getFollowLinksWalk()

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is a call to getFollowLinks below (line 568) which does the walk by component. This loop is just to handle the case where the final component is a symlink, which getFollowLinks does not handle.

I've added a test case for this specific scenario.


if !ok || v.(*CacheRecord).Type != CacheRecordTypeSymlink {
break
}

linksWalked++
if linksWalked > 255 {
return nil, errors.Errorf("too many links")
}

dirPath := path.Clean(d1)
if dirPath == "." || dirPath == "/" {
dirPath = ""
}
d1 = path.Clean(v.(*CacheRecord).Linkname)
if !path.IsAbs(d1) {
d1 = path.Clean(path.Join("/", path.Join(path.Dir(dirPath), d1)))
}
k = convertPathToKey([]byte(d1))
}
}
}
} else {
k = convertPathToKey([]byte(p))
if _, kOk = root.Get(k); kOk {
origPrefix = p
k = convertPathToKey([]byte(origPrefix))
}

if origPrefix != "" {
// We need to resolve symlinks here, in case the base path
// involves a symlink. That will match fsutil behavior of
// calling functions such as stat and walk.
var cr *CacheRecord
k, cr, err = getFollowLinks(root, k, true)
if err != nil {
return nil, err
}

if kOk = (cr != nil); kOk {
iter.SeekLowerBound(append(append([]byte{}, k...), 0))
}

resolvedPrefix = string(convertKeyToPath(k))
} else {
k, _, kOk = iter.Next()
}

var (
Expand All @@ -531,6 +587,20 @@ func (cc *cacheContext) includedPaths(ctx context.Context, m *mount, p string, o
for kOk {
fn := string(convertKeyToPath(k))

// Convert the path prefix from what we found in the prefix
// tree to what the argument specified.
//
// For example, if the original 'p' argument was /a/b and there
// is a symlink a->c, we want fn to be /a/b/foo rather than
// /c/b/foo. This is necessary to ensure correct pattern
// matching.
//
// When wildcards are enabled, this translation applies to the
// portion of 'p' before any wildcards.
if strings.HasPrefix(fn, resolvedPrefix) {
fn = origPrefix + strings.TrimPrefix(fn, resolvedPrefix)
}

for len(parentDirHeaders) != 0 {
lastParentDir := parentDirHeaders[len(parentDirHeaders)-1]
if strings.HasPrefix(fn, lastParentDir.Path+"/") {
Expand Down Expand Up @@ -570,8 +640,7 @@ func (cc *cacheContext) includedPaths(ctx context.Context, m *mount, p string, o
}
} else {
if !strings.HasPrefix(fn+"/", p+"/") {
k, _, kOk = iter.Next()
continue
break
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could you explain this?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At the beginning, we seek to the first path with this prefix (line 541). If we reach a path that don't match the prefix, we can stop iterating, because we're now outside the directory of interest. The old code would keep iterating, but there was no reason to - I think this was left over from before we had that initial seek. So breaking here is an optimization.

}

shouldInclude, err = shouldIncludePath(strings.TrimSuffix(strings.TrimPrefix(fn+"/", p+"/"), "/"), includePatternMatcher, excludePatternMatcher)
Expand Down Expand Up @@ -648,6 +717,38 @@ func shouldIncludePath(
return true, nil
}

func splitWildcards(p string) (d1, d2 string) {
parts := strings.Split(path.Join(p), "/")
var p1, p2 []string
var found bool
for _, p := range parts {
if !found && containsWildcards(p) {
found = true
}
if p == "" {
p = "/"
}
if !found {
p1 = append(p1, p)
} else {
p2 = append(p2, p)
}
}
return filepath.Join(p1...), filepath.Join(p2...)
}

func containsWildcards(name string) bool {
for i := 0; i < len(name); i++ {
ch := name[i]
if ch == '\\' {
i++
} else if ch == '*' || ch == '?' || ch == '[' {
return true
}
}
return false
}

func (cc *cacheContext) checksumNoFollow(ctx context.Context, m *mount, p string) (*CacheRecord, error) {
p = keyPath(p)

Expand Down
39 changes: 30 additions & 9 deletions cache/contenthash/checksum_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -639,8 +639,6 @@ func TestChecksumIncludeDoubleStar(t *testing.T) {
}

func TestChecksumIncludeSymlink(t *testing.T) {
t.Skip("Test will fail until https://github.com/moby/buildkit/issues/2300 is fixed")

t.Parallel()
tmpdir, err := ioutil.TempDir("", "buildkit-state")
require.NoError(t, err)
Expand All @@ -654,30 +652,53 @@ func TestChecksumIncludeSymlink(t *testing.T) {
ch := []string{
"ADD data dir",
"ADD data/d1 dir",
"ADD data/d1/d2 dir",
"ADD mnt dir",
"ADD mnt/data symlink ../data",
"ADD data/d1/foo file abc",
"ADD data/d1/d2/foo file abc",
}

ref := createRef(t, cm, ch)

cc, err := newCacheContext(ref.Metadata(), nil)
require.NoError(t, err)

dgst, err := cc.Checksum(context.TODO(), ref, "data/d1", ChecksumOpts{IncludePatterns: []string{"**/foo"}}, nil)
dgstD1, err := cc.Checksum(context.TODO(), ref, "data/d1", ChecksumOpts{IncludePatterns: []string{"**/foo"}}, nil)
require.NoError(t, err)
// File should be included
require.NotEqual(t, digest.FromBytes([]byte{}), dgstD1)

dgstMntD1, err := cc.Checksum(context.TODO(), ref, "mnt/data/d1", ChecksumOpts{IncludePatterns: []string{"**/foo"}}, nil)
require.NoError(t, err)
// File should be included despite symlink
require.Equal(t, dgstD1, dgstMntD1)

dgstD2, err := cc.Checksum(context.TODO(), ref, "data/d1/d2", ChecksumOpts{IncludePatterns: []string{"**/foo"}}, nil)
require.NoError(t, err)
// File should be included
require.NotEqual(t, digest.FromBytes([]byte{}), dgst)
require.NotEqual(t, digest.FromBytes([]byte{}), dgstD2)

dgst, err = cc.Checksum(context.TODO(), ref, "mnt/data/d1", ChecksumOpts{IncludePatterns: []string{"**/foo"}}, nil)
dgstMntD2, err := cc.Checksum(context.TODO(), ref, "mnt/data/d1/d2", ChecksumOpts{IncludePatterns: []string{"**/foo"}}, nil)
require.NoError(t, err)
// File should be included despite symlink
require.NotEqual(t, digest.FromBytes([]byte{}), dgst)
require.Equal(t, dgstD2, dgstMntD2)

// Same, with Wildcard = true
dgst, err = cc.Checksum(context.TODO(), ref, "mnt/data/d1", ChecksumOpts{IncludePatterns: []string{"**/foo"}, Wildcard: true}, nil)
dgstMntD1Wildcard, err := cc.Checksum(context.TODO(), ref, "mnt/data/d1", ChecksumOpts{IncludePatterns: []string{"**/foo"}, Wildcard: true}, nil)
require.NoError(t, err)
require.Equal(t, dgstD1, dgstMntD1Wildcard)

dgstMntD1Wildcard2, err := cc.Checksum(context.TODO(), ref, "mnt/data/d*", ChecksumOpts{IncludePatterns: []string{"**/foo"}, Wildcard: true}, nil)
require.NoError(t, err)
require.Equal(t, dgstD1, dgstMntD1Wildcard2)

dgstMntD2Wildcard, err := cc.Checksum(context.TODO(), ref, "mnt/data/d1/d2", ChecksumOpts{IncludePatterns: []string{"**/foo"}, Wildcard: true}, nil)
require.NoError(t, err)
require.Equal(t, dgstD2, dgstMntD2Wildcard)

dgstMntD2Wildcard2, err := cc.Checksum(context.TODO(), ref, "mnt/data/d1/d*", ChecksumOpts{IncludePatterns: []string{"**/foo"}, Wildcard: true}, nil)
require.NoError(t, err)
require.NotEqual(t, digest.FromBytes([]byte{}), dgst)
require.Equal(t, dgstD2, dgstMntD2Wildcard2)
}

func TestHandleChange(t *testing.T) {
Expand Down