Skip to content

Commit

Permalink
Merge pull request #1102 from giuseppe/zstd-chunked-support-sparse-files
Browse files Browse the repository at this point in the history
pkg/chunked: add support for sparse files
  • Loading branch information
rhatdan authored Jan 13, 2022
2 parents de0c6e9 + 1988208 commit 3ddd24a
Show file tree
Hide file tree
Showing 7 changed files with 593 additions and 85 deletions.
73 changes: 41 additions & 32 deletions pkg/chunked/cache_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,16 @@ func (c *layersCache) load() error {
continue
}

metadata, err := c.readMetadataFromCache(r.ID)
bigData, err := c.store.LayerBigData(r.ID, cacheKey)
if err != nil {
if errors.Cause(err) == os.ErrNotExist {
continue
}
return err
}
defer bigData.Close()

metadata, err := readMetadataFromCache(bigData)
if err != nil {
logrus.Warningf("Error reading cache file for layer %q: %v", r.ID, err)
}
Expand All @@ -117,7 +126,17 @@ func (c *layersCache) load() error {
continue
}

metadata, err = c.writeCache(r.ID)
manifestReader, err := c.store.LayerBigData(r.ID, bigDataKey)
if err != nil {
continue
}
defer manifestReader.Close()
manifest, err := ioutil.ReadAll(manifestReader)
if err != nil {
return fmt.Errorf("open manifest file for layer %q: %w", r.ID, err)
}

metadata, err = writeCache(manifest, r.ID, c.store)
if err == nil {
c.addLayer(r.ID, metadata)
}
Expand Down Expand Up @@ -182,20 +201,25 @@ func generateTag(digest string, offset, len uint64) string {
return fmt.Sprintf("%s%.20d@%.20d", digest, offset, len)
}

// setBigData is the destination writeCache stores the generated cache into.
// writeCache only calls SetLayerBigData, so any value providing that single
// method can be passed in place of the full store.
type setBigData interface {
// SetLayerBigData stores a (possibly large) chunk of named data
SetLayerBigData(id, key string, data io.Reader) error
}

// writeCache writes a cache for the layer ID.
// It generates a sorted list of digests with their offset to the path location and offset.
// The same cache is used to look up files, chunks and candidates for deduplication with hard links.
// There are 3 kinds of digests stored:
// - digest(file.payload)
// - digest(digest(file.payload) + file.UID + file.GID + file.mode + file.xattrs)
// - digest(i) for each i in chunks(file payload)
func (c *layersCache) writeCache(id string) (*metadata, error) {
func writeCache(manifest []byte, id string, dest setBigData) (*metadata, error) {
var vdata bytes.Buffer
tagLen := 0
digestLen := 0
var tagsBuffer bytes.Buffer

toc, err := c.prepareMetadata(id)
toc, err := prepareMetadata(manifest)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -317,7 +341,7 @@ func (c *layersCache) writeCache(id string) (*metadata, error) {

r := io.TeeReader(pipeReader, counter)

if err := c.store.SetLayerBigData(id, cacheKey, r); err != nil {
if err := dest.SetLayerBigData(id, cacheKey, r); err != nil {
return nil, err
}

Expand All @@ -328,22 +352,14 @@ func (c *layersCache) writeCache(id string) (*metadata, error) {
logrus.Debugf("Written lookaside cache for layer %q with length %v", id, counter.Count)

return &metadata{
tagLen: tagLen,
tags: tagsBuffer.Bytes(),
vdata: vdata.Bytes(),
digestLen: digestLen,
tagLen: tagLen,
tags: tagsBuffer.Bytes(),
vdata: vdata.Bytes(),
}, nil
}

func (c *layersCache) readMetadataFromCache(id string) (*metadata, error) {
bigData, err := c.store.LayerBigData(id, cacheKey)
if err != nil {
if errors.Cause(err) == os.ErrNotExist {
return nil, nil
}
return nil, err
}
defer bigData.Close()

func readMetadataFromCache(bigData io.Reader) (*metadata, error) {
var version, tagLen, digestLen, tagsLen, vdataLen uint64
if err := binary.Read(bigData, binary.LittleEndian, &version); err != nil {
return nil, err
Expand All @@ -370,7 +386,7 @@ func (c *layersCache) readMetadataFromCache(id string) (*metadata, error) {
}

vdata := make([]byte, vdataLen)
if _, err = bigData.Read(vdata); err != nil {
if _, err := bigData.Read(vdata); err != nil {
return nil, err
}

Expand All @@ -382,17 +398,7 @@ func (c *layersCache) readMetadataFromCache(id string) (*metadata, error) {
}, nil
}

func (c *layersCache) prepareMetadata(id string) ([]*internal.FileMetadata, error) {
manifestReader, err := c.store.LayerBigData(id, bigDataKey)
if err != nil {
return nil, nil
}
defer manifestReader.Close()
manifest, err := ioutil.ReadAll(manifestReader)
if err != nil {
return nil, fmt.Errorf("open manifest file for layer %q: %w", id, err)
}

func prepareMetadata(manifest []byte) ([]*internal.FileMetadata, error) {
toc, err := unmarshalToc(manifest)
if err != nil {
// ignore errors here. They might be caused by a different manifest format.
Expand All @@ -405,6 +411,7 @@ func (c *layersCache) prepareMetadata(id string) ([]*internal.FileMetadata, erro
d := toc.Entries[i].Digest
if d != "" {
r = append(r, &toc.Entries[i])
continue
}

// chunks do not use hard link dedup so keeping just one candidate is enough
Expand Down Expand Up @@ -473,7 +480,7 @@ func (c *layersCache) findDigestInternal(digest string) (string, string, int64,
if digest != "" {
position := string(layer.metadata.vdata[off : off+len])
parts := strings.SplitN(position, "@", 2)
offFile, _ := strconv.ParseInt(parts[1], 10, 64)
offFile, _ := strconv.ParseInt(parts[0], 10, 64)
return layer.target, parts[1], offFile, nil
}
}
Expand Down Expand Up @@ -517,7 +524,7 @@ func unmarshalToc(manifest []byte) (*internal.TOC, error) {
for iter.ReadArray() {
for field := iter.ReadObject(); field != ""; field = iter.ReadObject() {
switch field {
case "type", "name", "linkName", "digest", "chunkDigest":
case "type", "name", "linkName", "digest", "chunkDigest", "chunkType":
count += len(iter.ReadStringAsSlice())
case "xattrs":
for key := iter.ReadObject(); key != ""; key = iter.ReadObject() {
Expand Down Expand Up @@ -602,6 +609,8 @@ func unmarshalToc(manifest []byte) (*internal.TOC, error) {
m.ChunkOffset = iter.ReadInt64()
case "chunkDigest":
m.ChunkDigest = getString(iter.ReadStringAsSlice())
case "chunkType":
m.ChunkType = getString(iter.ReadStringAsSlice())
case "xattrs":
m.Xattrs = make(map[string]string)
for key := iter.ReadObject(); key != ""; key = iter.ReadObject() {
Expand Down
171 changes: 171 additions & 0 deletions pkg/chunked/cache_linux_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
package chunked

import (
"bytes"
"fmt"
"io"
"reflect"
"testing"
)

// jsonTOC is the table-of-contents fixture shared by the tests in this file.
// It describes one symlink, one directory, one regular file that carries both
// a file digest and a chunk digest, and one additional chunk entry for the
// same file.
const jsonTOC = `
{
"version": 1,
"entries": [
{
"type": "symlink",
"name": "bin",
"linkName": "usr/bin",
"mode": 511,
"modtime": "1970-01-01T01:00:00+01:00",
"accesstime": "0001-01-01T00:00:00Z",
"changetime": "0001-01-01T00:00:00Z"
},
{
"type": "dir",
"name": "usr/bin",
"mode": 511,
"modtime": "2022-01-07T12:36:43+01:00",
"accesstime": "0001-01-01T00:00:00Z",
"changetime": "0001-01-01T00:00:00Z"
},
{
"type": "reg",
"name": "usr/bin/foo",
"mode": 511,
"size": 103867,
"modtime": "1970-01-01T01:00:00+01:00",
"accesstime": "0001-01-01T00:00:00Z",
"changetime": "0001-01-01T00:00:00Z",
"digest": "sha256:99fe908c699dc068438b23e28319cadff1f2153c3043bafb8e83a430bba0a2c6",
"offset": 94149,
"endOffset": 120135,
"chunkSize": 17615,
"chunkDigest": "sha256:2ce0d0f8eb2aa93d13007097763e4459c814c8d0e859e5a57465af924169b544"
},
{
"type": "chunk",
"name": "usr/bin/foo",
"offset": 99939,
"chunkSize": 86252,
"chunkOffset": 17615,
"chunkDigest": "sha256:2a9d3f1b6b37abc8bb35eb8fa98b893a2a2447bcb01184c3bafc8c6b40da099d"
}
]
}
`

// TestPrepareMetadata checks that prepareMetadata succeeds on jsonTOC and
// returns exactly two entries for it.
func TestPrepareMetadata(t *testing.T) {
	toc, err := prepareMetadata([]byte(jsonTOC))
	if err != nil {
		// %w is only understood by fmt.Errorf; testing.T formats with %v.
		// The result is meaningless on error, so stop here.
		t.Fatalf("got error from prepareMetadata: %v", err)
	}
	if len(toc) != 2 {
		t.Errorf("prepareMetadata returns the wrong length: got %d, want 2", len(toc))
	}
}

// bigDataToBuffer is a test double whose SetLayerBigData copies the data it
// receives into an in-memory buffer and remembers the arguments of the call.
type bigDataToBuffer struct {
	buf    *bytes.Buffer // receives the data passed to SetLayerBigData; must be non-nil
	id     string        // id argument of the accepted call
	key    string        // key argument of the accepted call
	called bool          // set once a call has been accepted
}

// SetLayerBigData copies data into b.buf and records id and key.
// Only the first call is accepted. Any later call returns an error and leaves
// the recorded id, key and buffer contents untouched, so the state always
// describes the write that was actually stored.
func (b *bigDataToBuffer) SetLayerBigData(id, key string, data io.Reader) error {
	if b.called {
		return fmt.Errorf("SetLayerBigData already called once")
	}
	b.called = true
	b.id = id
	b.key = key
	_, err := io.Copy(b.buf, data)
	return err
}

// TestWriteCache builds the cache for jsonTOC with writeCache and verifies
// that, for every entry returned by prepareMetadata, the file digest, the
// hard link fingerprint and the chunk digest can each be found in the cache
// and resolve to the expected file location.
func TestWriteCache(t *testing.T) {
	toc, err := prepareMetadata([]byte(jsonTOC))
	if err != nil {
		// %w is only understood by fmt.Errorf; testing.T formats with %v.
		t.Fatalf("got error from prepareMetadata: %v", err)
	}

	dest := bigDataToBuffer{
		buf: bytes.NewBuffer(nil),
	}
	cache, err := writeCache([]byte(jsonTOC), "foobar", &dest)
	if err != nil {
		// cache is dereferenced below, so continuing would only panic.
		t.Fatalf("got error from writeCache: %v", err)
	}
	if digest, _, _ := findTag("foobar", cache); digest != "" {
		t.Error("found invalid tag")
	}

	// checkTag looks tag up in the cache and compares the location stored
	// for it with want. kind only labels the failure messages.
	checkTag := func(kind, tag string, want []byte) {
		t.Helper()
		found, off, length := findTag(tag, cache)
		if found == "" {
			t.Errorf("%s tag %q not found", kind, tag)
			return
		}
		if found != tag {
			t.Errorf("wrong %s tag found: %q instead of %q", kind, found, tag)
		}
		if location := cache.vdata[off : off+length]; !bytes.Equal(location, want) {
			t.Errorf("wrong %s location found %q instead of %q", kind, location, want)
		}
	}

	for _, r := range toc {
		if r.Digest != "" {
			// find the element in the cache by the digest checksum
			checkTag("file", r.Digest, generateFileLocation(r.Name, 0))

			fingerprint, err := calculateHardLinkFingerprint(r)
			if err != nil {
				t.Errorf("got error from calculateHardLinkFingerprint: %v", err)
			}

			// find the element in the cache by the hardlink fingerprint
			checkTag("hard link", fingerprint, generateFileLocation(r.Name, 0))
		}
		if r.ChunkDigest != "" {
			// find the element in the cache by the chunk digest checksum
			checkTag("chunk", r.ChunkDigest, generateFileLocation(r.Name, uint64(r.ChunkOffset)))
		}
	}
}

// TestReadCache writes the cache for jsonTOC into an in-memory buffer, reads
// it back with readMetadataFromCache and checks that the decoded metadata is
// deeply equal to what writeCache returned.
func TestReadCache(t *testing.T) {
	dest := bigDataToBuffer{
		buf: bytes.NewBuffer(nil),
	}
	cache, err := writeCache([]byte(jsonTOC), "foobar", &dest)
	if err != nil {
		// %w is only understood by fmt.Errorf; testing.T formats with %v.
		// Without a written cache there is nothing meaningful to read back.
		t.Fatalf("got error from writeCache: %v", err)
	}

	cacheRead, err := readMetadataFromCache(dest.buf)
	if err != nil {
		t.Fatalf("got error from readMetadataFromCache: %v", err)
	}
	if !reflect.DeepEqual(cache, cacheRead) {
		t.Errorf("read a different struct than what was written")
	}
}
1 change: 0 additions & 1 deletion pkg/chunked/chunked

This file was deleted.

Loading

0 comments on commit 3ddd24a

Please sign in to comment.