Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move the tar-split digest into the TOC #1902

Merged
merged 5 commits into from
May 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
149 changes: 79 additions & 70 deletions pkg/chunked/cache_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -823,81 +823,90 @@ func unmarshalToc(manifest []byte) (*internal.TOC, error) {
iter := jsoniter.ParseBytes(jsoniter.ConfigFastest, manifest)

for field := iter.ReadObject(); field != ""; field = iter.ReadObject() {
if strings.ToLower(field) == "version" {
switch strings.ToLower(field) {
case "version":
toc.Version = iter.ReadInt()
continue
}
if strings.ToLower(field) != "entries" {
iter.Skip()
continue
}
for iter.ReadArray() {
var m internal.FileMetadata
for field := iter.ReadObject(); field != ""; field = iter.ReadObject() {
switch strings.ToLower(field) {
case "type":
m.Type = iter.ReadString()
case "name":
m.Name = iter.ReadString()
case "linkname":
m.Linkname = iter.ReadString()
case "mode":
m.Mode = iter.ReadInt64()
case "size":
m.Size = iter.ReadInt64()
case "uid":
m.UID = iter.ReadInt()
case "gid":
m.GID = iter.ReadInt()
case "modtime":
time, err := time.Parse(time.RFC3339, iter.ReadString())
if err != nil {
return nil, err
}
m.ModTime = &time
case "accesstime":
time, err := time.Parse(time.RFC3339, iter.ReadString())
if err != nil {
return nil, err
}
m.AccessTime = &time
case "changetime":
time, err := time.Parse(time.RFC3339, iter.ReadString())
if err != nil {
return nil, err
}
m.ChangeTime = &time
case "devmajor":
m.Devmajor = iter.ReadInt64()
case "devminor":
m.Devminor = iter.ReadInt64()
case "digest":
m.Digest = iter.ReadString()
case "offset":
m.Offset = iter.ReadInt64()
case "endoffset":
m.EndOffset = iter.ReadInt64()
case "chunksize":
m.ChunkSize = iter.ReadInt64()
case "chunkoffset":
m.ChunkOffset = iter.ReadInt64()
case "chunkdigest":
m.ChunkDigest = iter.ReadString()
case "chunktype":
m.ChunkType = iter.ReadString()
case "xattrs":
m.Xattrs = make(map[string]string)
for key := iter.ReadObject(); key != ""; key = iter.ReadObject() {
m.Xattrs[key] = iter.ReadString()

case "entries":
for iter.ReadArray() {
var m internal.FileMetadata
for field := iter.ReadObject(); field != ""; field = iter.ReadObject() {
switch strings.ToLower(field) {
case "type":
m.Type = iter.ReadString()
case "name":
m.Name = iter.ReadString()
case "linkname":
m.Linkname = iter.ReadString()
case "mode":
m.Mode = iter.ReadInt64()
case "size":
m.Size = iter.ReadInt64()
case "uid":
m.UID = iter.ReadInt()
case "gid":
m.GID = iter.ReadInt()
case "modtime":
time, err := time.Parse(time.RFC3339, iter.ReadString())
if err != nil {
return nil, err
}
m.ModTime = &time
case "accesstime":
time, err := time.Parse(time.RFC3339, iter.ReadString())
if err != nil {
return nil, err
}
m.AccessTime = &time
case "changetime":
time, err := time.Parse(time.RFC3339, iter.ReadString())
if err != nil {
return nil, err
}
m.ChangeTime = &time
case "devmajor":
m.Devmajor = iter.ReadInt64()
case "devminor":
m.Devminor = iter.ReadInt64()
case "digest":
m.Digest = iter.ReadString()
case "offset":
m.Offset = iter.ReadInt64()
case "endoffset":
m.EndOffset = iter.ReadInt64()
case "chunksize":
m.ChunkSize = iter.ReadInt64()
case "chunkoffset":
m.ChunkOffset = iter.ReadInt64()
case "chunkdigest":
m.ChunkDigest = iter.ReadString()
case "chunktype":
m.ChunkType = iter.ReadString()
case "xattrs":
m.Xattrs = make(map[string]string)
for key := iter.ReadObject(); key != ""; key = iter.ReadObject() {
m.Xattrs[key] = iter.ReadString()
}
default:
iter.Skip()
}
default:
iter.Skip()
}
if m.Type == TypeReg && m.Size == 0 && m.Digest == "" {
m.Digest = digestSha256Empty
}
toc.Entries = append(toc.Entries, m)
}
if m.Type == TypeReg && m.Size == 0 && m.Digest == "" {
m.Digest = digestSha256Empty

case "tarsplitdigest": // strings.ToLower("tarSplitDigest")
s := iter.ReadString()
d, err := digest.Parse(s)
if err != nil {
return nil, fmt.Errorf("Invalid tarSplitDigest %q: %w", s, err)
}
toc.Entries = append(toc.Entries, m)
toc.TarSplitDigest = d

default:
iter.Skip()
}
}

Expand Down
38 changes: 21 additions & 17 deletions pkg/chunked/compression_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -133,37 +133,36 @@ func readEstargzChunkedManifest(blobStream ImageSourceSeekable, blobSize int64,
}

// readZstdChunkedManifest reads the zstd:chunked manifest from the seekable stream blobStream.
func readZstdChunkedManifest(blobStream ImageSourceSeekable, tocDigest digest.Digest, annotations map[string]string) ([]byte, []byte, int64, error) {
// Returns (manifest blob, parsed manifest, tar-split blob, manifest offset).
func readZstdChunkedManifest(blobStream ImageSourceSeekable, tocDigest digest.Digest, annotations map[string]string) ([]byte, *internal.TOC, []byte, int64, error) {
offsetMetadata := annotations[internal.ManifestInfoKey]
if offsetMetadata == "" {
return nil, nil, 0, fmt.Errorf("%q annotation missing", internal.ManifestInfoKey)
return nil, nil, nil, 0, fmt.Errorf("%q annotation missing", internal.ManifestInfoKey)
}
var manifestChunk ImageSourceChunk
var manifestLengthUncompressed, manifestType uint64
if _, err := fmt.Sscanf(offsetMetadata, "%d:%d:%d:%d", &manifestChunk.Offset, &manifestChunk.Length, &manifestLengthUncompressed, &manifestType); err != nil {
return nil, nil, 0, err
return nil, nil, nil, 0, err
}
// The tarSplit… values are valid if tarSplitChunk.Offset > 0
var tarSplitChunk ImageSourceChunk
var tarSplitLengthUncompressed uint64
var tarSplitChecksum string
if tarSplitInfoKeyAnnotation, found := annotations[internal.TarSplitInfoKey]; found {
if _, err := fmt.Sscanf(tarSplitInfoKeyAnnotation, "%d:%d:%d", &tarSplitChunk.Offset, &tarSplitChunk.Length, &tarSplitLengthUncompressed); err != nil {
return nil, nil, 0, err
return nil, nil, nil, 0, err
}
tarSplitChecksum = annotations[internal.TarSplitChecksumKey]
}

if manifestType != internal.ManifestTypeCRFS {
return nil, nil, 0, errors.New("invalid manifest type")
return nil, nil, nil, 0, errors.New("invalid manifest type")
}

// set a reasonable limit
if manifestChunk.Length > (1<<20)*50 {
return nil, nil, 0, errors.New("manifest too big")
return nil, nil, nil, 0, errors.New("manifest too big")
}
if manifestLengthUncompressed > (1<<20)*50 {
return nil, nil, 0, errors.New("manifest too big")
return nil, nil, nil, 0, errors.New("manifest too big")
}

chunks := []ImageSourceChunk{manifestChunk}
Expand All @@ -172,7 +171,7 @@ func readZstdChunkedManifest(blobStream ImageSourceSeekable, tocDigest digest.Di
}
parts, errs, err := blobStream.GetBlobAt(chunks)
if err != nil {
return nil, nil, 0, err
return nil, nil, nil, 0, err
}

readBlob := func(len uint64) ([]byte, error) {
Expand All @@ -197,32 +196,37 @@ func readZstdChunkedManifest(blobStream ImageSourceSeekable, tocDigest digest.Di

manifest, err := readBlob(manifestChunk.Length)
if err != nil {
return nil, nil, 0, err
return nil, nil, nil, 0, err
}

decodedBlob, err := decodeAndValidateBlob(manifest, manifestLengthUncompressed, tocDigest.String())
if err != nil {
return nil, nil, 0, err
return nil, nil, nil, 0, fmt.Errorf("validating and decompressing TOC: %w", err)
}
toc, err := unmarshalToc(decodedBlob)
if err != nil {
return nil, nil, nil, 0, fmt.Errorf("unmarshaling TOC: %w", err)
}

decodedTarSplit := []byte{}
if tarSplitChunk.Offset > 0 {
tarSplit, err := readBlob(tarSplitChunk.Length)
if err != nil {
return nil, nil, 0, err
return nil, nil, nil, 0, err
}

decodedTarSplit, err = decodeAndValidateBlob(tarSplit, tarSplitLengthUncompressed, tarSplitChecksum)
decodedTarSplit, err = decodeAndValidateBlob(tarSplit, tarSplitLengthUncompressed, toc.TarSplitDigest.String())
if err != nil {
return nil, nil, 0, err
return nil, nil, nil, 0, fmt.Errorf("validating and decompressing tar-split: %w", err)
}
}
return decodedBlob, decodedTarSplit, int64(manifestChunk.Offset), err
return decodedBlob, toc, decodedTarSplit, int64(manifestChunk.Offset), err
}

func decodeAndValidateBlob(blob []byte, lengthUncompressed uint64, expectedCompressedChecksum string) ([]byte, error) {
d, err := digest.Parse(expectedCompressedChecksum)
if err != nil {
return nil, err
return nil, fmt.Errorf("invalid digest %q: %w", expectedCompressedChecksum, err)
}

blobDigester := d.Algorithm().Digester()
Expand Down
14 changes: 8 additions & 6 deletions pkg/chunked/internal/compression.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,9 @@ import (
)

type TOC struct {
Version int `json:"version"`
Entries []FileMetadata `json:"entries"`
Version int `json:"version"`
Entries []FileMetadata `json:"entries"`
TarSplitDigest digest.Digest `json:"tarSplitDigest,omitempty"`
}

type FileMetadata struct {
Expand Down Expand Up @@ -84,9 +85,10 @@ func GetType(t byte) (string, error) {
const (
ManifestChecksumKey = "io.github.containers.zstd-chunked.manifest-checksum"
ManifestInfoKey = "io.github.containers.zstd-chunked.manifest-position"
TarSplitChecksumKey = "io.github.containers.zstd-chunked.tarsplit-checksum"
TarSplitInfoKey = "io.github.containers.zstd-chunked.tarsplit-position"

TarSplitChecksumKey = "io.github.containers.zstd-chunked.tarsplit-checksum" // Deprecated: Use the TOC.TarSplitDigest field instead; this annotation is no longer read or written.

// ManifestTypeCRFS is a manifest file compatible with the CRFS TOC file.
ManifestTypeCRFS = 1

Expand Down Expand Up @@ -133,8 +135,9 @@ func WriteZstdChunkedManifest(dest io.Writer, outMetadata map[string]string, off
manifestOffset := offset + zstdSkippableFrameHeader

toc := TOC{
Version: 1,
Entries: metadata,
Version: 1,
Entries: metadata,
TarSplitDigest: tarSplitData.Digest,
}

json := jsoniter.ConfigCompatibleWithStandardLibrary
Expand Down Expand Up @@ -170,7 +173,6 @@ func WriteZstdChunkedManifest(dest io.Writer, outMetadata map[string]string, off
return err
}

outMetadata[TarSplitChecksumKey] = tarSplitData.Digest.String()
tarSplitOffset := manifestOffset + uint64(len(compressedManifest)) + zstdSkippableFrameHeader
outMetadata[TarSplitInfoKey] = fmt.Sprintf("%d:%d:%d", tarSplitOffset, len(tarSplitData.Data), tarSplitData.UncompressedSize)
if err := appendZstdSkippableFrame(dest, tarSplitData.Data); err != nil {
Expand Down
17 changes: 12 additions & 5 deletions pkg/chunked/storage_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ type compressedFileType int
type chunkedDiffer struct {
stream ImageSourceSeekable
manifest []byte
toc *internal.TOC // The parsed contents of manifest, or nil if not yet available
tarSplit []byte
layersCache *layersCache
tocOffset int64
Expand Down Expand Up @@ -314,7 +315,7 @@ func makeConvertFromRawDiffer(ctx context.Context, store storage.Store, blobDige
}

func makeZstdChunkedDiffer(ctx context.Context, store storage.Store, blobSize int64, tocDigest digest.Digest, annotations map[string]string, iss ImageSourceSeekable, storeOpts *types.StoreOptions) (*chunkedDiffer, error) {
manifest, tarSplit, tocOffset, err := readZstdChunkedManifest(iss, tocDigest, annotations)
manifest, toc, tarSplit, tocOffset, err := readZstdChunkedManifest(iss, tocDigest, annotations)
if err != nil {
return nil, fmt.Errorf("read zstd:chunked manifest: %w", err)
}
Expand All @@ -331,6 +332,7 @@ func makeZstdChunkedDiffer(ctx context.Context, store storage.Store, blobSize in
fileType: fileTypeZstdChunked,
layersCache: layersCache,
manifest: manifest,
toc: toc,
storeOpts: storeOpts,
stream: iss,
tarSplit: tarSplit,
Expand Down Expand Up @@ -1701,7 +1703,7 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
if tocDigest == nil {
return graphdriver.DriverWithDifferOutput{}, fmt.Errorf("internal error: just-created zstd:chunked missing TOC digest")
}
manifest, tarSplit, tocOffset, err := readZstdChunkedManifest(fileSource, *tocDigest, annotations)
manifest, toc, tarSplit, tocOffset, err := readZstdChunkedManifest(fileSource, *tocDigest, annotations)
if err != nil {
return graphdriver.DriverWithDifferOutput{}, fmt.Errorf("read zstd:chunked manifest: %w", err)
}
Expand All @@ -1712,6 +1714,7 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
// fill the chunkedDiffer with the data we just read.
c.fileType = fileTypeZstdChunked
c.manifest = manifest
c.toc = toc
c.tarSplit = tarSplit
c.tocOffset = tocOffset

Expand All @@ -1732,9 +1735,13 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
}

// Generate the manifest
toc, err := unmarshalToc(c.manifest)
if err != nil {
return graphdriver.DriverWithDifferOutput{}, err
toc := c.toc
if toc == nil {
toc_, err := unmarshalToc(c.manifest)
if err != nil {
return graphdriver.DriverWithDifferOutput{}, err
}
toc = toc_
}

output := graphdriver.DriverWithDifferOutput{
Expand Down
8 changes: 4 additions & 4 deletions pkg/chunked/zstdchunked_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
"github.com/containers/storage/pkg/chunked/toc"
"github.com/klauspost/compress/zstd"
"github.com/opencontainers/go-digest"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

Expand Down Expand Up @@ -153,10 +154,8 @@ func TestGenerateAndParseManifest(t *testing.T) {
tocDigest, err := toc.GetTOCDigest(annotations)
require.NoError(t, err)
require.NotNil(t, tocDigest)
manifest, _, _, err := readZstdChunkedManifest(s, *tocDigest, annotations)
if err != nil {
t.Error(err)
}
manifest, decodedTOC, _, _, err := readZstdChunkedManifest(s, *tocDigest, annotations)
require.NoError(t, err)

var toc internal.TOC
if err := json.Unmarshal(manifest, &toc); err != nil {
Expand All @@ -169,6 +168,7 @@ func TestGenerateAndParseManifest(t *testing.T) {
if len(toc.Entries) != len(someFiles) {
t.Fatal("Manifest mismatch")
}
assert.Equal(t, toc, *decodedTOC)
}

func TestGetTarType(t *testing.T) {
Expand Down