From 528944a219c9292ca8afcf8d93fcb43a122cd813 Mon Sep 17 00:00:00 2001 From: Valentin Rothberg Date: Fri, 26 Jul 2019 14:55:36 +0200 Subject: [PATCH] copy: set media types When copying an image, record the compression in the BlobInfo and use the information when updating the manifest's layer infos to set the layers' media types correctly. Note that consumers of the containers/image library need to update opencontainers/image-spec to commit 775207bd45b6cb8153ce218cc59351799217451f. Fixes: github.com/containers/libpod/issues/2013 Fixes: github.com/containers/buildah/issues/1589 Signed-off-by: Valentin Rothberg --- copy/copy.go | 6 +++ image/docker_schema2.go | 18 +++++++-- image/oci.go | 18 ++++++++- image/sourced.go | 1 + manifest/docker_schema2.go | 56 +++++++++++++++++++++++++- manifest/manifest.go | 12 +++++- manifest/oci.go | 60 +++++++++++++++++++++++++++- pkg/compression/compression.go | 73 +++++++++++++++++++--------------- storage/storage_image.go | 6 +-- types/types.go | 40 ++++++++++--------- vendor.conf | 2 +- 11 files changed, 228 insertions(+), 64 deletions(-) diff --git a/copy/copy.go b/copy/copy.go index 16c7900c67..6af46c6514 100644 --- a/copy/copy.go +++ b/copy/copy.go @@ -911,6 +911,12 @@ func (c *copier) copyBlobFromStream(ctx context.Context, srcStream io.Reader, sr return types.BlobInfo{}, errors.Wrap(err, "Error writing blob") } + uploadedInfo.CompressionOperation = compressionOperation + // If we can modify the layer's blob, set the desired algorithm for it to be set in the manifest. + if canModifyBlob && !isConfig { + uploadedInfo.CompressionAlgorithm = &desiredCompressionFormat + } + // This is fairly horrible: the writer from getOriginalLayerCopyWriter wants to consumer // all of the input (to compute DiffIDs), even if dest.PutBlob does not need it. 
// So, read everything from originalLayerReader, which will cause the rest to be diff --git a/image/docker_schema2.go b/image/docker_schema2.go index 351e73ea1d..d3c663febd 100644 --- a/image/docker_schema2.go +++ b/image/docker_schema2.go @@ -6,6 +6,7 @@ import ( "crypto/sha256" "encoding/hex" "encoding/json" + "fmt" "io/ioutil" "strings" @@ -207,12 +208,21 @@ func (m *manifestSchema2) convertToManifestOCI1(ctx context.Context) (types.Imag layers := make([]imgspecv1.Descriptor, len(m.m.LayersDescriptors)) for idx := range layers { layers[idx] = oci1DescriptorFromSchema2Descriptor(m.m.LayersDescriptors[idx]) - if m.m.LayersDescriptors[idx].MediaType == manifest.DockerV2Schema2ForeignLayerMediaType { + switch m.m.LayersDescriptors[idx].MediaType { + case manifest.DockerV2Schema2ForeignLayerMediaType: layers[idx].MediaType = imgspecv1.MediaTypeImageLayerNonDistributable - } else { - // we assume layers are gzip'ed because docker v2s2 only deals with - // gzip'ed layers. However, OCI has non-gzip'ed layers as well. 
+ case manifest.DockerV2Schema2ForeignLayerMediaTypeGzip: + layers[idx].MediaType = imgspecv1.MediaTypeImageLayerNonDistributableGzip + case manifest.DockerV2Schema2ForeignLayerMediaTypeZstd: + layers[idx].MediaType = imgspecv1.MediaTypeImageLayerNonDistributableZstd + case manifest.DockerV2SchemaLayerMediaTypeUncompressed: + layers[idx].MediaType = imgspecv1.MediaTypeImageLayer + case manifest.DockerV2Schema2LayerMediaType: layers[idx].MediaType = imgspecv1.MediaTypeImageLayerGzip + case manifest.DockerV2Schema2LayerMediaTypeZstd: + layers[idx].MediaType = imgspecv1.MediaTypeImageLayerZstd + default: + return nil, fmt.Errorf("Unknown media type during manifest conversion: %q", m.m.LayersDescriptors[idx].MediaType) } } diff --git a/image/oci.go b/image/oci.go index cdff26e06a..e5cd468dd0 100644 --- a/image/oci.go +++ b/image/oci.go @@ -3,6 +3,7 @@ package image import ( "context" "encoding/json" + "fmt" "io/ioutil" "github.com/containers/image/docker/reference" @@ -187,7 +188,22 @@ func (m *manifestOCI1) convertToManifestSchema2() (types.Image, error) { layers := make([]manifest.Schema2Descriptor, len(m.m.Layers)) for idx := range layers { layers[idx] = schema2DescriptorFromOCI1Descriptor(m.m.Layers[idx]) - layers[idx].MediaType = manifest.DockerV2Schema2LayerMediaType + switch layers[idx].MediaType { + case imgspecv1.MediaTypeImageLayerNonDistributable: + layers[idx].MediaType = manifest.DockerV2Schema2ForeignLayerMediaType + case imgspecv1.MediaTypeImageLayerNonDistributableGzip: + layers[idx].MediaType = manifest.DockerV2Schema2ForeignLayerMediaTypeGzip + case imgspecv1.MediaTypeImageLayerNonDistributableZstd: + layers[idx].MediaType = manifest.DockerV2Schema2ForeignLayerMediaTypeZstd + case imgspecv1.MediaTypeImageLayer: + layers[idx].MediaType = manifest.DockerV2SchemaLayerMediaTypeUncompressed + case imgspecv1.MediaTypeImageLayerGzip: + layers[idx].MediaType = manifest.DockerV2Schema2LayerMediaType + case imgspecv1.MediaTypeImageLayerZstd: + 
layers[idx].MediaType = manifest.DockerV2Schema2LayerMediaTypeZstd + default: + return nil, fmt.Errorf("Unknown media type during manifest conversion: %q", layers[idx].MediaType) + } } // Rather than copying the ConfigBlob now, we just pass m.src to the diff --git a/image/sourced.go b/image/sourced.go index 01cc28bbd2..c8364a1454 100644 --- a/image/sourced.go +++ b/image/sourced.go @@ -5,6 +5,7 @@ package image import ( "context" + "github.com/containers/image/types" ) diff --git a/manifest/docker_schema2.go b/manifest/docker_schema2.go index 76a80e5a6f..4f8a63f988 100644 --- a/manifest/docker_schema2.go +++ b/manifest/docker_schema2.go @@ -2,12 +2,15 @@ package manifest import ( "encoding/json" + "fmt" "time" + "github.com/containers/image/pkg/compression" "github.com/containers/image/pkg/strslice" "github.com/containers/image/types" "github.com/opencontainers/go-digest" "github.com/pkg/errors" + "github.com/sirupsen/logrus" ) // Schema2Descriptor is a “descriptor” in docker/distribution schema 2. @@ -207,7 +210,58 @@ func (m *Schema2) UpdateLayerInfos(layerInfos []types.BlobInfo) error { original := m.LayersDescriptors m.LayersDescriptors = make([]Schema2Descriptor, len(layerInfos)) for i, info := range layerInfos { - m.LayersDescriptors[i].MediaType = original[i].MediaType + // Set the correct media types based on the specified compression + // operation, the desired compression algorithm AND the original media + // type. + switch info.CompressionOperation { + case types.PreserveOriginal: + // Keep the original media type. + m.LayersDescriptors[i].MediaType = original[i].MediaType + + case types.Decompress: + // Decompress the original media type and check if it was + // non-distributable one or not. 
+ switch original[i].MediaType { + case DockerV2Schema2ForeignLayerMediaType, DockerV2Schema2ForeignLayerMediaTypeGzip, DockerV2Schema2ForeignLayerMediaTypeZstd: + m.LayersDescriptors[i].MediaType = DockerV2Schema2ForeignLayerMediaType + default: + m.LayersDescriptors[i].MediaType = DockerV2SchemaLayerMediaTypeUncompressed + } + + // TODO: should this only work on "known" media types? + // For background, please refer to: + // https://github.com/containers/image/pull/563#discussion_r316772562 + case types.Compress: + if info.CompressionAlgorithm == nil { + logrus.Debugf("Preparing updated manifest: blob %q was compressed but does not specify by which algorithm: falling back to use the original blob", info.Digest) + m.LayersDescriptors[i].MediaType = original[i].MediaType + break + } + // Compress the original media type and set the new one based on + // that type (distributable or not) and the specified compression + // algorithm. Throw an error if the algorithm is not supported. + switch info.CompressionAlgorithm.Name() { + case compression.Gzip.Name(): + switch original[i].MediaType { + case DockerV2Schema2ForeignLayerMediaType, DockerV2Schema2ForeignLayerMediaTypeGzip, DockerV2Schema2ForeignLayerMediaTypeZstd: + m.LayersDescriptors[i].MediaType = DockerV2Schema2ForeignLayerMediaTypeGzip + default: + m.LayersDescriptors[i].MediaType = DockerV2Schema2LayerMediaType + } + case compression.Zstd.Name(): + switch original[i].MediaType { + case DockerV2Schema2ForeignLayerMediaType, DockerV2Schema2ForeignLayerMediaTypeGzip, DockerV2Schema2ForeignLayerMediaTypeZstd: + m.LayersDescriptors[i].MediaType = DockerV2Schema2ForeignLayerMediaTypeZstd + default: + m.LayersDescriptors[i].MediaType = DockerV2Schema2LayerMediaTypeZstd + } + default: + return fmt.Errorf("Error preparing updated manifest: unknown compression algorithm %q for layer %q", info.CompressionAlgorithm.Name(), info.Digest) + } + + default: + return fmt.Errorf("Error preparing updated manifest: unknown compression operation (%d) for layer %q", info.CompressionOperation, info.Digest) + } m.LayersDescriptors[i].Digest = info.Digest 
m.LayersDescriptors[i].Size = info.Size m.LayersDescriptors[i].URLs = info.URLs diff --git a/manifest/manifest.go b/manifest/manifest.go index ae1921b6cc..d2113455c0 100644 --- a/manifest/manifest.go +++ b/manifest/manifest.go @@ -12,7 +12,7 @@ import ( // FIXME: Should we just use docker/distribution and docker/docker implementations directly? -// FIXME(runcom, mitr): should we havea mediatype pkg?? +// FIXME(runcom, mitr): should we have a mediatype pkg?? const ( // DockerV2Schema1MediaType MIME type represents Docker manifest schema 1 DockerV2Schema1MediaType = "application/vnd.docker.distribution.manifest.v1+json" @@ -24,10 +24,18 @@ const ( DockerV2Schema2ConfigMediaType = "application/vnd.docker.container.image.v1+json" // DockerV2Schema2LayerMediaType is the MIME type used for schema 2 layers. DockerV2Schema2LayerMediaType = "application/vnd.docker.image.rootfs.diff.tar.gzip" + // DockerV2Schema2LayerMediaTypeZstd is the MIME type used for schema 2 layers compressed with zstd. + DockerV2Schema2LayerMediaTypeZstd = "application/vnd.docker.image.rootfs.diff.tar.zstd" + // DockerV2SchemaLayerMediaTypeUncompressed is the mediaType used for uncompressed layers. + DockerV2SchemaLayerMediaTypeUncompressed = "application/vnd.docker.image.rootfs.diff.tar" // DockerV2ListMediaType MIME type represents Docker manifest schema 2 list DockerV2ListMediaType = "application/vnd.docker.distribution.manifest.list.v2+json" // DockerV2Schema2ForeignLayerMediaType is the MIME type used for schema 2 foreign layers. - DockerV2Schema2ForeignLayerMediaType = "application/vnd.docker.image.rootfs.foreign.diff.tar.gzip" + DockerV2Schema2ForeignLayerMediaType = "application/vnd.docker.image.rootfs.foreign.diff.tar" + // DockerV2Schema2ForeignLayerMediaTypeGzip is the MIME type used for gzipped schema 2 foreign layers. 
+ DockerV2Schema2ForeignLayerMediaTypeGzip = "application/vnd.docker.image.rootfs.foreign.diff.tar.gzip" + // DockerV2Schema2ForeignLayerMediaTypeZstd is the MIME type used for schema 2 foreign layers compressed with zstd. + DockerV2Schema2ForeignLayerMediaTypeZstd = "application/vnd.docker.image.rootfs.foreign.diff.tar.zstd" ) // DefaultRequestedManifestMIMETypes is a list of MIME types a types.ImageSource diff --git a/manifest/oci.go b/manifest/oci.go index dd65e0ba27..a3818c0efc 100644 --- a/manifest/oci.go +++ b/manifest/oci.go @@ -2,12 +2,15 @@ package manifest import ( "encoding/json" + "fmt" + "github.com/containers/image/pkg/compression" "github.com/containers/image/types" "github.com/opencontainers/go-digest" "github.com/opencontainers/image-spec/specs-go" imgspecv1 "github.com/opencontainers/image-spec/specs-go/v1" "github.com/pkg/errors" + "github.com/sirupsen/logrus" ) // BlobInfoFromOCI1Descriptor returns a types.BlobInfo based on the input OCI1 descriptor. @@ -81,7 +84,62 @@ func (m *OCI1) UpdateLayerInfos(layerInfos []types.BlobInfo) error { original := m.Layers m.Layers = make([]imgspecv1.Descriptor, len(layerInfos)) for i, info := range layerInfos { - m.Layers[i].MediaType = original[i].MediaType + // Set the correct media types based on the specified compression + // operation, the desired compression algorithm AND the original media + // type. + switch info.CompressionOperation { + case types.PreserveOriginal: + // Keep the original media type. + m.Layers[i].MediaType = original[i].MediaType + + case types.Decompress: + // Decompress the original media type and check if it was a + // non-distributable one or not. + switch original[i].MediaType { + case imgspecv1.MediaTypeImageLayerNonDistributable, imgspecv1.MediaTypeImageLayerNonDistributableGzip, imgspecv1.MediaTypeImageLayerNonDistributableZstd: + m.Layers[i].MediaType = imgspecv1.MediaTypeImageLayerNonDistributable + default: + m.Layers[i].MediaType = imgspecv1.MediaTypeImageLayer + } + + // TODO: should this only work on "known" media types? 
+ // For background, please refer to: + // https://github.com/containers/image/pull/563#discussion_r316772562 + case types.Compress: + if info.CompressionAlgorithm == nil { + logrus.Debugf("Preparing updated manifest: blob %q was compressed but does not specify by which algorithm: falling back to use the original blob", info.Digest) + m.Layers[i].MediaType = original[i].MediaType + break + } + // Compress the original media type and set the new one based on + // that type (distributable or not) and the specified compression + // algorithm. Throw an error if the algorithm is not supported. + switch info.CompressionAlgorithm.Name() { + case compression.Gzip.Name(): + switch original[i].MediaType { + case imgspecv1.MediaTypeImageLayerNonDistributable, imgspecv1.MediaTypeImageLayerNonDistributableGzip, imgspecv1.MediaTypeImageLayerNonDistributableZstd: + m.Layers[i].MediaType = imgspecv1.MediaTypeImageLayerNonDistributableGzip + + default: + m.Layers[i].MediaType = imgspecv1.MediaTypeImageLayerGzip + } + + case compression.Zstd.Name(): + switch original[i].MediaType { + case imgspecv1.MediaTypeImageLayerNonDistributable, imgspecv1.MediaTypeImageLayerNonDistributableGzip, imgspecv1.MediaTypeImageLayerNonDistributableZstd: + m.Layers[i].MediaType = imgspecv1.MediaTypeImageLayerNonDistributableZstd + + default: + m.Layers[i].MediaType = imgspecv1.MediaTypeImageLayerZstd + } + + default: + return fmt.Errorf("Error preparing updated manifest: unknown compression algorithm %q for layer %q", info.CompressionAlgorithm.Name(), info.Digest) + } + + default: + return fmt.Errorf("Error preparing updated manifest: unknown compression operation (%d) for layer %q", info.CompressionOperation, info.Digest) + } m.Layers[i].Digest = info.Digest m.Layers[i].Size = info.Size m.Layers[i].Annotations = info.Annotations diff --git a/pkg/compression/compression.go b/pkg/compression/compression.go index b42151cffc..267868c6ab 100644 --- a/pkg/compression/compression.go +++ b/pkg/compression/compression.go @@ -13,6 +13,46 @@ import ( "github.com/ulikunitz/xz" ) +// Algorithm is a compression 
algorithm that can be used for CompressStream. +type Algorithm struct { + name string + prefix []byte + decompressor DecompressorFunc + compressor compressorFunc +} + +var ( + // Gzip compression. + Gzip = Algorithm{"gzip", []byte{0x1F, 0x8B, 0x08}, GzipDecompressor, gzipCompressor} + // Bzip2 compression. + Bzip2 = Algorithm{"bzip2", []byte{0x42, 0x5A, 0x68}, Bzip2Decompressor, bzip2Compressor} + // Xz compression. + Xz = Algorithm{"xz", []byte{0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00}, XzDecompressor, xzCompressor} + // Zstd compression. + Zstd = Algorithm{"zstd", []byte{0x28, 0xb5, 0x2f, 0xfd}, ZstdDecompressor, zstdCompressor} + + compressionAlgorithms = map[string]Algorithm{ + Gzip.name: Gzip, + Bzip2.name: Bzip2, + Xz.name: Xz, + Zstd.name: Zstd, + } +) + +// Name returns the name for the compression algorithm. +func (c Algorithm) Name() string { + return c.name +} + +// AlgorithmByName returns the compressor by its name +func AlgorithmByName(name string) (Algorithm, error) { + algorithm, ok := compressionAlgorithms[name] + if ok { + return algorithm, nil + } + return Algorithm{}, fmt.Errorf("cannot find compressor for %q", name) +} + // DecompressorFunc returns the decompressed stream, given a compressed stream. // The caller must call Close() on the decompressed stream (even if the compressed input stream does not need closing!). type DecompressorFunc func(io.Reader) (io.ReadCloser, error) @@ -58,37 +98,6 @@ func xzCompressor(r io.Writer, level *int) (io.WriteCloser, error) { return xz.NewWriter(r) } -// Algorithm is a compression algorithm that can be used for CompressStream. -type Algorithm struct { - name string - prefix []byte - decompressor DecompressorFunc - compressor compressorFunc -} - -// Name returns the name for the compression algorithm. 
-func (c Algorithm) Name() string { - return c.name -} - -// compressionAlgos is an internal implementation detail of DetectCompression -var compressionAlgos = []Algorithm{ - {"gzip", []byte{0x1F, 0x8B, 0x08}, GzipDecompressor, gzipCompressor}, // gzip (RFC 1952) - {"bzip2", []byte{0x42, 0x5A, 0x68}, Bzip2Decompressor, bzip2Compressor}, // bzip2 (decompress.c:BZ2_decompress) - {"xz", []byte{0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00}, XzDecompressor, xzCompressor}, // xz (/usr/share/doc/xz/xz-file-format.txt) - {"zstd", []byte{0x28, 0xb5, 0x2f, 0xfd}, ZstdDecompressor, zstdCompressor}, // zstd (http://www.zstd.net) -} - -// AlgorithmByName returns the compressor by its name -func AlgorithmByName(name string) (Algorithm, error) { - for _, c := range compressionAlgos { - if c.name == name { - return c, nil - } - } - return Algorithm{}, fmt.Errorf("cannot find compressor for %q", name) -} - // CompressStream returns the compressor by its name func CompressStream(dest io.Writer, algo Algorithm, level *int) (io.WriteCloser, error) { return algo.compressor(dest, level) @@ -108,7 +117,7 @@ func DetectCompressionFormat(input io.Reader) (Algorithm, DecompressorFunc, io.R var retAlgo Algorithm var decompressor DecompressorFunc - for _, algo := range compressionAlgos { + for _, algo := range compressionAlgorithms { if bytes.HasPrefix(buffer[:n], algo.prefix) { logrus.Debugf("Detected compression format %s", algo.name) retAlgo = algo diff --git a/storage/storage_image.go b/storage/storage_image.go index 946a85f7b1..7d8860a508 100644 --- a/storage/storage_image.go +++ b/storage/storage_image.go @@ -345,9 +345,9 @@ func (s *storageImageDestination) Close() error { } func (s *storageImageDestination) DesiredLayerCompression() types.LayerCompression { - // We ultimately have to decompress layers to populate trees on disk, - // so callers shouldn't bother compressing them before handing them to - // us, if they're not already compressed. 
+ // We ultimately have to decompress layers to populate trees on disk + // and need to explicitly ask for it here, so that the layers' MIME + // types can be set accordingly. return types.PreserveOriginal } diff --git a/types/types.go b/types/types.go index b94af8dccb..852fe91b7d 100644 --- a/types/types.go +++ b/types/types.go @@ -8,7 +8,7 @@ import ( "github.com/containers/image/docker/reference" "github.com/containers/image/pkg/compression" "github.com/opencontainers/go-digest" - "github.com/opencontainers/image-spec/specs-go/v1" + v1 "github.com/opencontainers/image-spec/specs-go/v1" ) // ImageTransport is a top-level namespace for ways to to store/load an image. @@ -91,14 +91,29 @@ type ImageReference interface { DeleteImage(ctx context.Context, sys *SystemContext) error } +// LayerCompression indicates if layers must be compressed, decompressed or preserved +type LayerCompression int + +const ( + // PreserveOriginal indicates the layer must be preserved, ie + // no compression or decompression. + PreserveOriginal LayerCompression = iota + // Decompress indicates the layer must be decompressed + Decompress + // Compress indicates the layer must be compressed + Compress +) + // BlobInfo collects known information about a blob (layer/config). // In some situations, some fields may be unknown, in others they may be mandatory; documenting an “unknown” value here does not override that. type BlobInfo struct { - Digest digest.Digest // "" if unknown. - Size int64 // -1 if unknown - URLs []string - Annotations map[string]string - MediaType string + Digest digest.Digest // "" if unknown. + Size int64 // -1 if unknown + URLs []string + Annotations map[string]string + MediaType string + CompressionOperation LayerCompression + CompressionAlgorithm *compression.Algorithm } // BICTransportScope encapsulates transport-dependent representation of a “scope” where blobs are or are not present. 
@@ -212,19 +227,6 @@ type ImageSource interface { LayerInfosForCopy(ctx context.Context) ([]BlobInfo, error) } -// LayerCompression indicates if layers must be compressed, decompressed or preserved -type LayerCompression int - -const ( - // PreserveOriginal indicates the layer must be preserved, ie - // no compression or decompression. - PreserveOriginal LayerCompression = iota - // Decompress indicates the layer must be decompressed - Decompress - // Compress indicates the layer must be compressed - Compress -) - // ImageDestination is a service, possibly remote (= slow), to store components of a single image. // // There is a specific required order for some of the calls: diff --git a/vendor.conf b/vendor.conf index 7efc86e9e2..7bcd43bdde 100644 --- a/vendor.conf +++ b/vendor.conf @@ -16,7 +16,7 @@ github.com/imdario/mergo 50d4dbd4eb0e84778abe37cefef140271d96fade github.com/mistifyio/go-zfs c0224de804d438efd11ea6e52ada8014537d6062 github.com/mtrmac/gpgme b2432428689ca58c2b8e8dea9449d3295cf96fc9 github.com/opencontainers/go-digest c9281466c8b2f606084ac71339773efd177436e7 -github.com/opencontainers/image-spec v1.0.0 +github.com/opencontainers/image-spec 775207bd45b6cb8153ce218cc59351799217451f github.com/opencontainers/runc 6b1d0e76f239ffb435445e5ae316d2676c07c6e3 github.com/pborman/uuid 1b00554d822231195d1babd97ff4a781231955c9 github.com/pkg/errors 248dadf4e9068a0b3e79f02ed0a610d935de5302