Skip to content

Commit

Permalink
copy: set media types
Browse files Browse the repository at this point in the history
When copying an image, record the compression in the BlobInfo and use
the information when updating the manifest's layer infos to set the
layers' media types correctly.

Note that consumers of the containers/image library need to update
opencontainers/image-spec to commit 775207bd45b6cb8153ce218cc59351799217451f.

Fixes: github.com/containers/podman/issues/2013
Fixes: github.com/containers/buildah/issues/1589

Signed-off-by: Valentin Rothberg <[email protected]>
  • Loading branch information
vrothberg committed Aug 26, 2019
1 parent 713c8d4 commit 528944a
Show file tree
Hide file tree
Showing 11 changed files with 228 additions and 64 deletions.
6 changes: 6 additions & 0 deletions copy/copy.go
Original file line number Diff line number Diff line change
Expand Up @@ -911,6 +911,12 @@ func (c *copier) copyBlobFromStream(ctx context.Context, srcStream io.Reader, sr
return types.BlobInfo{}, errors.Wrap(err, "Error writing blob")
}

uploadedInfo.CompressionOperation = compressionOperation
// If we can modify the layer's blob, set the desired algorithm for it to be set in the manifest.
if canModifyBlob && !isConfig {
uploadedInfo.CompressionAlgorithm = &desiredCompressionFormat
}

// This is fairly horrible: the writer from getOriginalLayerCopyWriter wants to consume
// all of the input (to compute DiffIDs), even if dest.PutBlob does not need it.
// So, read everything from originalLayerReader, which will cause the rest to be
Expand Down
18 changes: 14 additions & 4 deletions image/docker_schema2.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"crypto/sha256"
"encoding/hex"
"encoding/json"
"fmt"
"io/ioutil"
"strings"

Expand Down Expand Up @@ -207,12 +208,21 @@ func (m *manifestSchema2) convertToManifestOCI1(ctx context.Context) (types.Imag
layers := make([]imgspecv1.Descriptor, len(m.m.LayersDescriptors))
for idx := range layers {
layers[idx] = oci1DescriptorFromSchema2Descriptor(m.m.LayersDescriptors[idx])
if m.m.LayersDescriptors[idx].MediaType == manifest.DockerV2Schema2ForeignLayerMediaType {
switch m.m.LayersDescriptors[idx].MediaType {
case manifest.DockerV2Schema2ForeignLayerMediaType:
layers[idx].MediaType = imgspecv1.MediaTypeImageLayerNonDistributable
} else {
// we assume layers are gzip'ed because docker v2s2 only deals with
// gzip'ed layers. However, OCI has non-gzip'ed layers as well.
case manifest.DockerV2Schema2ForeignLayerMediaTypeGzip:
layers[idx].MediaType = imgspecv1.MediaTypeImageLayerNonDistributableGzip
case manifest.DockerV2Schema2ForeignLayerMediaTypeZstd:
layers[idx].MediaType = imgspecv1.MediaTypeImageLayerNonDistributableZstd
case manifest.DockerV2SchemaLayerMediaTypeUncompressed:
layers[idx].MediaType = imgspecv1.MediaTypeImageLayer
case manifest.DockerV2Schema2LayerMediaType:
layers[idx].MediaType = imgspecv1.MediaTypeImageLayerGzip
case manifest.DockerV2Schema2LayerMediaTypeZstd:
layers[idx].MediaType = imgspecv1.MediaTypeImageLayerZstd
default:
return nil, fmt.Errorf("Unknown media type during manifest conversion: %q", m.m.LayersDescriptors[idx].MediaType)
}
}

Expand Down
18 changes: 17 additions & 1 deletion image/oci.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package image
import (
"context"
"encoding/json"
"fmt"
"io/ioutil"

"github.com/containers/image/docker/reference"
Expand Down Expand Up @@ -187,7 +188,22 @@ func (m *manifestOCI1) convertToManifestSchema2() (types.Image, error) {
layers := make([]manifest.Schema2Descriptor, len(m.m.Layers))
for idx := range layers {
layers[idx] = schema2DescriptorFromOCI1Descriptor(m.m.Layers[idx])
layers[idx].MediaType = manifest.DockerV2Schema2LayerMediaType
switch layers[idx].MediaType {
case imgspecv1.MediaTypeImageLayerNonDistributable:
layers[idx].MediaType = manifest.DockerV2Schema2ForeignLayerMediaType
case imgspecv1.MediaTypeImageLayerNonDistributableGzip:
layers[idx].MediaType = manifest.DockerV2Schema2ForeignLayerMediaTypeGzip
case imgspecv1.MediaTypeImageLayerNonDistributableZstd:
layers[idx].MediaType = manifest.DockerV2Schema2ForeignLayerMediaTypeZstd
case imgspecv1.MediaTypeImageLayer:
layers[idx].MediaType = manifest.DockerV2SchemaLayerMediaTypeUncompressed
case imgspecv1.MediaTypeImageLayerGzip:
layers[idx].MediaType = manifest.DockerV2Schema2LayerMediaType
case imgspecv1.MediaTypeImageLayerZstd:
layers[idx].MediaType = manifest.DockerV2Schema2LayerMediaTypeZstd
default:
return nil, fmt.Errorf("Unknown media type during manifest conversion: %q", layers[idx].MediaType)
}
}

// Rather than copying the ConfigBlob now, we just pass m.src to the
Expand Down
1 change: 1 addition & 0 deletions image/sourced.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ package image

import (
"context"

"github.com/containers/image/types"
)

Expand Down
56 changes: 55 additions & 1 deletion manifest/docker_schema2.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,15 @@ package manifest

import (
"encoding/json"
"fmt"
"time"

"github.com/containers/image/pkg/compression"
"github.com/containers/image/pkg/strslice"
"github.com/containers/image/types"
"github.com/opencontainers/go-digest"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
)

// Schema2Descriptor is a “descriptor” in docker/distribution schema 2.
Expand Down Expand Up @@ -207,7 +210,58 @@ func (m *Schema2) UpdateLayerInfos(layerInfos []types.BlobInfo) error {
original := m.LayersDescriptors
m.LayersDescriptors = make([]Schema2Descriptor, len(layerInfos))
for i, info := range layerInfos {
m.LayersDescriptors[i].MediaType = original[i].MediaType
// Set the correct media types based on the specified compression
// operation, the desired compression algorithm AND the original media
// type.
switch info.CompressionOperation {
case types.PreserveOriginal:
// Keep the original media type.
m.LayersDescriptors[i].MediaType = original[i].MediaType

case types.Decompress:
// Decompress the original media type and check if it was
// non-distributable one or not.
switch original[i].MediaType {
case DockerV2Schema2ForeignLayerMediaTypeGzip, DockerV2Schema2ForeignLayerMediaTypeZstd:
m.LayersDescriptors[i].MediaType = DockerV2Schema2ForeignLayerMediaType
default:
m.LayersDescriptors[i].MediaType = DockerV2SchemaLayerMediaTypeUncompressed
}

// TODO: should this only work on "known" media types?
// For background, please refer to:
// https://github.com/containers/image/pull/563#discussion_r316772562
case types.Compress:
if info.CompressionAlgorithm == nil {
logrus.Debugf("Preparing updated manifest: blob %q was compressed but does not specify by which algorithm: falling back to use the original blob", info.Digest)
m.LayersDescriptors[i].MediaType = original[i].MediaType
break
}
// Compress the original media type and set the new one based on
// that type (distributable or not) and the specified compression
// algorithm. Throw an error if the algorithm is not supported.
switch info.CompressionAlgorithm.Name() {
case compression.Gzip.Name():
switch original[i].MediaType {
case DockerV2Schema2ForeignLayerMediaType, DockerV2Schema2ForeignLayerMediaTypeZstd:
m.LayersDescriptors[i].MediaType = DockerV2Schema2ForeignLayerMediaTypeGzip
default:
m.LayersDescriptors[i].MediaType = DockerV2Schema2LayerMediaType
}
case compression.Zstd.Name():
switch original[i].MediaType {
case DockerV2Schema2ForeignLayerMediaType, DockerV2Schema2ForeignLayerMediaTypeGzip:
m.LayersDescriptors[i].MediaType = DockerV2Schema2ForeignLayerMediaTypeZstd
default:
m.LayersDescriptors[i].MediaType = DockerV2Schema2LayerMediaTypeZstd
}
default:
return fmt.Errorf("Error preparing updated manifest: unknown compression algorithm %q fo layer %q", info.CompressionAlgorithm.Name(), info.Digest)
}

default:
return fmt.Errorf("Error preparing updated manifest: unknown compression operation (%d) for layer %q", info.CompressionOperation, info.Digest)
}
m.LayersDescriptors[i].Digest = info.Digest
m.LayersDescriptors[i].Size = info.Size
m.LayersDescriptors[i].URLs = info.URLs
Expand Down
12 changes: 10 additions & 2 deletions manifest/manifest.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import (

// FIXME: Should we just use docker/distribution and docker/docker implementations directly?

// FIXME(runcom, mitr): should we havea mediatype pkg??
// FIXME(runcom, mitr): should we have a mediatype pkg??
const (
// DockerV2Schema1MediaType MIME type represents Docker manifest schema 1
DockerV2Schema1MediaType = "application/vnd.docker.distribution.manifest.v1+json"
Expand All @@ -24,10 +24,18 @@ const (
DockerV2Schema2ConfigMediaType = "application/vnd.docker.container.image.v1+json"
// DockerV2Schema2LayerMediaType is the MIME type used for schema 2 layers.
DockerV2Schema2LayerMediaType = "application/vnd.docker.image.rootfs.diff.tar.gzip"
// DockerV2Schema2LayerMediaTypeZstd is the MIME type used for schema 2 layers compressed with zstd.
DockerV2Schema2LayerMediaTypeZstd = "application/vnd.docker.image.rootfs.diff.tar.zstd"
// DockerV2SchemaLayerMediaTypeUncompressed is the mediaType used for uncompressed layers.
DockerV2SchemaLayerMediaTypeUncompressed = "application/vnd.docker.image.rootfs.diff.tar"
// DockerV2ListMediaType MIME type represents Docker manifest schema 2 list
DockerV2ListMediaType = "application/vnd.docker.distribution.manifest.list.v2+json"
// DockerV2Schema2ForeignLayerMediaType is the MIME type used for schema 2 foreign layers.
DockerV2Schema2ForeignLayerMediaType = "application/vnd.docker.image.rootfs.foreign.diff.tar.gzip"
DockerV2Schema2ForeignLayerMediaType = "application/vnd.docker.image.rootfs.foreign.diff.tar"
// DockerV2Schema2ForeignLayerMediaTypeGzip is the MIME type used for gzipped schema 2 foreign layers.
DockerV2Schema2ForeignLayerMediaTypeGzip = "application/vnd.docker.image.rootfs.foreign.diff.tar.gzip"
// DockerV2Schema2ForeignLayerMediaTypeZstd is the MIME type used for schema 2 foreign layers compressed with zstd.
DockerV2Schema2ForeignLayerMediaTypeZstd = "application/vnd.docker.image.rootfs.foreign.diff.tar.zstd"
)

// DefaultRequestedManifestMIMETypes is a list of MIME types a types.ImageSource
Expand Down
60 changes: 59 additions & 1 deletion manifest/oci.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,15 @@ package manifest

import (
"encoding/json"
"fmt"

"github.com/containers/image/pkg/compression"
"github.com/containers/image/types"
"github.com/opencontainers/go-digest"
"github.com/opencontainers/image-spec/specs-go"
imgspecv1 "github.com/opencontainers/image-spec/specs-go/v1"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
)

// BlobInfoFromOCI1Descriptor returns a types.BlobInfo based on the input OCI1 descriptor.
Expand Down Expand Up @@ -81,7 +84,62 @@ func (m *OCI1) UpdateLayerInfos(layerInfos []types.BlobInfo) error {
original := m.Layers
m.Layers = make([]imgspecv1.Descriptor, len(layerInfos))
for i, info := range layerInfos {
m.Layers[i].MediaType = original[i].MediaType
// Set the correct media types based on the specified compression
// operation, the desired compression algorithm AND the original media
// type.
switch info.CompressionOperation {
case types.PreserveOriginal:
// Keep the original media type.
m.Layers[i].MediaType = original[i].MediaType

case types.Decompress:
// Decompress the original media type and check if it was
// non-distributable one or not.
switch original[i].MediaType {
case imgspecv1.MediaTypeImageLayerNonDistributableGzip, imgspecv1.MediaTypeImageLayerNonDistributableZstd:
m.Layers[i].MediaType = imgspecv1.MediaTypeImageLayerNonDistributable
default:
m.Layers[i].MediaType = imgspecv1.MediaTypeImageLayer
}

// TODO: should this only work on "known" media types?
// For background, please refer to:
// https://github.com/containers/image/pull/563#discussion_r316772562
case types.Compress:
if info.CompressionAlgorithm == nil {
logrus.Debugf("Preparing updated manifest: blob %q was compressed but does not specify by which algorithm: falling back to use the original blob", info.Digest)
m.Layers[i].MediaType = original[i].MediaType
break
}
// Compress the original media type and set the new one based on
// that type (distributable or not) and the specified compression
// algorithm. Throw an error if the algorithm is not supported.
switch info.CompressionAlgorithm.Name() {
case compression.Gzip.Name():
switch original[i].MediaType {
case imgspecv1.MediaTypeImageLayerNonDistributable, imgspecv1.MediaTypeImageLayerNonDistributableZstd:
m.Layers[i].MediaType = imgspecv1.MediaTypeImageLayerNonDistributableGzip

default:
m.Layers[i].MediaType = imgspecv1.MediaTypeImageLayerGzip
}

case compression.Zstd.Name():
switch original[i].MediaType {
case imgspecv1.MediaTypeImageLayerNonDistributable, imgspecv1.MediaTypeImageLayerNonDistributableGzip:
m.Layers[i].MediaType = imgspecv1.MediaTypeImageLayerNonDistributableZstd

default:
m.Layers[i].MediaType = imgspecv1.MediaTypeImageLayerZstd
}

default:
return fmt.Errorf("Error preparing updated manifest: unknown compression algorithm %q for layer %q", info.CompressionAlgorithm.Name(), info.Digest)
}

default:
return fmt.Errorf("Error preparing updated manifest: unknown compression operation (%d) for layer %q", info.CompressionOperation, info.Digest)
}
m.Layers[i].Digest = info.Digest
m.Layers[i].Size = info.Size
m.Layers[i].Annotations = info.Annotations
Expand Down
73 changes: 41 additions & 32 deletions pkg/compression/compression.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,46 @@ import (
"github.com/ulikunitz/xz"
)

// Algorithm is a compression algorithm that can be used for CompressStream.
type Algorithm struct {
	// name is the value returned by Name() and the key under which the
	// algorithm is looked up by AlgorithmByName.
	name string
	// prefix holds the leading bytes that DetectCompressionFormat compares
	// against the start of a stream to recognize this algorithm.
	prefix []byte
	// decompressor turns a compressed stream into a decompressed one.
	decompressor DecompressorFunc
	// compressor is what CompressStream invokes to create the compressing writer.
	compressor compressorFunc
}

var (
	// Gzip compression; the prefix is the gzip magic number (RFC 1952).
	Gzip = Algorithm{"gzip", []byte{0x1F, 0x8B, 0x08}, GzipDecompressor, gzipCompressor}
	// Bzip2 compression; the prefix is the bzip2 magic number (decompress.c:BZ2_decompress).
	Bzip2 = Algorithm{"bzip2", []byte{0x42, 0x5A, 0x68}, Bzip2Decompressor, bzip2Compressor}
	// Xz compression; the prefix is the xz magic number (/usr/share/doc/xz/xz-file-format.txt).
	// The name must stay lowercase "xz": it is the key used by AlgorithmByName
	// and the value reported by Name().
	Xz = Algorithm{"xz", []byte{0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00}, XzDecompressor, xzCompressor}
	// Zstd compression; the prefix is the zstd magic number (http://www.zstd.net).
	Zstd = Algorithm{"zstd", []byte{0x28, 0xb5, 0x2f, 0xfd}, ZstdDecompressor, zstdCompressor}

	// compressionAlgorithms indexes every known algorithm by its name. It backs
	// AlgorithmByName and is ranged over by DetectCompressionFormat. Map
	// iteration order is unspecified, but the prefixes above all differ in their
	// first byte, so at most one entry can match a given stream.
	compressionAlgorithms = map[string]Algorithm{
		Gzip.name:  Gzip,
		Bzip2.name: Bzip2,
		Xz.name:    Xz,
		Zstd.name:  Zstd,
	}
)

// Name reports the name of the compression algorithm, i.e. the same string
// under which it can be looked up again.
func (a Algorithm) Name() string {
	return a.name
}

// AlgorithmByName looks up the compression Algorithm registered under name.
// It returns a zero Algorithm and an error if no algorithm has that name.
func AlgorithmByName(name string) (Algorithm, error) {
	if found, known := compressionAlgorithms[name]; known {
		return found, nil
	}
	return Algorithm{}, fmt.Errorf("cannot find compressor for %q", name)
}

// DecompressorFunc returns the decompressed stream, given a compressed stream.
// The caller must call Close() on the decompressed stream (even if the compressed input stream does not need closing!).
type DecompressorFunc func(io.Reader) (io.ReadCloser, error)
Expand Down Expand Up @@ -58,37 +98,6 @@ func xzCompressor(r io.Writer, level *int) (io.WriteCloser, error) {
return xz.NewWriter(r)
}

// Algorithm is a compression algorithm that can be used for CompressStream.
type Algorithm struct {
name string
prefix []byte
decompressor DecompressorFunc
compressor compressorFunc
}

// Name returns the name for the compression algorithm.
func (c Algorithm) Name() string {
return c.name
}

// compressionAlgos is an internal implementation detail of DetectCompression
var compressionAlgos = []Algorithm{
{"gzip", []byte{0x1F, 0x8B, 0x08}, GzipDecompressor, gzipCompressor}, // gzip (RFC 1952)
{"bzip2", []byte{0x42, 0x5A, 0x68}, Bzip2Decompressor, bzip2Compressor}, // bzip2 (decompress.c:BZ2_decompress)
{"xz", []byte{0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00}, XzDecompressor, xzCompressor}, // xz (/usr/share/doc/xz/xz-file-format.txt)
{"zstd", []byte{0x28, 0xb5, 0x2f, 0xfd}, ZstdDecompressor, zstdCompressor}, // zstd (http://www.zstd.net)
}

// AlgorithmByName returns the compressor by its name
func AlgorithmByName(name string) (Algorithm, error) {
for _, c := range compressionAlgos {
if c.name == name {
return c, nil
}
}
return Algorithm{}, fmt.Errorf("cannot find compressor for %q", name)
}

// CompressStream returns the io.WriteCloser created by algo's compressor for dest at the given level.
func CompressStream(dest io.Writer, algo Algorithm, level *int) (io.WriteCloser, error) {
return algo.compressor(dest, level)
Expand All @@ -108,7 +117,7 @@ func DetectCompressionFormat(input io.Reader) (Algorithm, DecompressorFunc, io.R

var retAlgo Algorithm
var decompressor DecompressorFunc
for _, algo := range compressionAlgos {
for _, algo := range compressionAlgorithms {
if bytes.HasPrefix(buffer[:n], algo.prefix) {
logrus.Debugf("Detected compression format %s", algo.name)
retAlgo = algo
Expand Down
6 changes: 3 additions & 3 deletions storage/storage_image.go
Original file line number Diff line number Diff line change
Expand Up @@ -345,9 +345,9 @@ func (s *storageImageDestination) Close() error {
}

// DesiredLayerCompression always returns types.PreserveOriginal for this destination.
func (s *storageImageDestination) DesiredLayerCompression() types.LayerCompression {
	// We ultimately have to decompress layers to populate trees on disk,
	// so callers shouldn't bother compressing them before handing them to
	// us, if they're not already compressed.
	// We ultimately have to decompress layers to populate trees on disk
	// and need to explicitly ask for it here, so that the layers' MIME
	// types can be set accordingly.
	// NOTE(review): the comment above speaks of explicitly asking for
	// decompression, yet the value returned is PreserveOriginal — confirm
	// which of the two is intended.
	return types.PreserveOriginal
}

Expand Down
Loading

0 comments on commit 528944a

Please sign in to comment.