clean up layer merge, ish
Signed-off-by: Hamza El-Saawy <[email protected]>
helsaawy committed Oct 1, 2024
1 parent eabfe15 commit 93d7538
Showing 2 changed files with 158 additions and 108 deletions.
259 changes: 152 additions & 107 deletions internal/tools/lcowrootfs/layer.go
@@ -10,23 +10,39 @@ import (
"io"
"path"
"strings"

"github.com/sirupsen/logrus"
)

type layerMerge struct {
// Append a `/` to directory names to be consistent with what GNU and BSD tar do.
trailingSlash bool
TrailingSlash bool
// Set file and directory owner user and group ID to 0 (root) and remove user and group name.
overrideOwner bool
OverrideOwner bool
// Override the tar header format.
overrideTarFormat tar.Format
OverrideTarFormat tar.Format
// Replace `\` in path names with `/`.
// Intended for tar file created on Windows, where `\` is the filepath separator.
ConvertBackslash bool

tw *tar.Writer
fileMap map[string]bool
}

func newLayerMerge() *layerMerge {
func newLayerMerge(w io.Writer) *layerMerge {
return &layerMerge{
overrideTarFormat: tar.FormatPAX,
OverrideTarFormat: tar.FormatPAX,
fileMap:           map[string]bool{},
tw:                tar.NewWriter(w),
}
}

func (x *layerMerge) close() error {
tw := x.tw
x.tw = nil
clear(x.fileMap)
return tw.Close()
}
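
// For illustration, a minimal sketch (not part of this commit) of how layerMerge might be
// driven by a caller that has already opened its layer tarballs; mergeLayers is a hypothetical
// wrapper, not a function in this package.
func mergeLayers(out io.Writer, layers ...io.Reader) error {
	x := newLayerMerge(out)
	x.TrailingSlash = true    // match GNU/BSD tar directory naming
	x.OverrideOwner = true    // force root (0:0) ownership
	x.ConvertBackslash = true // normalize tars created on Windows

	if err := x.merge(out, layers...); err != nil {
		return err
	}
	return x.close()
}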

// merge combines multiple tar filesystems (image layers) together, and writes the result to w.
//
// Adapted from [containerregistry extract], with several key differences:
@@ -37,12 +53,9 @@ func newLayerMerge() *layerMerge {
//
// [containerregistry extract]: https://github.com/google/go-containerregistry/blob/a07d1cab8700a9875699d2e7052f47acec30399d/pkg/v1/mutate/mutate.go#L264
func (x *layerMerge) merge(w io.Writer, layers ...io.Reader) error {
const whiteoutPrefix = ".wh."

tarWriter := tar.NewWriter(w)
defer tarWriter.Close()

fileMap := map[string]bool{}
if x.tw == nil || x.fileMap == nil || len(x.fileMap) != 0 {
return fmt.Errorf("improperly created %T", x)
}

// todo: slices.reverse

@@ -51,41 +64,77 @@ func (x *layerMerge) merge(w io.Writer, layers ...io.Reader) error {
// files as we see .wh. layers and ignore those in previous layers.
for i := len(layers) - 1; i >= 0; i-- {
layer := layers[i]
layerReader, err := uncompress(layer)
if err := x.appendTo(layer); err != nil {
return err
}
}
return nil
}

// appendTo appends the tar layer's entries to the underlying tar writer.
//
// based off of: https://github.com/google/go-containerregistry/blob/a07d1cab8700a9875699d2e7052f47acec30399d/pkg/v1/mutate/mutate.go#L264
func (x *layerMerge) appendTo(layer io.Reader) error {
const whiteoutPrefix = ".wh."

r, err := uncompress(layer)
if err != nil {
return fmt.Errorf("uncompressing layer contents: %w", err)
}
defer r.Close()

tr := tar.NewReader(r)
for {
header, err := tr.Next()
if errors.Is(err, io.EOF) {
break
}
if err != nil {
return fmt.Errorf("uncompressing layer contents: %w", err)
return fmt.Errorf("reading tar: %w", err)
}

tarReader := tar.NewReader(layerReader)
for {
header, err := tarReader.Next()
if errors.Is(err, io.EOF) {
break
}
if err != nil {
return fmt.Errorf("reading tar: %w", err)
}
header.Name = x.normalize(header.Name)

entry := logrus.WithFields(logrus.Fields{
"directory": header.FileInfo().IsDir(),
"name": header.Name,
})

// use path.Clean to normalize the name and strip any leading `./`
header.Name = path.Clean(header.Name)
basename := path.Base(header.Name)
dirname := path.Dir(header.Name)
tombstone := strings.HasPrefix(basename, whiteoutPrefix)
if tombstone {
basename = basename[len(whiteoutPrefix):]
}

// check if we have seen this name before
// if we're checking a directory, don't path.Join the names
var name string
if header.Typeflag == tar.TypeDir {
name = header.Name
} else {
name = path.Join(dirname, basename)
}

if _, ok := x.fileMap[name]; ok {
continue
}

// check for a whited out parent directory
if x.inWhiteoutDir(name) {
continue
}

// header.Name = filepath.ToSlash(header.Name)
//
// update header (as needed)
//

if x.trailingSlash && header.Typeflag == tar.TypeDir && !strings.HasSuffix(header.Name, `/`) {
if x.TrailingSlash && header.Typeflag == tar.TypeDir && !strings.HasSuffix(header.Name, `/`) {
entry.Debug("append trailing slash to directory name")
header.Name += `/`
}

// if !strings.HasPrefix(header.Name, `./`) {
// header.Name = `./` + header.Name
// }

if overrideOwner && (header.Gid != 0 || header.Gname != "" || header.Uid != 0 || header.Uname != "") {
if x.OverrideOwner && (header.Gid != 0 || header.Gname != "" || header.Uid != 0 || header.Uname != "") {
entry.WithFields(logrus.Fields{
"group": header.Gid,
"groupname": header.Gname,
@@ -98,53 +147,36 @@ func (x *layerMerge) merge(w io.Writer, layers ...io.Reader) error {
header.Gname = ""
header.Uname = ""
}
if x.overrideTarFormat != tar.FormatUnknown {
header.Format = x.overrideTarFormat
}

basename := path.Base(header.Name)
dirname := path.Dir(header.Name)
tombstone := strings.HasPrefix(basename, whiteoutPrefix)
if tombstone {
basename = basename[len(whiteoutPrefix):]
}

// check if we have seen value before
// if we're checking a directory, don't filepath.Join names
var name string
if header.Typeflag == tar.TypeDir {
name = header.Name
} else {
name = path.Join(dirname, basename)
}
if x.OverrideTarFormat != tar.FormatUnknown && header.Format != x.OverrideTarFormat {
entry.WithFields(logrus.Fields{
"format": header.Format.String(),
"override-format": x.OverrideTarFormat.String(),
}).Debug("override tar format")

if _, ok := fileMap[name]; ok {
continue
}
header.Format = x.OverrideTarFormat
}

// check for a whited out parent directory
if inWhiteoutDir(fileMap, name) {
continue
// mark file as handled. non-directory implicitly tombstones
// any entries with a matching (or child) name
x.fileMap[name] = tombstone || !(header.Typeflag == tar.TypeDir)
if !tombstone {
if err := x.tw.WriteHeader(header); err != nil {
return err
}

// mark file as handled. non-directory implicitly tombstones
// any entries with a matching (or child) name
fileMap[name] = tombstone || !(header.Typeflag == tar.TypeDir)
if !tombstone {
if err := tarWriter.WriteHeader(header); err != nil {
if header.Size > 0 {
if _, err := io.CopyN(x.tw, tr, header.Size); err != nil {
return err
}
if header.Size > 0 {
if _, err := io.CopyN(tarWriter, tarReader, header.Size); err != nil {
return err
}
}
}
}
}

return nil
}
func inWhiteoutDir(fileMap map[string]bool, file string) bool {

// based off of: https://github.com/google/go-containerregistry/blob/a07d1cab8700a9875699d2e7052f47acec30399d/pkg/v1/mutate/mutate.go#L264
func (x *layerMerge) inWhiteoutDir(file string) bool {
for {
if file == "" {
break
@@ -153,35 +185,62 @@ func inWhiteoutDir(fileMap map[string]bool, file string) bool {
if file == dirname {
break
}
if val, ok := fileMap[dirname]; ok && val {
if val, ok := x.fileMap[dirname]; ok && val {
return true
}
file = dirname
}
return false
}
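
// For reference, a hedged sketch (not part of this commit) of the whiteout convention that
// appendTo and inWhiteoutDir rely on: a `.wh.`-prefixed basename in an upper layer tombstones
// the matching path in lower layers. whiteoutTarget is a hypothetical helper that mirrors the
// basename handling in appendTo.
func whiteoutTarget(name string) (target string, isWhiteout bool) {
	const whiteoutPrefix = ".wh."
	base := path.Base(name)
	if !strings.HasPrefix(base, whiteoutPrefix) {
		return name, false
	}
	return path.Join(path.Dir(name), strings.TrimPrefix(base, whiteoutPrefix)), true
}

// whiteoutTarget("etc/.wh.hosts") == ("etc/hosts", true): etc/hosts is removed from lower layers.
// whiteoutTarget("etc/hosts")     == ("etc/hosts", false): an ordinary entry.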

// todo: move this into ./internal/tar and use in cmd/tar2ext4 and cmd/wclayer
// normalize names to avoid duplicates by calling [path.Clean], then removing leading slashes
func (x *layerMerge) normalize(p string) string {
if x.ConvertBackslash {
p = strings.ReplaceAll(p, `\`, "/")
}
return strings.TrimLeft(path.Clean(p), "/")
}
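
// A short sketch (not part of this commit) of what normalize produces once ConvertBackslash
// is set; the expected results are shown in the trailing comments.
func normalizeExamples() {
	x := &layerMerge{ConvertBackslash: true}
	_ = x.normalize(`Files\Windows\System32\`) // "Files/Windows/System32"
	_ = x.normalize("/etc/hosts")              // "etc/hosts"
	_ = x.normalize("./usr/./bin/")            // "usr/bin"
}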

// merge combines overlays layers/filesystems together.
var gzipHeader = []byte{0x1F, 0x8B, 8}
// todo: move this into ./internal/tar and use in cmd/tar2ext4 and cmd/wclayer

// uncompress detects if the reader is compressed, and, if so, returns an uncompressed reader.
//
// Adapted from [containerregistry PeekCompression], but with [io.ReaderAt] support and without zstd
// compression.
// Adapted from [containerregistry PeekCompression], but with [io.ReaderAt] support.
//
// [containerregistry extract]: https://github.com/google/go-containerregistry/blob/a07d1cab8700a9875699d2e7052f47acec30399d/internal/compression/compression.go#L52
func uncompress(r io.Reader) (io.Reader, error) {
// a bufio.Reader
type peeker interface { // TODO: name this something better...
// [containerregistry PeekCompression]: https://github.com/google/go-containerregistry/blob/a07d1cab8700a9875699d2e7052f47acec30399d/internal/compression/compression.go#L52
func uncompress(r io.Reader) (io.ReadCloser, error) {
var gzipHeader = []byte{0x1F, 0x8B, 8}

r, chkFn := getCheckHeaderFn(r)

// layers can be tar+gzip or tar+zstd
// TODO: add zstd support
if ok, err := chkFn(gzipHeader); err != nil {
return nil, fmt.Errorf("check for gzip header: %w", err)
} else if ok {
return gzip.NewReader(r)
}

return io.NopCloser(r), nil
}
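
// A hedged usage sketch (not part of this commit): gzip a payload in memory, then read it back
// through uncompress; bytes.Reader implements io.ReaderAt, so the gzip magic is checked via ReadAt.
func uncompressExample() ([]byte, error) {
	var buf bytes.Buffer
	zw := gzip.NewWriter(&buf)
	if _, err := zw.Write([]byte("hello")); err != nil {
		return nil, err
	}
	if err := zw.Close(); err != nil {
		return nil, err
	}

	rc, err := uncompress(bytes.NewReader(buf.Bytes()))
	if err != nil {
		return nil, err
	}
	defer rc.Close()
	return io.ReadAll(rc) // "hello"
}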

// checkHeaderFn checks if the header was found in the underlying reader.
// it does not modify the reader's current state.
type checkHeaderFn func(header []byte) (bool, error)

// based off of: https://github.com/google/go-containerregistry/blob/a07d1cab8700a9875699d2e7052f47acec30399d/internal/compression/compression.go#L52
func getCheckHeaderFn(r io.Reader) (io.Reader, checkHeaderFn) {
type peekReader interface {
io.Reader
Peek(n int) ([]byte, error)
}

checkReadAt := func(r io.ReaderAt) func([]byte) (bool, error) {
return func(header []byte) (bool, error) {
var p peekReader
switch rr := r.(type) {
case io.ReaderAt:
fn := func(header []byte) (bool, error) {
b := make([]byte, len(header))
if n, err := r.ReadAt(b, 0); err == io.EOF {
if n, err := rr.ReadAt(b, 0); err == io.EOF {
return false, nil
} else if err != nil {
return false, err
@@ -190,35 +249,21 @@ func uncompress(r io.Reader) (io.Reader, error) {
}
return bytes.Equal(b, header), nil
}
}
checkPeek := func(r peeker) func([]byte) (bool, error) {
return func(header []byte) (bool, error) {
b, err := r.Peek(len(header))
if err == io.EOF {
return false, nil
} else if err != nil {
return false, err
}
return bytes.Equal(b, header), nil
}
}

var checkHeader func([]byte) (bool, error)
switch t := r.(type) {
case io.ReaderAt:
checkHeader = checkReadAt(t)
case peeker:
checkHeader = checkPeek(t)
return r, fn
case peekReader:
p = rr
default:
r = bufio.NewReader(r)
checkHeader = checkPeek(r.(*bufio.Reader))
p = bufio.NewReader(r)
}

if ok, err := checkHeader(gzipHeader); err != nil {
return nil, fmt.Errorf("check for gzip header: %w", err)
} else if ok {
return gzip.NewReader(r)
fn := func(header []byte) (bool, error) {
b, err := p.Peek(len(header))
if err == io.EOF {
return false, nil
} else if err != nil {
return false, err
}
return bytes.Equal(b, header), nil
}

return r, nil
return r, fn
}
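
// A brief sketch (not part of this commit) of the getCheckHeaderFn contract: callers must keep
// reading from the returned reader, since a plain io.Reader is wrapped in a bufio.Reader whose
// buffer holds the peeked bytes.
func checkHeaderExample(r io.Reader) ([]byte, error) {
	r, chk := getCheckHeaderFn(r)
	isGzip, err := chk([]byte{0x1F, 0x8B, 8})
	if err != nil {
		return nil, err
	}
	_ = isGzip // a caller would branch here, as uncompress does above
	// the full stream, including any peeked bytes, is still readable from the returned r
	return io.ReadAll(r)
}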
7 changes: 6 additions & 1 deletion internal/tools/lcowrootfs/main.go
@@ -89,6 +89,7 @@ const (
mergeFlagOutput = "output"
mergeFlagNoTrailingSlash = "no-trailing-slash"
mergeFlagNoOverrideOwner = "no-override-owner"
mergeFlagConvertSlash = "convert-slash"
)

var merge = &cli.Command{
@@ -116,6 +117,10 @@ be changed by extraction`, "\n", " "),
Name: mergeFlagNoOverrideOwner,
Usage: "do not set file owner UID and GID to 0",
},
&cli.BoolFlag{
Name: mergeFlagConvertSlash,
Usage: "convert backslashes ('\\') in path names to slashes ('/')",
},
},
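// Illustrative invocation, assuming the subcommand is named "merge" and using the flag
// names defined above (the binary name and layer file names are assumptions, not shown
// in this diff):
//
//	lcowrootfs merge --output rootfs.tar --convert-slash layer1.tar layer2.tar.gz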
// basically crane (github.com/google/go-containerregistry/cmd/crane) append and export
Action: func(cCtx *cli.Context) error {
@@ -221,7 +226,7 @@ func writeImage(w io.WriteCloser, img v1.Image, trailingSlash, overrideOwner boo
"name": header.Name,
})

// header.Name = filepath.ToSlash(header.Name)
header.Name = filepath.ToSlash(header.Name)

if trailingSlash && header.Typeflag == tar.TypeDir && !strings.HasSuffix(header.Name, `/`) {
entry.Debug("append trailing slash to directory name")