slicer: Track and record paths in DB
This commit completes the Chisel DB implementation by recording path
values in the DB.

Recording paths is trickier because

  1. because of glob patterns, we don't know which paths to record
     until all packages are sliced,
  2. we need to record the path-to-slice relationship even for
     implicitly created parent directories, and
  3. we want to compute digests only once for source paths with
     multiple target paths.

This commit introduces the pathTracker interface and an implementation
that tackles the problems above. Its methods are called in various
places in slicer and as callbacks in deb/extract to update the state
of tracked paths. At a high level, these are the steps taken to track
paths:

  1. The callback interface in deb/extract is used to track created
     paths, their attributes, and their content digests.
  2. The same information is tracked for non-extracted content paths.
  3. Until-paths are untracked.
  4. Slices are assigned to both requested paths and their implicit
     parent directories.
  5. Digests of mutated files are updated.

After that, the tracked paths are recorded in the DB, as the condensed
sketch below illustrates.
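
A condensed sketch of that lifecycle, using hypothetical slice and path
names rather than code from this commit:

    track := newPathTracker()

    // (1, 4) Requested paths and globs are registered per slice.
    track.addSlicePath("libfoo_libs", "/usr/lib/libfoo.so.1")
    track.addSliceGlob("libfoo_libs", "/usr/lib/foo/**")

    // (1) deb/extract reports extracted paths through the onData and
    // onCreate callbacks wired into deb.ExtractOptions.

    // (2) Non-extracted content is added directly.
    track.addTarget("/etc/foo.conf", "", 0644, []byte("bar=1\n"))

    // (5) Mutation scripts mark the files they change.
    track.markMutated("/etc/foo.conf")

    // (3) Until-paths are untracked when they are removed.
    track.removeTarget("/usr/lib/foo/build-only.txt")

    // Attributes, digests, and slice assignments are reconciled, then
    // every tracked path is recorded in the DB.
    if err := track.updateTargets(targetDir); err != nil {
        return err
    }
    if err := track.updateDB(addToDB); err != nil {
        return err
    }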

Subjectively, the performance impact is minimal.

Note that copyright files are not owned by any slice. This is a bug
that will be fixed in future commits, which is why the nil Slices
attribute is kept in the expected DB objects in test cases.
woky committed Oct 9, 2023
1 parent 180aa16 commit 1fb555c
Showing 5 changed files with 1,775 additions and 14 deletions.
3 changes: 3 additions & 0 deletions internal/slicer/export_test.go
@@ -0,0 +1,3 @@
package slicer

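// StringSet re-exports the unexported stringSet type for use in tests
// outside this package.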
type StringSet = stringSet
244 changes: 244 additions & 0 deletions internal/slicer/pathtrack.go
@@ -0,0 +1,244 @@
package slicer

import (
"crypto/sha256"
"fmt"
"io"
"io/fs"
"os"
"path/filepath"
"sort"

"github.com/canonical/chisel/internal/db"
"github.com/canonical/chisel/internal/deb"
"github.com/canonical/chisel/internal/fsutil"
"github.com/canonical/chisel/internal/strdist"
)

type pathTracker interface {
// addSlicePath records that the path belongs to the slice.
addSlicePath(slice, path string)
// addSliceGlob records that paths matched by the glob belong to the
// slice.
addSliceGlob(slice, glob string)
// onData is a callback passed to deb.ExtractOptions.OnData. It
// records checksums of paths extracted from deb packages.
onData(source string, size int64) (deb.ConsumeData, error)
// onCreate is a callback passed to deb.ExtractOptions.OnCreate. It
// records metadata of paths extracted from deb packages.
onCreate(source, target, link string, mode fs.FileMode) error
// addTarget records the target path as being non-extracted content,
// i.e. not originating from a deb package.
addTarget(target, link string, mode fs.FileMode, data []byte)
// markMutated marks the target path as being changed by mutation
// scripts.
markMutated(target string)
// removeTarget removes the target path. It should be called for paths
// that have "until" attribute set.
removeTarget(target string)
// updateTargets reconciles changes made since the tracking started. It
// must be called before writing the database with updateDB.
updateTargets(root string) error
// updateDB calls addToDB for each recorded entry.
updateDB(addToDB AddToDB) error
}

type contentInfo struct {
size int64
digest *[sha256.Size]byte
}

func computeDigest(data []byte) *[sha256.Size]byte {
digest := sha256.Sum256(data)
return &digest
}

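// stringSet is a sorted set of strings backed by a slice. The zero
// value is ready to use; for example, stringSet(nil).AddMany("b", "a",
// "b") yields stringSet{"a", "b"}.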
type stringSet []string

func (s stringSet) AddOne(x string) (stringSet, bool) {
if s == nil {
return []string{x}, true
}
i := sort.SearchStrings(s, x)
if i == len(s) {
s = append(s, x)
} else if s[i] != x {
s = append(s[:i], append([]string{x}, s[i:]...)...)
} else {
return s, false
}
return s, true
}

func (s stringSet) AddMany(xs ...string) stringSet {
for _, x := range xs {
s, _ = s.AddOne(x)
}
return s
}

type pathTrackCtx struct {
pathSlices map[string]stringSet
globSlices map[string]stringSet
targetToSource map[string]string
sourceContent map[string]contentInfo
targets map[string]*db.Path
mutatedTargets map[string]bool
}

var _ pathTracker = (*pathTrackCtx)(nil)

func newPathTracker() pathTracker {
return &pathTrackCtx{
pathSlices: make(map[string]stringSet),
globSlices: make(map[string]stringSet),
targetToSource: make(map[string]string),
sourceContent: make(map[string]contentInfo),
targets: make(map[string]*db.Path),
mutatedTargets: make(map[string]bool),
}
}

func (ctx *pathTrackCtx) addSlicePath(slice, path string) {
ctx.pathSlices[path] = ctx.pathSlices[path].AddMany(slice)
}

func (ctx *pathTrackCtx) addSliceGlob(slice, glob string) {
ctx.globSlices[glob] = ctx.globSlices[glob].AddMany(slice)
}

func (ctx *pathTrackCtx) onData(source string, size int64) (deb.ConsumeData, error) {
// XXX: We should return nil if the source matches one of the
// until-paths. But that would require some additional expensive
// tracking. Until-paths are now untracked by removeTarget() called
// during their removal from the output directory.
consume := func(reader io.Reader) error {
data, err := io.ReadAll(reader)
if err != nil {
return err
}
digest := computeDigest(data)
ctx.sourceContent[source] = contentInfo{size, digest}
return nil
}
return consume, nil
}

func (ctx *pathTrackCtx) onCreate(source, target, link string, mode fs.FileMode) error {
info := db.Path{
Path: target,
Mode: mode,
Link: link,
}
ctx.targets[target] = &info
ctx.targetToSource[target] = source
return nil
}

func (ctx *pathTrackCtx) addTarget(target, link string, mode fs.FileMode, data []byte) {
info := db.Path{
Path: target,
Mode: mode,
Link: link,
}
if data != nil {
info.Size = int64(len(data))
info.SHA256 = computeDigest(data)
}
ctx.targets[target] = &info
// add parents
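// Implicitly created parent directories get a default 0755 entry so
// that slice ownership can be attached to them later in completeTarget.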
for parent := fsutil.SlashedPathDir(target); parent != "/"; parent = fsutil.SlashedPathDir(parent) {
if _, ok := ctx.targets[parent]; ok {
break
}
ctx.targets[parent] = &db.Path{
Path: parent,
Mode: fs.ModeDir | 0755,
}
}
}

func (ctx *pathTrackCtx) markMutated(target string) {
ctx.mutatedTargets[target] = true
}

func (ctx *pathTrackCtx) removeTarget(target string) {
delete(ctx.targets, target)
}

func (ctx *pathTrackCtx) completeTarget(info *db.Path) {
// keep only permission bits
info.Mode = info.Mode & 07777

// copy content info from OnData callbacks
source := ctx.targetToSource[info.Path]
if content, ok := ctx.sourceContent[source]; ok {
info.Size = content.size
info.SHA256 = content.digest
}

// assign slices
slices := ctx.pathSlices[info.Path]
for glob, globSlices := range ctx.globSlices {
if strdist.GlobPath(glob, info.Path) {
slices = slices.AddMany(globSlices...)
}
}

// assign slices to parents
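// Each slice propagates upward only while an ancestor doesn't already
// own it; once no slice is newly added at a level, the walk stops.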
path := info.Path
for len(slices) > 0 && path != "/" {
newSlices := []string{}
for _, sl := range slices {
if tmp, ok := stringSet(info.Slices).AddOne(sl); ok {
info.Slices = tmp
newSlices = append(newSlices, sl)
}
}
slices = newSlices
path = fsutil.SlashedPathDir(path)
info = ctx.targets[path]
}
}

// refreshTarget sets the final digest on mutated files.
func (ctx *pathTrackCtx) refreshTarget(info *db.Path, root string) error {
if !ctx.mutatedTargets[info.Path] || info.SHA256 == nil {
// not mutated or not a regular file
return nil
}
local := filepath.Join(root, info.Path)
data, err := os.ReadFile(local)
if err != nil {
return err
}
finalDigest := computeDigest(data)
if *finalDigest != *info.SHA256 {
info.FinalSHA256 = finalDigest
}
return nil
}

func (ctx *pathTrackCtx) updateTargets(root string) (err error) {
for _, info := range ctx.targets {
ctx.completeTarget(info)
if err = ctx.refreshTarget(info, root); err != nil {
break
}
}
return
}

func (ctx *pathTrackCtx) updateDB(addToDB AddToDB) error {
for _, info := range ctx.targets {
if err := addToDB(*info); err != nil {
return fmt.Errorf("cannot write path to db: %w", err)
}
for _, sl := range info.Slices {
content := db.Content{sl, info.Path}
if err := addToDB(content); err != nil {
return fmt.Errorf("cannot write content to db: %w", err)
}
}
}
return nil
}
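
For illustration, updateDB only requires an AddToDB callback with the
signature func(value any) error, as seen in slicer.go below. A minimal
sketch of such a callback, assuming a hypothetical JSON-lines dump (the
real DB writer lives outside this diff; imports: bytes, encoding/json):

    var buf bytes.Buffer
    addToDB := func(value any) error {
        line, err := json.Marshal(value)
        if err != nil {
            return err
        }
        buf.Write(line)
        return buf.WriteByte('\n')
    }

Every db.Path and db.Content value recorded by the tracker then becomes
one JSON object per line in buf.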
58 changes: 44 additions & 14 deletions internal/slicer/slicer.go
@@ -38,6 +38,7 @@ func Run(options *RunOptions) error {
if addToDB == nil {
addToDB = func(value any) error { return nil }
}
pathTrack := newPathTracker()

knownPaths["/"] = true

@@ -142,6 +143,11 @@ func Run(options *RunOptions) error {
})
}
}
if pathInfo.Kind == setup.GlobPath {
pathTrack.addSliceGlob(pkgSlice, targetPath)
} else {
pathTrack.addSlicePath(pkgSlice, targetPath)
}
}
if !hasCopyright {
extractPackage[copyrightPath] = append(extractPackage[copyrightPath], deb.ExtractInfo{
@@ -178,6 +184,8 @@ func Run(options *RunOptions) error {
Extract: extract[slice.Package],
TargetDir: targetDir,
Globbed: globbedPaths,
OnData: pathTrack.onData,
OnCreate: pathTrack.onCreate,
})
reader.Close()
packages[slice.Package] = nil
@@ -204,7 +212,7 @@ func Run(options *RunOptions) error {
continue
}
done[targetPath] = true
-targetPath = filepath.Join(targetDir, targetPath)
+localPath := filepath.Join(targetDir, targetPath)
targetMode := pathInfo.Mode
if targetMode == 0 {
if pathInfo.Kind == setup.DirPath {
@@ -216,7 +224,7 @@ func Run(options *RunOptions) error {

// Leverage tar handling of mode bits.
tarHeader := tar.Header{Mode: int64(targetMode)}
-var fileContent io.Reader
+var fileContent *bytes.Buffer
var linkTarget string
switch pathInfo.Kind {
case setup.TextPath:
@@ -230,10 +238,17 @@ func Run(options *RunOptions) error {
default:
return fmt.Errorf("internal error: cannot extract path of kind %q", pathInfo.Kind)
}
fsMode := tarHeader.FileInfo().Mode()

var data []byte
if fileContent != nil {
data = fileContent.Bytes()
}
pathTrack.addTarget(targetPath, linkTarget, fsMode, data)

err := fsutil.Create(&fsutil.CreateOptions{
-Path: targetPath,
-Mode: tarHeader.FileInfo().Mode(),
+Path: localPath,
+Mode: fsMode,
Data: fileContent,
Link: linkTarget,
MakeParents: true,
@@ -250,6 +265,7 @@ func Run(options *RunOptions) error {
if !pathInfos[path].Mutable {
return fmt.Errorf("cannot write file which is not mutable: %s", path)
}
pathTrack.markMutated(path)
return nil
}
checkRead := func(path string) error {
@@ -294,6 +310,10 @@ func Run(options *RunOptions) error {
}
}

if err := pathTrack.updateTargets(targetDir); err != nil {
return err
}

var untilDirs []string
for targetPath, pathInfo := range pathInfos {
if pathInfo.Until == setup.UntilMutate {
@@ -304,28 +324,38 @@ func Run(options *RunOptions) error {
targetPaths = []string{targetPath}
}
for _, targetPath := range targetPaths {
+if strings.HasSuffix(targetPath, "/") {
+untilDirs = append(untilDirs, targetPath)
+continue
+}
realPath, err := content.RealPath(targetPath, scripts.CheckRead)
if err == nil {
-if strings.HasSuffix(targetPath, "/") {
-untilDirs = append(untilDirs, realPath)
-} else {
-err = os.Remove(realPath)
-}
+err = os.Remove(realPath)
}
if err != nil {
return fmt.Errorf("cannot perform 'until' removal: %w", err)
}
pathTrack.removeTarget(targetPath)
}
}
}
-for _, realPath := range untilDirs {
-err := os.Remove(realPath)
-// The non-empty directory error is caught by IsExist as well.
-if err != nil && !os.IsExist(err) {
-return fmt.Errorf("cannot perform 'until' removal: %#v", err)
+for _, targetPath := range untilDirs {
+realPath, err := content.RealPath(targetPath, scripts.CheckRead)
+if err == nil {
+err = os.Remove(realPath)
+}
+if err == nil {
+pathTrack.removeTarget(targetPath)
+} else if !os.IsExist(err) {
+// The non-empty directory error is caught by IsExist as well.
+return fmt.Errorf("cannot perform 'until' removal: %w", err)
+}
}

if err := pathTrack.updateDB(addToDB); err != nil {
return err
}

return nil
}
