From 488b6adf6d17d8dcf4d1bcb6b83d02bebfddcdd2 Mon Sep 17 00:00:00 2001 From: Zhijie Yang Date: Thu, 5 Sep 2024 12:51:59 +0200 Subject: [PATCH 1/4] feat: handle hardlinks symmetrically --- internal/archive/archive.go | 6 +- internal/archive/archive_test.go | 5 +- internal/cache/cache.go | 2 +- internal/deb/extract.go | 174 ++++++++++++++++++++++++++----- internal/deb/extract_test.go | 4 +- internal/fsutil/create.go | 23 ++-- internal/slicer/report.go | 28 ++--- internal/slicer/slicer.go | 11 +- internal/slicer/slicer_test.go | 73 ++++++++++++- internal/testutil/nop_closer.go | 19 ++++ 10 files changed, 278 insertions(+), 67 deletions(-) create mode 100644 internal/testutil/nop_closer.go diff --git a/internal/archive/archive.go b/internal/archive/archive.go index 2a552084..06414a70 100644 --- a/internal/archive/archive.go +++ b/internal/archive/archive.go @@ -18,7 +18,7 @@ import ( type Archive interface { Options() *Options - Fetch(pkg string) (io.ReadCloser, error) + Fetch(pkg string) (io.ReadSeekCloser, error) Exists(pkg string) bool } @@ -112,7 +112,7 @@ func (a *ubuntuArchive) selectPackage(pkg string) (control.Section, *ubuntuIndex return selectedSection, selectedIndex, nil } -func (a *ubuntuArchive) Fetch(pkg string) (io.ReadCloser, error) { +func (a *ubuntuArchive) Fetch(pkg string) (io.ReadSeekCloser, error) { section, index, err := a.selectPackage(pkg) if err != nil { return nil, err @@ -269,7 +269,7 @@ func (index *ubuntuIndex) checkComponents(components []string) error { return nil } -func (index *ubuntuIndex) fetch(suffix, digest string, flags fetchFlags) (io.ReadCloser, error) { +func (index *ubuntuIndex) fetch(suffix, digest string, flags fetchFlags) (io.ReadSeekCloser, error) { reader, err := index.archive.cache.Open(digest) if err == nil { return reader, nil diff --git a/internal/archive/archive_test.go b/internal/archive/archive_test.go index af741dbe..fd1d5ff7 100644 --- a/internal/archive/archive_test.go +++ b/internal/archive/archive_test.go @@ -490,8 +490,9 @@ func (s *S) testOpenArchiveArch(c *C, release ubuntuRelease, arch string) { c.Assert(err, IsNil) err = deb.Extract(pkg, &deb.ExtractOptions{ - Package: "hostname", - TargetDir: extractDir, + Package: "hostname", + TargetDir: extractDir, + StagingDir: c.MkDir(), Extract: map[string][]deb.ExtractInfo{ "/usr/share/doc/hostname/copyright": { {Path: "/copyright"}, diff --git a/internal/cache/cache.go b/internal/cache/cache.go index 09d6df03..1dabc0d1 100644 --- a/internal/cache/cache.go +++ b/internal/cache/cache.go @@ -132,7 +132,7 @@ func (c *Cache) Write(digest string, data []byte) error { return err2 } -func (c *Cache) Open(digest string) (io.ReadCloser, error) { +func (c *Cache) Open(digest string) (io.ReadSeekCloser, error) { if c.Dir == "" || digest == "" { return nil, MissErr } diff --git a/internal/deb/extract.go b/internal/deb/extract.go index 406c1991..7fd69ec2 100644 --- a/internal/deb/extract.go +++ b/internal/deb/extract.go @@ -14,17 +14,19 @@ import ( "syscall" "github.com/blakesmith/ar" - "github.com/klauspost/compress/zstd" - "github.com/ulikunitz/xz" - "github.com/canonical/chisel/internal/fsutil" "github.com/canonical/chisel/internal/strdist" + "github.com/klauspost/compress/zstd" + "github.com/ulikunitz/xz" ) type ExtractOptions struct { Package string TargetDir string - Extract map[string][]ExtractInfo + // StageDir is the directory where the hard link base file is located, + // if this file is not within the target paths. + StagingDir string + Extract map[string][]ExtractInfo // Create can optionally be set to control the creation of extracted entries. // extractInfos is set to the matching entries in Extract, and is nil in cases where // the created entry is implicit and unlisted (for example, parent directories). @@ -38,6 +40,22 @@ type ExtractInfo struct { Context any } +type hardLinkRevMapEntry struct { + Target []string + Identifier int + inStaging bool +} + +type hardLink struct { + Target string + Identifier int +} + +type tarMetadata struct { + HardLinks map[string]hardLink + HardLinkRevMap map[string]hardLinkRevMapEntry +} + func getValidOptions(options *ExtractOptions) (*ExtractOptions, error) { for extractPath, extractInfos := range options.Extract { isGlob := strings.ContainsAny(extractPath, "*?") @@ -62,7 +80,7 @@ func getValidOptions(options *ExtractOptions) (*ExtractOptions, error) { return options, nil } -func Extract(pkgReader io.Reader, options *ExtractOptions) (err error) { +func Extract(pkgReader io.ReadSeeker, options *ExtractOptions) (err error) { defer func() { if err != nil { err = fmt.Errorf("cannot extract from package %q: %w", options.Package, err) @@ -83,44 +101,46 @@ func Extract(pkgReader io.Reader, options *ExtractOptions) (err error) { return err } + return extractData(pkgReader, validOpts) +} + +func getDataReader(pkgReader io.ReadSeeker, close bool) (io.ReadCloser, error) { arReader := ar.NewReader(pkgReader) - var dataReader io.Reader + var dataReader io.ReadCloser for dataReader == nil { arHeader, err := arReader.Next() if err == io.EOF { - return fmt.Errorf("no data payload") + return nil, fmt.Errorf("no data payload") } if err != nil { - return err + return nil, err } switch arHeader.Name { case "data.tar.gz": gzipReader, err := gzip.NewReader(arReader) if err != nil { - return err + return nil, err } - defer gzipReader.Close() dataReader = gzipReader case "data.tar.xz": xzReader, err := xz.NewReader(arReader) if err != nil { - return err + return nil, err } - dataReader = xzReader + dataReader = io.NopCloser(xzReader) case "data.tar.zst": zstdReader, err := zstd.NewReader(arReader) if err != nil { - return err + return nil, err } - defer zstdReader.Close() - dataReader = zstdReader + dataReader = zstdReader.IOReadCloser() } } - return extractData(dataReader, validOpts) -} -func extractData(dataReader io.Reader, options *ExtractOptions) error { + return dataReader, nil +} +func extractData(pkgReader io.ReadSeeker, options *ExtractOptions) error { oldUmask := syscall.Umask(0) defer func() { syscall.Umask(oldUmask) @@ -136,6 +156,18 @@ func extractData(dataReader io.Reader, options *ExtractOptions) error { } } + // Read the metadata of the tarball to determine hard links. + dataReader, err := getDataReader(pkgReader, false) + if err != nil { + return err + } + tarReader := tar.NewReader(dataReader) + tarMetadata, err := readTarMetadata(tarReader) + if err != nil { + return err + } + // Rewind back to the start of the tarball and extract the files. + pkgReader.Seek(0, io.SeekStart) // When creating a file we will iterate through its parent directories and // create them with the permissions defined in the tarball. // @@ -143,7 +175,12 @@ func extractData(dataReader io.Reader, options *ExtractOptions) error { // before the entry for the file itself. This is the case for .deb files but // not for all tarballs. tarDirMode := make(map[string]fs.FileMode) - tarReader := tar.NewReader(dataReader) + dataReader, err = getDataReader(pkgReader, false) + if err != nil { + return err + } + tarReader = tar.NewReader(dataReader) + defer dataReader.Close() for { tarHeader, err := tarReader.Next() if err == io.EOF { @@ -153,6 +190,7 @@ func extractData(dataReader io.Reader, options *ExtractOptions) error { return err } + targetDir := options.TargetDir sourcePath := tarHeader.Name if len(sourcePath) < 3 || sourcePath[0] != '.' || sourcePath[1] != '/' { continue @@ -187,8 +225,24 @@ func extractData(dataReader io.Reader, options *ExtractOptions) error { } } if len(targetPaths) == 0 { - // Nothing to do. - continue + if tarHeader.Typeflag == tar.TypeReg { + // Extract the hard link base file to the staging directory, when + // 1. it is required by other hard links () + // 2. it is not part of the target paths (len(targetPaths) == 0) + // tarHeader.Name is used since the paths in the tarMetadata are relative + if entry, ok := tarMetadata.HardLinkRevMap[tarHeader.Name]; ok { + targetDir = options.StagingDir + entry.inStaging = true + tarMetadata.HardLinkRevMap[tarHeader.Name] = entry + targetPaths[sourcePath] = append(targetPaths[sourcePath], ExtractInfo{ + Path: sourcePath, + Mode: uint(tarHeader.FileInfo().Mode()), + }) + } + } else { + // Nothing to do. + continue + } } var contentCache []byte @@ -236,7 +290,7 @@ func extractData(dataReader io.Reader, options *ExtractOptions) error { delete(tarDirMode, path) createOptions := &fsutil.CreateOptions{ - Path: filepath.Join(options.TargetDir, path), + Path: filepath.Join(targetDir, path), Mode: mode, MakeParents: true, } @@ -247,17 +301,31 @@ func extractData(dataReader io.Reader, options *ExtractOptions) error { } // Create the entry itself. link := tarHeader.Linkname + hardLinkId := 0 if tarHeader.Typeflag == tar.TypeLink { - // A hard link requires the real path of the target file. - link = filepath.Join(options.TargetDir, link) + if entry, ok := tarMetadata.HardLinkRevMap[link]; ok { + if entry.inStaging { + // The hard link base file is not extracted yet. + // It will be extracted to the staging directory. + link = filepath.Join(options.StagingDir, link) + } else { + link = filepath.Join(targetDir, link) + } + hardLinkId = int(entry.Identifier) + } else { + return fmt.Errorf("hard link target %s not found in the tarball header", tarHeader.Linkname) + } + } + if entry, ok := tarMetadata.HardLinkRevMap["."+targetPath]; ok { + hardLinkId = int(entry.Identifier) } - createOptions := &fsutil.CreateOptions{ - Path: filepath.Join(options.TargetDir, targetPath), + Path: filepath.Join(targetDir, targetPath), Mode: tarHeader.FileInfo().Mode(), Data: pathReader, Link: link, MakeParents: true, + HardLinkId: hardLinkId, } err := options.Create(extractInfos, createOptions) if err != nil { @@ -294,3 +362,57 @@ func parentDirs(path string) []string { } return parents } + +func NewTarMetadata() tarMetadata { + return tarMetadata{ + HardLinks: make(map[string]hardLink), + HardLinkRevMap: make(map[string]hardLinkRevMapEntry), + } +} + +func readTarMetadata(tarReader *tar.Reader) (tarMetadata, error) { + metadata := NewTarMetadata() + var hardLinks = make(map[string]string) + var hardLinkRevMap = make(map[string][]string) + for { + tarHeader, err := tarReader.Next() + if err == io.EOF { + break + } + if err != nil { + return metadata, err + } + + if tarHeader.Typeflag == tar.TypeLink { + sourcePath := tarHeader.Name + linkPath := tarHeader.Linkname + hardLinks[sourcePath] = linkPath + hardLinkRevMap[linkPath] = append(hardLinkRevMap[linkPath], sourcePath) + } + } + + // Sort the hard link targets to ensure a deterministic HardLinkId in the report + sortedTargets := make([]string, 0, len(hardLinkRevMap)) + for target, _ := range hardLinkRevMap { + sortedTargets = append(sortedTargets, target) + } + sort.Strings(sortedTargets) + + for idx, target := range sortedTargets { + sources := hardLinkRevMap[target] + metadata.HardLinkRevMap[target] = hardLinkRevMapEntry{ + Target: sources, + Identifier: idx + 1, + inStaging: false, + } + } + + for source, target := range hardLinks { + metadata.HardLinks[source] = hardLink{ + Target: target, + Identifier: metadata.HardLinkRevMap[target].Identifier, + } + } + + return metadata, nil +} diff --git a/internal/deb/extract_test.go b/internal/deb/extract_test.go index 1e78f641..911dbd11 100644 --- a/internal/deb/extract_test.go +++ b/internal/deb/extract_test.go @@ -433,7 +433,7 @@ func (s *S) TestExtract(c *C) { test.hackopt(&options) } - err := deb.Extract(bytes.NewBuffer(test.pkgdata), &options) + err := deb.Extract(bytes.NewReader(test.pkgdata), &options) if test.error != "" { c.Assert(err, ErrorMatches, test.error) continue @@ -544,7 +544,7 @@ func (s *S) TestExtractCreateCallback(c *C) { return nil } - err := deb.Extract(bytes.NewBuffer(test.pkgdata), &options) + err := deb.Extract(bytes.NewReader(test.pkgdata), &options) c.Assert(err, IsNil) c.Assert(createExtractInfos, DeepEquals, test.calls) diff --git a/internal/fsutil/create.go b/internal/fsutil/create.go index 06ab8e1d..70987038 100644 --- a/internal/fsutil/create.go +++ b/internal/fsutil/create.go @@ -22,14 +22,16 @@ type CreateOptions struct { // If MakeParents is true, missing parent directories of Path are // created with permissions 0755. MakeParents bool + HardLinkId int } type Entry struct { - Path string - Mode fs.FileMode - Hash string - Size int - Link string + Path string + Mode fs.FileMode + Hash string + Size int + Link string + HardLinkId int } // Create creates a filesystem entry according to the provided options and returns @@ -75,11 +77,12 @@ func Create(options *CreateOptions) (*Entry, error) { return nil, err } entry := &Entry{ - Path: o.Path, - Mode: s.Mode(), - Hash: hash, - Size: rp.size, - Link: o.Link, + Path: o.Path, + Mode: s.Mode(), + Hash: hash, + Size: rp.size, + Link: o.Link, + HardLinkId: o.HardLinkId, } return entry, nil } diff --git a/internal/slicer/report.go b/internal/slicer/report.go index 8897f518..e1954ea7 100644 --- a/internal/slicer/report.go +++ b/internal/slicer/report.go @@ -11,13 +11,14 @@ import ( ) type ReportEntry struct { - Path string - Mode fs.FileMode - Hash string - Size int - Slices map[*setup.Slice]bool - Link string - FinalHash string + Path string + Mode fs.FileMode + Hash string + Size int + Slices map[*setup.Slice]bool + Link string + FinalHash string + HardLinkId int } // Report holds the information about files and directories created when slicing @@ -62,12 +63,13 @@ func (r *Report) Add(slice *setup.Slice, fsEntry *fsutil.Entry) error { r.Entries[relPath] = entry } else { r.Entries[relPath] = ReportEntry{ - Path: relPath, - Mode: fsEntry.Mode, - Hash: fsEntry.Hash, - Size: fsEntry.Size, - Slices: map[*setup.Slice]bool{slice: true}, - Link: fsEntry.Link, + Path: relPath, + Mode: fsEntry.Mode, + Hash: fsEntry.Hash, + Size: fsEntry.Size, + Slices: map[*setup.Slice]bool{slice: true}, + Link: fsEntry.Link, + HardLinkId: fsEntry.HardLinkId, } } return nil diff --git a/internal/slicer/slicer.go b/internal/slicer/slicer.go index f164efd8..cbeaab9c 100644 --- a/internal/slicer/slicer.go +++ b/internal/slicer/slicer.go @@ -144,7 +144,7 @@ func Run(options *RunOptions) (*Report, error) { } // Fetch all packages, using the selection order. - packages := make(map[string]io.ReadCloser) + packages := make(map[string]io.ReadSeekCloser) for _, slice := range options.Selection.Slices { if packages[slice.Package] != nil { continue @@ -227,10 +227,11 @@ func Run(options *RunOptions) (*Report, error) { continue } err := deb.Extract(reader, &deb.ExtractOptions{ - Package: slice.Package, - Extract: extract[slice.Package], - TargetDir: targetDir, - Create: create, + Package: slice.Package, + Extract: extract[slice.Package], + TargetDir: targetDir, + StagingDir: os.TempDir(), // TODo: create a temp dir in /tmp instead of using os.TempDir() + Create: create, }) reader.Close() packages[slice.Package] = nil diff --git a/internal/slicer/slicer_test.go b/internal/slicer/slicer_test.go index cd40f3ce..77e47684 100644 --- a/internal/slicer/slicer_test.go +++ b/internal/slicer/slicer_test.go @@ -1085,7 +1085,70 @@ var slicerTests = []slicerTest{{ }, report: map[string]string{ "/dir/file": "file 0644 28121945 {test-package_slice1,test-package_slice2}", - "/hardlink": "hardlink /dir/file {test-package_slice1,test-package_slice2}", + "/hardlink": "hardlink 1 {test-package_slice1,test-package_slice2}", + }, +}, { + summary: "Empty hard link is inflated with its counterpart", + slices: []setup.SliceKey{ + {"test-package", "myslice"}}, + pkgs: map[string][]byte{ + "test-package": testutil.MustMakeDeb([]testutil.TarEntry{ + testutil.Dir(0755, "./"), + testutil.Dir(0755, "./dir/"), + testutil.Reg(0644, "./dir/file", "text for file"), + testutil.Hln(0644, "./hardlink1", "./dir/file"), + testutil.Hln(0644, "./hardlink2", "./dir/file"), + }), + }, + release: map[string]string{ + "slices/mydir/test-package.yaml": ` + package: test-package + slices: + myslice: + contents: + /hardlink1: + /hardlink2: + `, + }, + filesystem: map[string]string{ + "/hardlink1": "file 0644 28121945", + "/hardlink2": "file 0644 28121945", + }, + report: map[string]string{ + "/hardlink1": "hardlink 1 {test-package_myslice}", + "/hardlink2": "hardlink 1 {test-package_myslice}", + }, +}, { + summary: "Hard link identifier distinguishes different hard links", + slices: []setup.SliceKey{ + {"test-package", "myslice"}}, + pkgs: map[string][]byte{ + "test-package": testutil.MustMakeDeb([]testutil.TarEntry{ + testutil.Dir(0755, "./"), + testutil.Dir(0755, "./dir/"), + testutil.Reg(0644, "./dir/file1", "text for file1"), + testutil.Reg(0644, "./dir/file2", "text for file2"), + testutil.Hln(0644, "./hardlink1", "./dir/file1"), + testutil.Hln(0644, "./hardlink2", "./dir/file2"), + }), + }, + release: map[string]string{ + "slices/mydir/test-package.yaml": ` + package: test-package + slices: + myslice: + contents: + /hardlink1: + /hardlink2: + `, + }, + filesystem: map[string]string{ + "/hardlink1": "file 0644 df82bbbd", + "/hardlink2": "file 0644 dcddda2e", + }, + report: map[string]string{ + "/hardlink1": "hardlink 1 {test-package_myslice}", + "/hardlink2": "hardlink 2 {test-package_myslice}", }, }} @@ -1111,9 +1174,9 @@ func (a *testArchive) Options() *archive.Options { return &a.options } -func (a *testArchive) Fetch(pkg string) (io.ReadCloser, error) { +func (a *testArchive) Fetch(pkg string) (io.ReadSeekCloser, error) { if data, ok := a.pkgs[pkg]; ok { - return io.NopCloser(bytes.NewBuffer(data)), nil + return testutil.ReadSeekerNopCloser(bytes.NewReader(data)), nil } return nil, fmt.Errorf("attempted to open %q package", pkg) } @@ -1235,8 +1298,8 @@ func treeDumpReport(report *slicer.Report) map[string]string { case 0: if entry.Link != "" { // Hard link. - relLink := filepath.Clean("/" + strings.TrimPrefix(entry.Link, report.Root)) - fsDump = fmt.Sprintf("hardlink %s", relLink) + // relLink := filepath.Clean("/" + strings.TrimPrefix(entry.Link, report.Root)) + fsDump = fmt.Sprintf("hardlink %d", entry.HardLinkId) } else { // Regular file. if entry.Size == 0 { diff --git a/internal/testutil/nop_closer.go b/internal/testutil/nop_closer.go new file mode 100644 index 00000000..79a9839f --- /dev/null +++ b/internal/testutil/nop_closer.go @@ -0,0 +1,19 @@ +package testutil + +import ( + "io" +) + +// NopSeekCloser is an io.Reader that does nothing on Close, and +// seeks to the beginning of the stream on Seek. +// It is an extension of io.NopCloser that also implements io.Seeker. +type readSeekerNopCloser struct { + io.ReadSeeker +} + +// Close does nothing. +func (readSeekerNopCloser) Close() error { return nil } + +func ReadSeekerNopCloser(r io.ReadSeeker) io.ReadSeekCloser { + return readSeekerNopCloser{r} +} From fa7c62a6413a6e548f8b336fe3a7e85b673f7650 Mon Sep 17 00:00:00 2001 From: Zhijie Yang Date: Thu, 5 Sep 2024 16:36:11 +0200 Subject: [PATCH 2/4] feat: remove unused hardLinks from tarMetadata and add test --- internal/deb/extract.go | 39 ++++++++++++--------------------- internal/slicer/slicer.go | 8 +++++-- internal/slicer/slicer_test.go | 40 ++++++++++++++++++++++++++++++++-- 3 files changed, 58 insertions(+), 29 deletions(-) diff --git a/internal/deb/extract.go b/internal/deb/extract.go index 7fd69ec2..7513ef04 100644 --- a/internal/deb/extract.go +++ b/internal/deb/extract.go @@ -46,13 +46,7 @@ type hardLinkRevMapEntry struct { inStaging bool } -type hardLink struct { - Target string - Identifier int -} - type tarMetadata struct { - HardLinks map[string]hardLink HardLinkRevMap map[string]hardLinkRevMapEntry } @@ -227,9 +221,10 @@ func extractData(pkgReader io.ReadSeeker, options *ExtractOptions) error { if len(targetPaths) == 0 { if tarHeader.Typeflag == tar.TypeReg { // Extract the hard link base file to the staging directory, when - // 1. it is required by other hard links () + // 1. it is required by other hard links (exists as a key in the HardLinkRevMap) // 2. it is not part of the target paths (len(targetPaths) == 0) - // tarHeader.Name is used since the paths in the tarMetadata are relative + // In case that [len(targetPaths) > 0], the hard link base file is extracted normally. + // tarHeader.Name is used since the paths in the HardLinkRevMap are relative if entry, ok := tarMetadata.HardLinkRevMap[tarHeader.Name]; ok { targetDir = options.StagingDir entry.inStaging = true @@ -303,19 +298,23 @@ func extractData(pkgReader io.ReadSeeker, options *ExtractOptions) error { link := tarHeader.Linkname hardLinkId := 0 if tarHeader.Typeflag == tar.TypeLink { + // Set the [link] to the absolute path if it's a hard link if entry, ok := tarMetadata.HardLinkRevMap[link]; ok { + // Set the [link] w.r.t. to different path prefix depending + // on whether the base file is in the staging directory. if entry.inStaging { - // The hard link base file is not extracted yet. - // It will be extracted to the staging directory. link = filepath.Join(options.StagingDir, link) } else { link = filepath.Join(targetDir, link) } + // Set the hardLinkId for hard links hardLinkId = int(entry.Identifier) } else { return fmt.Errorf("hard link target %s not found in the tarball header", tarHeader.Linkname) } } + // Set the HardLinkId to both the hard link base file, + // so they are symmetric in the report. if entry, ok := tarMetadata.HardLinkRevMap["."+targetPath]; ok { hardLinkId = int(entry.Identifier) } @@ -365,14 +364,12 @@ func parentDirs(path string) []string { func NewTarMetadata() tarMetadata { return tarMetadata{ - HardLinks: make(map[string]hardLink), HardLinkRevMap: make(map[string]hardLinkRevMapEntry), } } func readTarMetadata(tarReader *tar.Reader) (tarMetadata, error) { metadata := NewTarMetadata() - var hardLinks = make(map[string]string) var hardLinkRevMap = make(map[string][]string) for { tarHeader, err := tarReader.Next() @@ -386,19 +383,18 @@ func readTarMetadata(tarReader *tar.Reader) (tarMetadata, error) { if tarHeader.Typeflag == tar.TypeLink { sourcePath := tarHeader.Name linkPath := tarHeader.Linkname - hardLinks[sourcePath] = linkPath hardLinkRevMap[linkPath] = append(hardLinkRevMap[linkPath], sourcePath) } } // Sort the hard link targets to ensure a deterministic HardLinkId in the report - sortedTargets := make([]string, 0, len(hardLinkRevMap)) - for target, _ := range hardLinkRevMap { - sortedTargets = append(sortedTargets, target) + targets := make([]string, 0, len(hardLinkRevMap)) + for target := range hardLinkRevMap { + targets = append(targets, target) } - sort.Strings(sortedTargets) + sort.Strings(targets) - for idx, target := range sortedTargets { + for idx, target := range targets { sources := hardLinkRevMap[target] metadata.HardLinkRevMap[target] = hardLinkRevMapEntry{ Target: sources, @@ -407,12 +403,5 @@ func readTarMetadata(tarReader *tar.Reader) (tarMetadata, error) { } } - for source, target := range hardLinks { - metadata.HardLinks[source] = hardLink{ - Target: target, - Identifier: metadata.HardLinkRevMap[target].Identifier, - } - } - return metadata, nil } diff --git a/internal/slicer/slicer.go b/internal/slicer/slicer.go index cbeaab9c..5108c106 100644 --- a/internal/slicer/slicer.go +++ b/internal/slicer/slicer.go @@ -226,11 +226,15 @@ func Run(options *RunOptions) (*Report, error) { if reader == nil { continue } - err := deb.Extract(reader, &deb.ExtractOptions{ + stagingDir, err := os.MkdirTemp("", "chisel-staging-*") + if err != nil { + return nil, err + } + err = deb.Extract(reader, &deb.ExtractOptions{ Package: slice.Package, Extract: extract[slice.Package], TargetDir: targetDir, - StagingDir: os.TempDir(), // TODo: create a temp dir in /tmp instead of using os.TempDir() + StagingDir: stagingDir, Create: create, }) reader.Close() diff --git a/internal/slicer/slicer_test.go b/internal/slicer/slicer_test.go index 77e47684..7ec93369 100644 --- a/internal/slicer/slicer_test.go +++ b/internal/slicer/slicer_test.go @@ -1084,7 +1084,7 @@ var slicerTests = []slicerTest{{ "/hardlink": "file 0644 28121945", }, report: map[string]string{ - "/dir/file": "file 0644 28121945 {test-package_slice1,test-package_slice2}", + "/dir/file": "hardlink 1 {test-package_slice1,test-package_slice2}", "/hardlink": "hardlink 1 {test-package_slice1,test-package_slice2}", }, }, { @@ -1150,6 +1150,42 @@ var slicerTests = []slicerTest{{ "/hardlink1": "hardlink 1 {test-package_myslice}", "/hardlink2": "hardlink 2 {test-package_myslice}", }, +}, { + summary: "Hard links handled with wildcard", + slices: []setup.SliceKey{ + {"test-package", "myslice"}}, + pkgs: map[string][]byte{ + "test-package": testutil.MustMakeDeb([]testutil.TarEntry{ + testutil.Dir(0755, "./"), + testutil.Dir(0755, "./dir/"), + testutil.Reg(0644, "./file1.txt", "text for file1"), + testutil.Reg(0644, "./dir/file2.txt", "text for file2"), + testutil.Hln(0644, "./dir/hardlink1.txt", "./file1.txt"), + testutil.Hln(0644, "./hardlink2.txt", "./dir/file2.txt"), + }), + }, + release: map[string]string{ + "slices/mydir/test-package.yaml": ` + package: test-package + slices: + myslice: + contents: + /**.txt: + `, + }, + filesystem: map[string]string{ + "/dir/": "dir 0755", + "/dir/file2.txt": "file 0644 dcddda2e", + "/dir/hardlink1.txt": "file 0644 df82bbbd", + "/file1.txt": "file 0644 df82bbbd", + "/hardlink2.txt": "file 0644 dcddda2e", + }, + report: map[string]string{ + "/file1.txt": "hardlink 2 {test-package_myslice}", + "/dir/file2.txt": "hardlink 1 {test-package_myslice}", + "/dir/hardlink1.txt": "hardlink 2 {test-package_myslice}", + "/hardlink2.txt": "hardlink 1 {test-package_myslice}", + }, }} var defaultChiselYaml = ` @@ -1296,7 +1332,7 @@ func treeDumpReport(report *slicer.Report) map[string]string { case fs.ModeSymlink: fsDump = fmt.Sprintf("symlink %s", entry.Link) case 0: - if entry.Link != "" { + if entry.HardLinkId > 0 { // Hard link. // relLink := filepath.Clean("/" + strings.TrimPrefix(entry.Link, report.Root)) fsDump = fmt.Sprintf("hardlink %d", entry.HardLinkId) From 9b8415e485ce1deb277bf997d5490151f35e4708 Mon Sep 17 00:00:00 2001 From: Zhijie Yang Date: Fri, 6 Sep 2024 14:41:40 +0200 Subject: [PATCH 3/4] chore: simplify ExtractOptions --- internal/archive/archive_test.go | 5 ++-- internal/deb/extract.go | 50 ++++++++++++++++++-------------- internal/slicer/slicer.go | 10 +++---- internal/slicer/slicer_test.go | 43 ++++++++++++++++++++++----- 4 files changed, 69 insertions(+), 39 deletions(-) diff --git a/internal/archive/archive_test.go b/internal/archive/archive_test.go index fd1d5ff7..af741dbe 100644 --- a/internal/archive/archive_test.go +++ b/internal/archive/archive_test.go @@ -490,9 +490,8 @@ func (s *S) testOpenArchiveArch(c *C, release ubuntuRelease, arch string) { c.Assert(err, IsNil) err = deb.Extract(pkg, &deb.ExtractOptions{ - Package: "hostname", - TargetDir: extractDir, - StagingDir: c.MkDir(), + Package: "hostname", + TargetDir: extractDir, Extract: map[string][]deb.ExtractInfo{ "/usr/share/doc/hostname/copyright": { {Path: "/copyright"}, diff --git a/internal/deb/extract.go b/internal/deb/extract.go index 7513ef04..8fec500d 100644 --- a/internal/deb/extract.go +++ b/internal/deb/extract.go @@ -14,19 +14,17 @@ import ( "syscall" "github.com/blakesmith/ar" - "github.com/canonical/chisel/internal/fsutil" - "github.com/canonical/chisel/internal/strdist" "github.com/klauspost/compress/zstd" "github.com/ulikunitz/xz" + + "github.com/canonical/chisel/internal/fsutil" + "github.com/canonical/chisel/internal/strdist" ) type ExtractOptions struct { Package string TargetDir string - // StageDir is the directory where the hard link base file is located, - // if this file is not within the target paths. - StagingDir string - Extract map[string][]ExtractInfo + Extract map[string][]ExtractInfo // Create can optionally be set to control the creation of extracted entries. // extractInfos is set to the matching entries in Extract, and is nil in cases where // the created entry is implicit and unlisted (for example, parent directories). @@ -40,10 +38,12 @@ type ExtractInfo struct { Context any } +// The identifier for the hardlink is unique for each unique base file, +// which counts from 1. The 0 value is reserved for files that are not hard links. type hardLinkRevMapEntry struct { - Target []string - Identifier int - inStaging bool + Target []string + Identifier int + StagingPath string } type tarMetadata struct { @@ -98,7 +98,7 @@ func Extract(pkgReader io.ReadSeeker, options *ExtractOptions) (err error) { return extractData(pkgReader, validOpts) } -func getDataReader(pkgReader io.ReadSeeker, close bool) (io.ReadCloser, error) { +func getDataReader(pkgReader io.ReadSeeker) (io.ReadCloser, error) { arReader := ar.NewReader(pkgReader) var dataReader io.ReadCloser for dataReader == nil { @@ -135,6 +135,12 @@ func getDataReader(pkgReader io.ReadSeeker, close bool) (io.ReadCloser, error) { } func extractData(pkgReader io.ReadSeeker, options *ExtractOptions) error { + // stagingDir is the directory where the hard link base file is located, + // if this file is not within the target paths. + stagingDir, err := os.MkdirTemp("", "chisel-staging-") + if err != nil { + return err + } oldUmask := syscall.Umask(0) defer func() { syscall.Umask(oldUmask) @@ -151,7 +157,7 @@ func extractData(pkgReader io.ReadSeeker, options *ExtractOptions) error { } // Read the metadata of the tarball to determine hard links. - dataReader, err := getDataReader(pkgReader, false) + dataReader, err := getDataReader(pkgReader) if err != nil { return err } @@ -169,12 +175,12 @@ func extractData(pkgReader io.ReadSeeker, options *ExtractOptions) error { // before the entry for the file itself. This is the case for .deb files but // not for all tarballs. tarDirMode := make(map[string]fs.FileMode) - dataReader, err = getDataReader(pkgReader, false) + dataReader, err = getDataReader(pkgReader) if err != nil { return err } - tarReader = tar.NewReader(dataReader) defer dataReader.Close() + tarReader = tar.NewReader(dataReader) for { tarHeader, err := tarReader.Next() if err == io.EOF { @@ -226,8 +232,8 @@ func extractData(pkgReader io.ReadSeeker, options *ExtractOptions) error { // In case that [len(targetPaths) > 0], the hard link base file is extracted normally. // tarHeader.Name is used since the paths in the HardLinkRevMap are relative if entry, ok := tarMetadata.HardLinkRevMap[tarHeader.Name]; ok { - targetDir = options.StagingDir - entry.inStaging = true + targetDir = stagingDir + entry.StagingPath = filepath.Join(stagingDir, tarHeader.Name) tarMetadata.HardLinkRevMap[tarHeader.Name] = entry targetPaths[sourcePath] = append(targetPaths[sourcePath], ExtractInfo{ Path: sourcePath, @@ -302,8 +308,8 @@ func extractData(pkgReader io.ReadSeeker, options *ExtractOptions) error { if entry, ok := tarMetadata.HardLinkRevMap[link]; ok { // Set the [link] w.r.t. to different path prefix depending // on whether the base file is in the staging directory. - if entry.inStaging { - link = filepath.Join(options.StagingDir, link) + if entry.StagingPath != "" { + link = entry.StagingPath } else { link = filepath.Join(targetDir, link) } @@ -362,14 +368,14 @@ func parentDirs(path string) []string { return parents } -func NewTarMetadata() tarMetadata { +func newTarMetadata() tarMetadata { return tarMetadata{ HardLinkRevMap: make(map[string]hardLinkRevMapEntry), } } func readTarMetadata(tarReader *tar.Reader) (tarMetadata, error) { - metadata := NewTarMetadata() + metadata := newTarMetadata() var hardLinkRevMap = make(map[string][]string) for { tarHeader, err := tarReader.Next() @@ -397,9 +403,9 @@ func readTarMetadata(tarReader *tar.Reader) (tarMetadata, error) { for idx, target := range targets { sources := hardLinkRevMap[target] metadata.HardLinkRevMap[target] = hardLinkRevMapEntry{ - Target: sources, - Identifier: idx + 1, - inStaging: false, + Target: sources, + Identifier: idx + 1, + StagingPath: "", } } diff --git a/internal/slicer/slicer.go b/internal/slicer/slicer.go index 5108c106..abc20fdc 100644 --- a/internal/slicer/slicer.go +++ b/internal/slicer/slicer.go @@ -226,16 +226,14 @@ func Run(options *RunOptions) (*Report, error) { if reader == nil { continue } - stagingDir, err := os.MkdirTemp("", "chisel-staging-*") if err != nil { return nil, err } err = deb.Extract(reader, &deb.ExtractOptions{ - Package: slice.Package, - Extract: extract[slice.Package], - TargetDir: targetDir, - StagingDir: stagingDir, - Create: create, + Package: slice.Package, + Extract: extract[slice.Package], + TargetDir: targetDir, + Create: create, }) reader.Close() packages[slice.Package] = nil diff --git a/internal/slicer/slicer_test.go b/internal/slicer/slicer_test.go index 7ec93369..85402bae 100644 --- a/internal/slicer/slicer_test.go +++ b/internal/slicer/slicer_test.go @@ -1158,10 +1158,10 @@ var slicerTests = []slicerTest{{ "test-package": testutil.MustMakeDeb([]testutil.TarEntry{ testutil.Dir(0755, "./"), testutil.Dir(0755, "./dir/"), - testutil.Reg(0644, "./file1.txt", "text for file1"), - testutil.Reg(0644, "./dir/file2.txt", "text for file2"), - testutil.Hln(0644, "./dir/hardlink1.txt", "./file1.txt"), - testutil.Hln(0644, "./hardlink2.txt", "./dir/file2.txt"), + testutil.Reg(0644, "./file1", "text for file1"), + testutil.Reg(0644, "./dir/file2", "text for file2"), + testutil.Hln(0644, "./dir/hardlink1.txt", "./file1"), + testutil.Hln(0644, "./hardlink2.txt", "./dir/file2"), }), }, release: map[string]string{ @@ -1175,17 +1175,44 @@ var slicerTests = []slicerTest{{ }, filesystem: map[string]string{ "/dir/": "dir 0755", - "/dir/file2.txt": "file 0644 dcddda2e", "/dir/hardlink1.txt": "file 0644 df82bbbd", - "/file1.txt": "file 0644 df82bbbd", "/hardlink2.txt": "file 0644 dcddda2e", }, report: map[string]string{ - "/file1.txt": "hardlink 2 {test-package_myslice}", - "/dir/file2.txt": "hardlink 1 {test-package_myslice}", "/dir/hardlink1.txt": "hardlink 2 {test-package_myslice}", "/hardlink2.txt": "hardlink 1 {test-package_myslice}", }, +}, { + summary: "Hard links and their counterparts should be equal in the report", + slices: []setup.SliceKey{ + {"test-package", "myslice"}}, + pkgs: map[string][]byte{ + "test-package": testutil.MustMakeDeb([]testutil.TarEntry{ + testutil.Dir(0755, "./"), + testutil.Dir(0755, "./dir/"), + testutil.Reg(0644, "./dir/file", "text for file"), + testutil.Hln(0644, "./hardlink", "./dir/file"), + }), + }, + release: map[string]string{ + "slices/mydir/test-package.yaml": ` + package: test-package + slices: + myslice: + contents: + /**: + `, + }, + filesystem: map[string]string{ + "/dir/": "dir 0755", + "/dir/file": "file 0644 28121945", + "/hardlink": "file 0644 28121945", + }, + report: map[string]string{ + "/dir/": "dir 0755 {test-package_myslice}", + "/dir/file": "hardlink 1 {test-package_myslice}", + "/hardlink": "hardlink 1 {test-package_myslice}", + }, }} var defaultChiselYaml = ` From 9c84aaf7c2cfb927505eca54aecb8dc3d4952fbc Mon Sep 17 00:00:00 2001 From: Zhijie Yang Date: Fri, 6 Sep 2024 15:53:39 +0200 Subject: [PATCH 4/4] feat: allow symlink to be hardlink targets --- internal/deb/extract.go | 80 ++++++++++++++++++---------------- internal/slicer/slicer_test.go | 41 +++++++++++++++-- 2 files changed, 81 insertions(+), 40 deletions(-) diff --git a/internal/deb/extract.go b/internal/deb/extract.go index 8fec500d..c041734d 100644 --- a/internal/deb/extract.go +++ b/internal/deb/extract.go @@ -98,6 +98,10 @@ func Extract(pkgReader io.ReadSeeker, options *ExtractOptions) (err error) { return extractData(pkgReader, validOpts) } +// getDataReader returns a ReadCloser for the data payload of the package. +// Calling the Close method must happen outside of this function to prevent +// premature closing of the underlying package reader. +// The xz.Reader is wrapper with io.NopCloser since it does not implement the Close method. func getDataReader(pkgReader io.ReadSeeker) (io.ReadCloser, error) { arReader := ar.NewReader(pkgReader) var dataReader io.ReadCloser @@ -135,12 +139,32 @@ func getDataReader(pkgReader io.ReadSeeker) (io.ReadCloser, error) { } func extractData(pkgReader io.ReadSeeker, options *ExtractOptions) error { - // stagingDir is the directory where the hard link base file is located, - // if this file is not within the target paths. + // stagingDir is the directory where the hard link base file, which is not + // listed in the pendingPaths, to be extracted. stagingDir, err := os.MkdirTemp("", "chisel-staging-") if err != nil { return err } + // Fist pass over the tarball to read the header of the tarball + // and create its metadata. + dataReader, err := getDataReader(pkgReader) + if err != nil { + return err + } + tarReader := tar.NewReader(dataReader) + tarMetadata, err := readTarMetadata(tarReader) + if err != nil { + return err + } + // Rewind back to the start of the tarball and extract the files. + pkgReader.Seek(0, io.SeekStart) + // Second pass over the tarball to extract the files. + dataReader, err = getDataReader(pkgReader) + if err != nil { + return err + } + defer dataReader.Close() + oldUmask := syscall.Umask(0) defer func() { syscall.Umask(oldUmask) @@ -156,18 +180,6 @@ func extractData(pkgReader io.ReadSeeker, options *ExtractOptions) error { } } - // Read the metadata of the tarball to determine hard links. - dataReader, err := getDataReader(pkgReader) - if err != nil { - return err - } - tarReader := tar.NewReader(dataReader) - tarMetadata, err := readTarMetadata(tarReader) - if err != nil { - return err - } - // Rewind back to the start of the tarball and extract the files. - pkgReader.Seek(0, io.SeekStart) // When creating a file we will iterate through its parent directories and // create them with the permissions defined in the tarball. // @@ -175,11 +187,6 @@ func extractData(pkgReader io.ReadSeeker, options *ExtractOptions) error { // before the entry for the file itself. This is the case for .deb files but // not for all tarballs. tarDirMode := make(map[string]fs.FileMode) - dataReader, err = getDataReader(pkgReader) - if err != nil { - return err - } - defer dataReader.Close() tarReader = tar.NewReader(dataReader) for { tarHeader, err := tarReader.Next() @@ -190,6 +197,8 @@ func extractData(pkgReader io.ReadSeeker, options *ExtractOptions) error { return err } + // targetDir is the directory where the file is extracted. + // It is either the options.TargetDir or the stagingDir. targetDir := options.TargetDir sourcePath := tarHeader.Name if len(sourcePath) < 3 || sourcePath[0] != '.' || sourcePath[1] != '/' { @@ -225,21 +234,20 @@ func extractData(pkgReader io.ReadSeeker, options *ExtractOptions) error { } } if len(targetPaths) == 0 { - if tarHeader.Typeflag == tar.TypeReg { - // Extract the hard link base file to the staging directory, when - // 1. it is required by other hard links (exists as a key in the HardLinkRevMap) - // 2. it is not part of the target paths (len(targetPaths) == 0) - // In case that [len(targetPaths) > 0], the hard link base file is extracted normally. - // tarHeader.Name is used since the paths in the HardLinkRevMap are relative - if entry, ok := tarMetadata.HardLinkRevMap[tarHeader.Name]; ok { - targetDir = stagingDir - entry.StagingPath = filepath.Join(stagingDir, tarHeader.Name) - tarMetadata.HardLinkRevMap[tarHeader.Name] = entry - targetPaths[sourcePath] = append(targetPaths[sourcePath], ExtractInfo{ - Path: sourcePath, - Mode: uint(tarHeader.FileInfo().Mode()), - }) - } + // Extract the hard link base file to the staging directory, when + // 1. it is not part of the target paths (len(targetPaths) == 0) + // 2. it is required by other hard links (exists as a key in the HardLinkRevMap) + // In case that [len(targetPaths) > 0], the hard link base file is extracted normally. + // Note that the hard link base file can also be a symlink. + // tarHeader.Name is used here since the paths in the HardLinkRevMap are relative. + if entry, ok := tarMetadata.HardLinkRevMap[tarHeader.Name]; ok { + targetDir = stagingDir + entry.StagingPath = filepath.Join(stagingDir, tarHeader.Name) + tarMetadata.HardLinkRevMap[tarHeader.Name] = entry + targetPaths[sourcePath] = append(targetPaths[sourcePath], ExtractInfo{ + Path: sourcePath, + Mode: uint(tarHeader.FileInfo().Mode()), + }) } else { // Nothing to do. continue @@ -306,8 +314,6 @@ func extractData(pkgReader io.ReadSeeker, options *ExtractOptions) error { if tarHeader.Typeflag == tar.TypeLink { // Set the [link] to the absolute path if it's a hard link if entry, ok := tarMetadata.HardLinkRevMap[link]; ok { - // Set the [link] w.r.t. to different path prefix depending - // on whether the base file is in the staging directory. if entry.StagingPath != "" { link = entry.StagingPath } else { @@ -319,7 +325,7 @@ func extractData(pkgReader io.ReadSeeker, options *ExtractOptions) error { return fmt.Errorf("hard link target %s not found in the tarball header", tarHeader.Linkname) } } - // Set the HardLinkId to both the hard link base file, + // Set the HardLinkId to both the hard link and its counterpart file, // so they are symmetric in the report. if entry, ok := tarMetadata.HardLinkRevMap["."+targetPath]; ok { hardLinkId = int(entry.Identifier) diff --git a/internal/slicer/slicer_test.go b/internal/slicer/slicer_test.go index 85402bae..565bb67e 100644 --- a/internal/slicer/slicer_test.go +++ b/internal/slicer/slicer_test.go @@ -1213,6 +1213,37 @@ var slicerTests = []slicerTest{{ "/dir/file": "hardlink 1 {test-package_myslice}", "/hardlink": "hardlink 1 {test-package_myslice}", }, +}, { + summary: "Symlink is a valid hard link base file", + slices: []setup.SliceKey{ + {"test-package", "myslice"}}, + pkgs: map[string][]byte{ + "test-package": testutil.MustMakeDeb([]testutil.TarEntry{ + testutil.Dir(0755, "./"), + testutil.Dir(0755, "./dir/"), + testutil.Reg(0644, "./dir/file", "text for file"), + testutil.Lnk(0644, "./symlink", "./dir/file"), + testutil.Hln(0644, "./hardlink", "./symlink"), + }), + }, + release: map[string]string{ + "slices/mydir/test-package.yaml": ` + package: test-package + slices: + myslice: + contents: + /hardlink: + /symlink: + `, + }, + filesystem: map[string]string{ + "/hardlink": "symlink ./dir/file", + "/symlink": "symlink ./dir/file", + }, + report: map[string]string{ + "/symlink": "hardlink 1 -> symlink {test-package_myslice}", + "/hardlink": "hardlink 1 -> symlink {test-package_myslice}", + }, }} var defaultChiselYaml = ` @@ -1357,11 +1388,15 @@ func treeDumpReport(report *slicer.Report) map[string]string { case fs.ModeDir: fsDump = fmt.Sprintf("dir %#o", fperm) case fs.ModeSymlink: - fsDump = fmt.Sprintf("symlink %s", entry.Link) + if entry.HardLinkId > 0 { + // Hard link to a symlink. + fsDump = fmt.Sprintf("hardlink %d -> symlink", entry.HardLinkId) + } else { + fsDump = fmt.Sprintf("symlink %s", entry.Link) + } case 0: if entry.HardLinkId > 0 { - // Hard link. - // relLink := filepath.Clean("/" + strings.TrimPrefix(entry.Link, report.Root)) + // Hard link to a regular file. fsDump = fmt.Sprintf("hardlink %d", entry.HardLinkId) } else { // Regular file.