From 5059b8479ca6eabcf2be41f31ddf59628e9e0ee7 Mon Sep 17 00:00:00 2001
From: Brent Baude
Date: Sun, 4 Feb 2024 08:32:41 -0600
Subject: [PATCH] AppleHV - make gz ops sparse

gzip, by definition, cannot preserve the sparse nature of files. Using some
code from the crc project and gluing it together with our decompression code,
we can re-create the sparseness of a file. One downside is that the operation
is a little slower, but the gains from the sparse file are well worth it in
I/O alone.

There are a number of TODOs in this PR that would be ripe for quick-hitting
fixes.

[NO NEW TESTS NEEDED]

Signed-off-by: Brent Baude
---
 pkg/machine/applehv/machine.go        |  11 +--
 pkg/machine/applehv/stubber.go        |   1 -
 pkg/machine/compression/copy.go       | 117 ++++++++++++++++++++++++++
 pkg/machine/compression/copy_test.go  |  52 ++++++++++++
 pkg/machine/compression/decompress.go |  67 +++++++++++++++
 5 files changed, 237 insertions(+), 11 deletions(-)
 create mode 100644 pkg/machine/compression/copy.go
 create mode 100644 pkg/machine/compression/copy_test.go

diff --git a/pkg/machine/applehv/machine.go b/pkg/machine/applehv/machine.go
index f050f6e0d3..5ffb2fe8ac 100644
--- a/pkg/machine/applehv/machine.go
+++ b/pkg/machine/applehv/machine.go
@@ -5,7 +5,6 @@ package applehv
 import (
 	"fmt"
 	"os"
-	"os/exec"
 	"syscall"
 
 	"github.com/containers/podman/v4/pkg/machine"
@@ -101,15 +100,7 @@ func checkProcessRunning(processName string, pid int) error {
 // is assumed GiB
 func resizeDisk(mc *vmconfigs.MachineConfig, newSize strongunits.GiB) error {
 	logrus.Debugf("resizing %s to %d bytes", mc.ImagePath.GetPath(), newSize.ToBytes())
-	// seems like os.truncate() is not very performant with really large files
-	// so exec'ing out to the command truncate
-	size := fmt.Sprintf("%dG", newSize)
-	c := exec.Command("truncate", "-s", size, mc.ImagePath.GetPath())
-	if logrus.IsLevelEnabled(logrus.DebugLevel) {
-		c.Stderr = os.Stderr
-		c.Stdout = os.Stdout
-	}
-	return c.Run()
+	return os.Truncate(mc.ImagePath.GetPath(), int64(newSize.ToBytes()))
 }
 
 func generateSystemDFilesForVirtiofsMounts(mounts []machine.VirtIoFs) []ignition.Unit {
diff --git a/pkg/machine/applehv/stubber.go b/pkg/machine/applehv/stubber.go
index 8656f8e727..5d8a7d3a98 100644
--- a/pkg/machine/applehv/stubber.go
+++ b/pkg/machine/applehv/stubber.go
@@ -295,7 +295,6 @@ func (a AppleHVStubber) VMType() define.VMType {
 	return define.AppleHvVirt
 }
 
-
 func waitForGvProxy(gvproxySocket *define.VMFile) error {
 	backoffWait := gvProxyWaitBackoff
 	logrus.Debug("checking that gvproxy is running")
diff --git a/pkg/machine/compression/copy.go b/pkg/machine/compression/copy.go
new file mode 100644
index 0000000000..2e4637865c
--- /dev/null
+++ b/pkg/machine/compression/copy.go
@@ -0,0 +1,117 @@
+package compression
+
+import (
+	"bytes"
+	"io"
+	"os"
+)
+
+// TODO vendor this in ... pkg/os directory is small and code should be negligible
+/*
+	NOTE: copy.go and copy_test.go were lifted from github.com/crc-org/crc because
+	I was having trouble getting go to vendor it properly. All credit to them.
+*/
+
+func copyFile(src, dst string, sparse bool) error {
+	in, err := os.Open(src)
+	if err != nil {
+		return err
+	}
+
+	defer in.Close()
+
+	out, err := os.Create(dst)
+	if err != nil {
+		return err
+	}
+
+	defer out.Close()
+
+	if sparse {
+		if _, err = CopySparse(out, in); err != nil {
+			return err
+		}
+	} else {
+		if _, err = io.Copy(out, in); err != nil {
+			return err
+		}
+	}
+
+	fi, err := os.Stat(src)
+	if err != nil {
+		return err
+	}
+
+	if err = os.Chmod(dst, fi.Mode()); err != nil {
+		return err
+	}
+
+	return out.Close()
+}
+
+func CopyFile(src, dst string) error {
+	return copyFile(src, dst, false)
+}
+
+func CopyFileSparse(src, dst string) error {
+	return copyFile(src, dst, true)
+}
+
+func CopySparse(dst io.WriteSeeker, src io.Reader) (int64, error) {
+	copyBuf := make([]byte, copyChunkSize)
+	sparseWriter := newSparseWriter(dst)
+
+	bytesWritten, err := io.CopyBuffer(sparseWriter, src, copyBuf)
+	if err != nil {
+		return bytesWritten, err
+	}
+	err = sparseWriter.Close()
+	return bytesWritten, err
+}
+
+type sparseWriter struct {
+	writer          io.WriteSeeker
+	lastChunkSparse bool
+}
+
+func newSparseWriter(writer io.WriteSeeker) *sparseWriter {
+	return &sparseWriter{writer: writer}
+}
+
+const copyChunkSize = 4096
+
+var emptyChunk = make([]byte, copyChunkSize)
+
+func isEmptyChunk(p []byte) bool {
+	// HasPrefix instead of bytes.Equal in order to handle the last chunk
+	// of the file, which may be shorter than len(emptyChunk), and would
+	// fail bytes.Equal()
+	return bytes.HasPrefix(emptyChunk, p)
+}
+
+func (w *sparseWriter) Write(p []byte) (n int, err error) {
+	if isEmptyChunk(p) {
+		offset, err := w.writer.Seek(int64(len(p)), io.SeekCurrent)
+		if err != nil {
+			w.lastChunkSparse = false
+			return 0, err
+		}
+		_ = offset
+		w.lastChunkSparse = true
+		return len(p), nil
+	}
+	w.lastChunkSparse = false
+	return w.writer.Write(p)
+}
+
+func (w *sparseWriter) Close() error {
+	if w.lastChunkSparse {
+		if _, err := w.writer.Seek(-1, io.SeekCurrent); err != nil {
+			return err
+		}
+		if _, err := w.writer.Write([]byte{0}); err != nil {
+			return err
+		}
+	}
+	return nil
+}
diff --git a/pkg/machine/compression/copy_test.go b/pkg/machine/compression/copy_test.go
new file mode 100644
index 0000000000..9c25535ec5
--- /dev/null
+++ b/pkg/machine/compression/copy_test.go
@@ -0,0 +1,52 @@
+package compression
+
+import (
+	"os"
+	"path/filepath"
+	"testing"
+)
+
+func TestCopyFile(t *testing.T) {
+	testStr := "test-machine"
+
+	srcFile, err := os.CreateTemp("", "machine-test-")
+	if err != nil {
+		t.Fatal(err)
+	}
+	srcFi, err := srcFile.Stat()
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	_, _ = srcFile.Write([]byte(testStr)) //nolint:mirror
+	srcFile.Close()
+
+	srcFilePath := filepath.Join(os.TempDir(), srcFi.Name())
+
+	destFile, err := os.CreateTemp("", "machine-copy-test-")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	destFi, err := destFile.Stat()
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	destFile.Close()
+
+	destFilePath := filepath.Join(os.TempDir(), destFi.Name())
+
+	if err := CopyFile(srcFilePath, destFilePath); err != nil {
+		t.Fatal(err)
+	}
+
+	data, err := os.ReadFile(destFilePath)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if string(data) != testStr {
+		t.Fatalf("expected data \"%s\"; received \"%s\"", testStr, string(data))
+	}
+}
diff --git a/pkg/machine/compression/decompress.go b/pkg/machine/compression/decompress.go
index f5b2a16cc4..78dd91ccfe 100644
--- a/pkg/machine/compression/decompress.go
+++ b/pkg/machine/compression/decompress.go
@@ -3,6 +3,7 @@ package compression
 import (
 	"archive/zip"
 	"bufio"
+	"compress/gzip"
 	"errors"
 	"io"
 	"os"
@@ -19,12 +20,20 @@ import (
 	"github.com/ulikunitz/xz"
 )
 
+// Decompress is a generic wrapper for the various decompression algorithms.
+// TODO this needs some love. In the various decompression functions that are
+// called, the same uncompressed path is being opened multiple times.
 func Decompress(localPath *define.VMFile, uncompressedPath string) error {
 	var isZip bool
 	uncompressedFileWriter, err := os.OpenFile(uncompressedPath, os.O_CREATE|os.O_RDWR, 0600)
 	if err != nil {
 		return err
 	}
+	defer func() {
+		if err := uncompressedFileWriter.Close(); err != nil {
+			logrus.Errorf("unable to close decompressed file %s: %q", uncompressedPath, err)
+		}
+	}()
 	sourceFile, err := localPath.Read()
 	if err != nil {
 		return err
@@ -44,6 +53,11 @@ func Decompress(localPath *define.VMFile, uncompressedPath string) error {
 	if isZip && runtime.GOOS == "windows" {
 		return decompressZip(prefix, localPath.GetPath(), uncompressedFileWriter)
 	}
+
+	// Unfortunately, gzip is not sparse-capable. Let's handle it differently.
+	if compressionType == archive.Gzip && runtime.GOOS == "darwin" {
+		return decompressGzWithSparse(prefix, localPath, uncompressedPath)
+	}
 	return decompressEverythingElse(prefix, localPath.GetPath(), uncompressedFileWriter)
 }
 
@@ -182,3 +196,56 @@ func decompressZip(prefix string, src string, output io.WriteCloser) error {
 	p.Wait()
 	return err
 }
+
+func decompressGzWithSparse(prefix string, compressedPath *define.VMFile, uncompressedPath string) error {
+	stat, err := os.Stat(compressedPath.GetPath())
+	if err != nil {
+		return err
+	}
+
+	dstFile, err := os.OpenFile(uncompressedPath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, stat.Mode())
+	if err != nil {
+		return err
+	}
+	defer func() {
+		if err := dstFile.Close(); err != nil {
+			logrus.Errorf("unable to close uncompressed file %s: %q", uncompressedPath, err)
+		}
+	}()
+
+	f, err := os.Open(compressedPath.GetPath())
+	if err != nil {
+		return err
+	}
+	defer func() {
+		if err := f.Close(); err != nil {
+			logrus.Errorf("unable to close compressed file %s: %q", compressedPath.GetPath(), err)
+		}
+	}()
+
+	gzReader, err := gzip.NewReader(f)
+	if err != nil {
+		return err
+	}
+	defer func() {
+		if err := gzReader.Close(); err != nil {
+			logrus.Errorf("unable to close gz reader: %q", err)
+		}
+	}()
+
+	// TODO remove the following line when progress bars work
+	_ = prefix
+	// p, bar := utils.ProgressBar(prefix, stat.Size(), prefix+": done")
+	// proxyReader := bar.ProxyReader(f)
+	// defer func() {
+	// 	if err := proxyReader.Close(); err != nil {
+	// 		logrus.Error(err)
+	// 	}
+	// }()
+
+	logrus.Debugf("decompressing %s", compressedPath.GetPath())
+	_, err = CopySparse(dstFile, gzReader)
+	logrus.Debug("decompression complete")
+	// p.Wait()
+	return err
+}
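
A note for reviewers, not part of the patch itself: to confirm that the decompressed image really comes out sparse, one way is to compare the file's allocated blocks against its logical size. The sketch below is a hypothetical standalone helper (its name and layout are illustrative, not from this PR); it assumes a Unix-like host where os.FileInfo.Sys() exposes *syscall.Stat_t and Blocks is reported in 512-byte units.

package main

import (
	"fmt"
	"os"
	"syscall"
)

func main() {
	if len(os.Args) != 2 {
		fmt.Fprintf(os.Stderr, "usage: %s <file>\n", os.Args[0])
		os.Exit(1)
	}

	fi, err := os.Stat(os.Args[1])
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}

	// On darwin and linux, Sys() returns the raw stat struct. Blocks counts
	// 512-byte units actually allocated on disk, so a sparse file reports
	// fewer allocated bytes than its logical size.
	st, ok := fi.Sys().(*syscall.Stat_t)
	if !ok {
		fmt.Fprintln(os.Stderr, "raw stat data not available on this platform")
		os.Exit(1)
	}

	allocated := st.Blocks * 512
	fmt.Printf("logical size: %d bytes, allocated: %d bytes\n", fi.Size(), allocated)
	if allocated < fi.Size() {
		fmt.Println("file is sparse")
	}
}

The same signal is available from the shell by comparing du -sh (allocated blocks) with ls -lh (logical size) on the decompressed image.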