From 6fa3a39d88be1f3b622611bfe7a8ae1e599e1e47 Mon Sep 17 00:00:00 2001 From: Matthew Holt Date: Mon, 5 Nov 2018 20:56:35 -0700 Subject: [PATCH 01/18] Begin rewrite: interface declarations, ZIP implementation --- archive/archive.go | 167 +++++++++++++ archive/archive_test.go | 121 ++++++++++ archive/zip.go | 519 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 807 insertions(+) create mode 100644 archive/archive.go create mode 100644 archive/archive_test.go create mode 100644 archive/zip.go diff --git a/archive/archive.go b/archive/archive.go new file mode 100644 index 00000000..325679da --- /dev/null +++ b/archive/archive.go @@ -0,0 +1,167 @@ +package archive + +import ( + "fmt" + "io" + "os" + "path" + "path/filepath" + "runtime" + "strings" +) + +// Archiver is a type that can create an archive file +// from a list of source file names. +type Archiver interface { + Archive(sources []string, destination string) error +} + +// Unarchiver is a type that can extract archive files +// into a folder. +type Unarchiver interface { + Unarchive(source, destination string) error +} + +// Writer can write discrete byte streams of files to +// an output stream. +type Writer interface { + Create(out io.Writer) error + Write(f File) error + Close() error +} + +// Reader can read discrete byte streams of files from +// an input stream. +type Reader interface { + Open(in io.ReaderAt, size int64) error + Read() (File, error) + Close() error +} + +// Extractor can extract a specific file from a source +// archive to a specific destination folder on disk. +type Extractor interface { + Extract(source, target, destination string) error +} + +// File provides methods for accessing information about +// or contents of a file within an archive. +type File struct { + os.FileInfo + + // The original header info; depends on + // type of archive -- could be nil, too. 
+ Header interface{} + + // Allow the file contents to be read (and closed) + io.ReadCloser +} + +// FileInfo is an os.FileInfo but optionally with +// a custom name, useful if dealing with files that +// are not actual files on disk, or which have a +// different name in an archive than on disk. +type FileInfo struct { + os.FileInfo + CustomName string +} + +// Name returns fi.CustomName if not empty; +// otherwise it returns fi.FileInfo.Name(). +func (fi FileInfo) Name() string { + if fi.CustomName != "" { + return fi.CustomName + } + return fi.FileInfo.Name() +} + +// Walker can walk an archive file and return information +// about each item in the archive. +type Walker interface { + Walk(archive string, walkFn WalkFunc) error +} + +// WalkFunc is called at each item visited by Walk. +// If an error is returned, the walk may continue +// if the Walker is configured to continue on error. +// The sole exception is the error value ErrStopWalk, +// which stops the walk without an actual error. +type WalkFunc func(f File) error + +// ErrStopWalk signals Walk to break without error. 
+var ErrStopWalk = fmt.Errorf("walk stopped") + +func fileExists(name string) bool { + _, err := os.Stat(name) + return !os.IsNotExist(err) +} + +func mkdir(dirPath string) error { + err := os.MkdirAll(dirPath, 0755) + if err != nil { + return fmt.Errorf("%s: making directory: %v", dirPath, err) + } + return nil +} + +func writeNewFile(fpath string, in io.Reader, fm os.FileMode) error { + err := os.MkdirAll(filepath.Dir(fpath), 0755) + if err != nil { + return fmt.Errorf("%s: making directory for file: %v", fpath, err) + } + + out, err := os.Create(fpath) + if err != nil { + return fmt.Errorf("%s: creating new file: %v", fpath, err) + } + defer out.Close() + + err = out.Chmod(fm) + if err != nil && runtime.GOOS != "windows" { + return fmt.Errorf("%s: changing file mode: %v", fpath, err) + } + + _, err = io.Copy(out, in) + if err != nil { + return fmt.Errorf("%s: writing file: %v", fpath, err) + } + return nil +} + +// within returns true if sub is within or equal to parent. +func within(parent, sub string) bool { + rel, err := filepath.Rel(parent, sub) + if err != nil { + return false + } + return !strings.Contains(rel, "..") +} + +func multipleTopLevels(paths []string) bool { + if len(paths) < 2 { + return false + } + var lastTop string + for _, p := range paths { + p = strings.TrimPrefix(strings.Replace(p, `\`, "/", -1), "/") + for { + next := path.Dir(p) + if next == "." 
{ + break + } + p = next + } + if lastTop == "" { + lastTop = p + } + if p != lastTop { + return true + } + } + return false +} + +func folderNameFromFileName(filename string) string { + base := filepath.Base(filename) + return strings.TrimSuffix(base, filepath.Ext(base)) +} diff --git a/archive/archive_test.go b/archive/archive_test.go new file mode 100644 index 00000000..2fc46feb --- /dev/null +++ b/archive/archive_test.go @@ -0,0 +1,121 @@ +package archive + +import "testing" + +func TestWithin(t *testing.T) { + for i, tc := range []struct { + path1, path2 string + expect bool + }{ + { + path1: "/foo", + path2: "/foo/bar", + expect: true, + }, + { + path1: "/foo", + path2: "/foobar/asdf", + expect: false, + }, + { + path1: "/foobar/", + path2: "/foobar/asdf", + expect: true, + }, + { + path1: "/foobar/asdf", + path2: "/foobar", + expect: false, + }, + { + path1: "/foobar/asdf", + path2: "/foobar/", + expect: false, + }, + { + path1: "/", + path2: "/asdf", + expect: true, + }, + { + path1: "/asdf", + path2: "/asdf", + expect: true, + }, + { + path1: "/", + path2: "/", + expect: true, + }, + } { + actual := within(tc.path1, tc.path2) + if actual != tc.expect { + t.Errorf("Test %d: [%s %s] Expected %t but got %t", i, tc.path1, tc.path2, tc.expect, actual) + } + } +} + +func TestMultipleTopLevels(t *testing.T) { + for i, tc := range []struct { + set []string + expect bool + }{ + { + set: []string{}, + expect: false, + }, + { + set: []string{"/foo"}, + expect: false, + }, + { + set: []string{"/foo", "/foo/bar"}, + expect: false, + }, + { + set: []string{"/foo", "/bar"}, + expect: true, + }, + { + set: []string{"/foo", "/foobar"}, + expect: true, + }, + { + set: []string{"foo", "foo/bar"}, + expect: false, + }, + { + set: []string{"foo", "/foo/bar"}, + expect: false, + }, + { + set: []string{"../foo", "foo/bar"}, + expect: true, + }, + { + set: []string{`C:\foo\bar`, `C:\foo\bar\zee`}, + expect: false, + }, + { + set: []string{`C:\`, `C:\foo\bar`}, + expect: false, + 
}, + { + set: []string{`D:\foo`, `E:\foo`}, + expect: true, + }, + { + set: []string{`D:\foo`, `D:\foo\bar`, `C:\foo`}, + expect: true, + }, + { + set: []string{"/foo", "/", "/bar"}, + expect: true, + }, + } { + actual := multipleTopLevels(tc.set) + if actual != tc.expect { + t.Errorf("Test %d: %v: Expected %t but got %t", i, tc.set, tc.expect, actual) + } + } +} diff --git a/archive/zip.go b/archive/zip.go new file mode 100644 index 00000000..2fde3be1 --- /dev/null +++ b/archive/zip.go @@ -0,0 +1,519 @@ +package archive + +import ( + "archive/zip" + "compress/flate" + "fmt" + "io" + "log" + "os" + "path" + "path/filepath" + "strings" +) + +// Zip provides facilities for operating ZIP archives. +// See https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT. +type Zip struct { + // The compression level to use, as described + // in the compress/flat package. + CompressionLevel int + + // Whether to overwrite existing files; if false, + // an error is returned if the file exists. + OverwriteExisting bool + + // Whether to make all the directories necessary + // to create a zip archive in the desired path. + MkdirAll bool + + // If enabled, selective compression will only + // compress files which are not already in a + // compressed format; this is decided based + // simply on file extension. + SelectiveCompression bool + + // A single top-level folder can be implicitly + // created by the Archive or Unarchive methods + // if the files to be added to the archive + // or the files to be extracted from the archive + // do not all have a common root. This roughly + // mimics the behavior of archival tools integrated + // into OS file browsers which create a subfolder + // to avoid unexpectedly littering the destination + // folder with potentially many files, causing a + // problematic cleanup/organization situation. 
+ // This feature is available for both creation + // and extraction of archives, but may be slightly + // inefficient with lots and lots of files, + // especially on extraction. + ImplicitTopLevelFolder bool + + // If true, errors encountered during reading + // or writing a single file will be logged and + // the operation will continue on remaining files. + ContinueOnError bool + + zw *zip.Writer + zr *zip.Reader + ridx int +} + +// Archive creates a .zip file at destination containing +// the files listed in sources. The destination must end +// with ".zip". File paths can be those of regular files +// or directories. Regular files are stored at the 'root' +// of the archive, and directories are recursively added. +func (z *Zip) Archive(sources []string, destination string) error { + if !strings.HasSuffix(destination, ".zip") { + return fmt.Errorf("output filename must have .zip extension") + } + if !z.OverwriteExisting && fileExists(destination) { + return fmt.Errorf("file already exists: %s", destination) + } + + out, err := os.Create(destination) + if err != nil { + return fmt.Errorf("creating %s: %v", destination, err) + } + defer out.Close() + + err = z.Create(out) + if err != nil { + return fmt.Errorf("creating zip: %v", err) + } + defer z.Close() + + var topLevelFolder string + if z.ImplicitTopLevelFolder && multipleTopLevels(sources) { + topLevelFolder = folderNameFromFileName(destination) + } + + for _, source := range sources { + err := z.writeWalk(source, topLevelFolder) + if err != nil { + return fmt.Errorf("walking %s: %v", source, err) + } + } + + return nil +} + +// Unarchive unpacks the .zip file at source to destination. +// Destination will be treated as a folder name. 
+func (z *Zip) Unarchive(source, destination string) error { + if !fileExists(destination) && z.MkdirAll { + err := mkdir(destination) + if err != nil { + return fmt.Errorf("preparing destination: %v", err) + } + } + + file, err := os.Open(source) + if err != nil { + return fmt.Errorf("opening source file: %v", err) + } + defer file.Close() + + fileInfo, err := file.Stat() + if err != nil { + return fmt.Errorf("statting source file: %v", err) + } + + err = z.Open(file, fileInfo.Size()) + if err != nil { + return fmt.Errorf("opening zip archive for reading: %v", err) + } + defer z.Close() + + // if the files in the archive do not all share a common + // root, then make sure we extract to a single subfolder + // rather than potentially littering the destination... + if z.ImplicitTopLevelFolder { + files := make([]string, len(z.zr.File)) + for i := range z.zr.File { + files[i] = z.zr.File[i].Name + } + if multipleTopLevels(files) { + destination = filepath.Join(destination, folderNameFromFileName(source)) + } + } + + for { + err := z.extractNext(destination) + if err == io.EOF { + break + } + if err != nil { + if z.ContinueOnError { + log.Printf("[ERROR] Reading file in zip archive: %v", err) + continue + } + return fmt.Errorf("reading file in zip archive: %v", err) + } + } + + return nil +} + +func (z *Zip) extractNext(to string) error { + f, err := z.Read() + if err != nil { + return err // don't wrap error; calling loop must break on io.EOF + } + defer f.Close() + header, ok := f.Header.(zip.FileHeader) + if !ok { + return fmt.Errorf("expected header to be zip.FileHeader but was %T", f.Header) + } + return z.extractFile(f, filepath.Join(to, header.Name)) +} + +func (z *Zip) extractFile(f File, to string) error { + // if a directory, no content; simply make the directory and return + if f.IsDir() { + return mkdir(to) + } + + // do not overwrite existing files, if configured + if !z.OverwriteExisting && fileExists(to) { + return fmt.Errorf("file already exists: %s", 
to) + } + + return writeNewFile(to, f, f.Mode()) +} + +func (z *Zip) writeWalk(source, topLevelFolder string) error { + sourceAbs, err := filepath.Abs(source) + if err != nil { + return fmt.Errorf("getting absolute path: %v", err) + } + sourceInfo, err := os.Stat(sourceAbs) + if err != nil { + return fmt.Errorf("%s: stat: %v", source, err) + } + + var baseDir string + if topLevelFolder != "" { + baseDir = topLevelFolder + } + if sourceInfo.IsDir() { + baseDir = path.Join(baseDir, sourceInfo.Name()) + } + + return filepath.Walk(source, func(fpath string, info os.FileInfo, err error) error { + handleErr := func(err error) error { + if z.ContinueOnError { + log.Printf("[ERROR] Walking %s: %v", fpath, err) + return nil + } + return err + } + if err != nil { + return handleErr(fmt.Errorf("traversing %s: %v", fpath, err)) + } + if info == nil { + return handleErr(fmt.Errorf("no file info")) + } + + name := source + if source != fpath { + name, err = filepath.Rel(source, fpath) + if err != nil { + return handleErr(err) + } + } + + nameInArchive := path.Join(baseDir, filepath.ToSlash(name)) + + file, err := os.Open(fpath) + if err != nil { + return handleErr(fmt.Errorf("%s: opening: %v", fpath, err)) + } + defer file.Close() + + err = z.Write(File{ + FileInfo: FileInfo{ + FileInfo: info, + CustomName: nameInArchive, + }, + ReadCloser: file, + }) + if err != nil { + return handleErr(fmt.Errorf("%s: writing: %s", fpath, err)) + } + + return nil + }) +} + +// Create opens z for writing a ZIP archive to out. +func (z *Zip) Create(out io.Writer) error { + if z.zw != nil { + return fmt.Errorf("zip archive is already created for writing") + } + z.zw = zip.NewWriter(out) + if z.CompressionLevel != flate.DefaultCompression { + z.zw.RegisterCompressor(zip.Deflate, func(out io.Writer) (io.WriteCloser, error) { + return flate.NewWriter(out, z.CompressionLevel) + }) + } + return nil +} + +// Write writes f to z, which must have been opened for writing first. 
+func (z *Zip) Write(f File) error { + if z.zw == nil { + return fmt.Errorf("zip archive was not created for writing first") + } + if f.FileInfo == nil { + return fmt.Errorf("no file info") + } + if f.FileInfo.Name() == "" { + return fmt.Errorf("missing file name") + } + if f.ReadCloser == nil { + return fmt.Errorf("%s: no way to read file contents", f.Name()) + } + + header, err := zip.FileInfoHeader(f) + if err != nil { + return fmt.Errorf("%s: getting header: %v", f.Name(), err) + } + + if f.IsDir() { + header.Name += "/" // required - strangely no mention of this in zip spec? but is in godoc... + header.Method = zip.Store + } else { + ext := strings.ToLower(path.Ext(header.Name)) + if _, ok := compressedFormats[ext]; ok && z.SelectiveCompression { + header.Method = zip.Store + } else { + header.Method = zip.Deflate + } + } + + writer, err := z.zw.CreateHeader(header) + if err != nil { + return fmt.Errorf("%s: making header: %v", f.Name(), err) + } + + if f.IsDir() { + return nil + } + + if header.Mode().IsRegular() { + _, err := io.Copy(writer, f) + if err != nil { + return fmt.Errorf("%s: copying contents: %v", f.Name(), err) + } + } + + return nil +} + +// Open opens z for reading an archive from in, +// which is expected to have the given size. +func (z *Zip) Open(in io.ReaderAt, size int64) error { + if z.zr != nil { + return fmt.Errorf("zip archive is already open for reading") + } + var err error + z.zr, err = zip.NewReader(in, size) + if err != nil { + return fmt.Errorf("creating reader: %v", err) + } + z.ridx = 0 + return nil +} + +// Read reads the next file from z, which must have +// already been opened for reading. If there are no +// more files, the error is io.EOF. The File must +// be closed when finished reading from it. 
+func (z *Zip) Read() (File, error) { + if z.zr == nil { + return File{}, fmt.Errorf("zip archive is not open") + } + if z.ridx >= len(z.zr.File) { + return File{}, io.EOF + } + + // access the file and increment counter so that + // if there is an error processing this file, the + // caller can still iterate to the next file + zf := z.zr.File[z.ridx] + z.ridx++ + + file := File{ + FileInfo: zf.FileInfo(), + Header: zf.FileHeader, + } + + rc, err := zf.Open() + if err != nil { + return file, fmt.Errorf("%s: open compressed file: %v", zf.Name, err) + } + file.ReadCloser = rc + + return file, nil +} + +// Close closes the zip archive(s) opened by Create and Open. +func (z *Zip) Close() error { + if z.zr != nil { + z.zr = nil + } + if z.zw != nil { + zw := z.zw + z.zw = nil + return zw.Close() + } + return nil +} + +// Walk calls walkFn for each visited item in archive. +func (z *Zip) Walk(archive string, walkFn WalkFunc) error { + zr, err := zip.OpenReader(archive) + if err != nil { + return fmt.Errorf("opening zip reader: %v", err) + } + defer zr.Close() + + for _, zf := range zr.File { + zfrc, err := zf.Open() + if err != nil { + zfrc.Close() + if z.ContinueOnError { + log.Printf("[ERROR] Opening %s: %v", zf.Name, err) + continue + } + return fmt.Errorf("opening %s: %v", zf.Name, err) + } + + err = walkFn(File{ + FileInfo: zf.FileInfo(), + Header: zf.FileHeader, + ReadCloser: zfrc, + }) + zfrc.Close() + if err != nil { + if err == ErrStopWalk { + break + } + if z.ContinueOnError { + log.Printf("[ERROR] Walking %s: %v", zf.Name, err) + continue + } + return fmt.Errorf("walking %s: %v", zf.Name, err) + } + } + + return nil +} + +// Extract extracts a single file from the zip archive. +// If the target is a directory, the entire folder will +// be extracted into destination. 
+func (z *Zip) Extract(source, target, destination string) error { + // target refers to a path inside the archive, which should be clean also + target = path.Clean(target) + + // if the target ends up being a directory, then + // we will continue walking and extracting files + // until we are no longer within that directory + var targetDirPath string + + return z.Walk(source, func(f File) error { + zfh, ok := f.Header.(zip.FileHeader) + if !ok { + return fmt.Errorf("expected header to be zip.FileHeader but was %T", f.Header) + } + + // importantly, cleaning the path strips tailing slash, + // which must be appended to folders within the archive + name := path.Clean(zfh.Name) + if f.IsDir() && target == name { + targetDirPath = path.Dir(name) + } + + if within(target, zfh.Name) { + // either this is the exact file we want, or is + // in the directory we want to extract + + // build the filename we will extract to + end, err := filepath.Rel(targetDirPath, zfh.Name) + if err != nil { + return fmt.Errorf("relativizing paths: %v", err) + } + joined := filepath.Join(destination, end) + + err = z.extractFile(f, joined) + if err != nil { + return fmt.Errorf("extracting file %s: %v", zfh.Name, err) + } + + // if our target was not a directory, stop walk + if targetDirPath == "" { + return ErrStopWalk + } + } else if targetDirPath != "" { + // finished walking the entire directory + return ErrStopWalk + } + + return nil + }) +} + +// Compile-time checks to ensure type implements desired interfaces. +var ( + _ = Reader(new(Zip)) + _ = Writer(new(Zip)) + _ = Archiver(new(Zip)) + _ = Unarchiver(new(Zip)) + _ = Walker(new(Zip)) + _ = Extractor(new(Zip)) +) + +// compressedFormats is a (non-exhaustive) set of lowercased +// file extensions for formats that are typically already +// compressed. Compressing files that are already compressed +// is inefficient, so use this set of extension to avoid that. 
+var compressedFormats = map[string]struct{}{ + ".7z": {}, + ".avi": {}, + ".br": {}, + ".bz2": {}, + ".cab": {}, + ".docx": {}, + ".gif": {}, + ".gz": {}, + ".jar": {}, + ".jpeg": {}, + ".jpg": {}, + ".lz": {}, + ".lzma": {}, + ".mov": {}, + ".mp3": {}, + ".mp4": {}, + ".mpeg": {}, + ".mpg": {}, + ".png": {}, + ".pptx": {}, + ".rar": {}, + ".tbz2": {}, + ".tgz": {}, + ".txz": {}, + ".xlsx": {}, + ".xz": {}, + ".zip": {}, + ".zipx": {}, +} + +// DefaultZip is a convenient Zip archiver ready to use. +var DefaultZip = &Zip{ + CompressionLevel: flate.DefaultCompression, + MkdirAll: true, + SelectiveCompression: true, +} From c0325ae9f783a60c9299f7fe81b3fe60a1644de9 Mon Sep 17 00:00:00 2001 From: Matthew Holt Date: Mon, 5 Nov 2018 23:21:18 -0700 Subject: [PATCH 02/18] Implement tar --- archive/archive.go | 40 +++- archive/tar.go | 478 +++++++++++++++++++++++++++++++++++++++++++++ archive/zip.go | 11 +- 3 files changed, 525 insertions(+), 4 deletions(-) create mode 100644 archive/tar.go diff --git a/archive/archive.go b/archive/archive.go index 325679da..bf184655 100644 --- a/archive/archive.go +++ b/archive/archive.go @@ -33,7 +33,7 @@ type Writer interface { // Reader can read discrete byte streams of files from // an input stream. type Reader interface { - Open(in io.ReaderAt, size int64) error + Open(in io.Reader, size int64) error Read() (File, error) Close() error } @@ -75,6 +75,16 @@ func (fi FileInfo) Name() string { return fi.FileInfo.Name() } +// ReadFakeCloser is an io.Reader that has +// a no-op close method to satisfy the +// io.ReadCloser interface. +type ReadFakeCloser struct { + io.Reader +} + +// Close implements io.Closer. +func (rfc ReadFakeCloser) Close() error { return nil } + // Walker can walk an archive file and return information // about each item in the archive. 
type Walker interface { @@ -128,6 +138,34 @@ func writeNewFile(fpath string, in io.Reader, fm os.FileMode) error { return nil } +func writeNewSymbolicLink(fpath string, target string) error { + err := os.MkdirAll(filepath.Dir(fpath), 0755) + if err != nil { + return fmt.Errorf("%s: making directory for file: %v", fpath, err) + } + + err = os.Symlink(target, fpath) + if err != nil { + return fmt.Errorf("%s: making symbolic link for: %v", fpath, err) + } + + return nil +} + +func writeNewHardLink(fpath string, target string) error { + err := os.MkdirAll(filepath.Dir(fpath), 0755) + if err != nil { + return fmt.Errorf("%s: making directory for file: %v", fpath, err) + } + + err = os.Link(target, fpath) + if err != nil { + return fmt.Errorf("%s: making hard link for: %v", fpath, err) + } + + return nil +} + // within returns true if sub is within or equal to parent. func within(parent, sub string) bool { rel, err := filepath.Rel(parent, sub) diff --git a/archive/tar.go b/archive/tar.go new file mode 100644 index 00000000..0e3d2a46 --- /dev/null +++ b/archive/tar.go @@ -0,0 +1,478 @@ +package archive + +import ( + "archive/tar" + "fmt" + "io" + "log" + "os" + "path" + "path/filepath" + "strings" +) + +// Tar provides facilities for operating TAR archives. +// See http://www.gnu.org/software/tar/manual/html_node/Standard.html. +type Tar struct { + // Whether to overwrite existing files; if false, + // an error is returned if the file exists. + OverwriteExisting bool + + // Whether to make all the directories necessary + // to create a tar archive in the desired path. + MkdirAll bool + + // A single top-level folder can be implicitly + // created by the Archive or Unarchive methods + // if the files to be added to the archive + // or the files to be extracted from the archive + // do not all have a common root. 
This roughly + // mimics the behavior of archival tools integrated + // into OS file browsers which create a subfolder + // to avoid unexpectedly littering the destination + // folder with potentially many files, causing a + // problematic cleanup/organization situation. + // This feature is available for both creation + // and extraction of archives, but may be slightly + // inefficient with lots and lots of files, + // especially on extraction. + ImplicitTopLevelFolder bool + + // If true, errors encountered during reading + // or writing a single file will be logged and + // the operation will continue on remaining files. + ContinueOnError bool + + tw *tar.Writer + tr *tar.Reader +} + +// Archive creates a .tar file at destination containing +// the files listed in sources. The destination must end +// with ".tar". File paths can be those of regular files +// or directories. Regular files are stored at the 'root' +// of the archive, and directories are recursively added. +func (t *Tar) Archive(sources []string, destination string) error { + if !strings.HasSuffix(destination, ".tar") { + return fmt.Errorf("output filename must have .tar extension") + } + if !t.OverwriteExisting && fileExists(destination) { + return fmt.Errorf("file already exists: %s", destination) + } + + out, err := os.Create(destination) + if err != nil { + return fmt.Errorf("creating %s: %v", destination, err) + } + defer out.Close() + + err = t.Create(out) + if err != nil { + return fmt.Errorf("creating tar: %v", err) + } + defer t.Close() + + var topLevelFolder string + if t.ImplicitTopLevelFolder && multipleTopLevels(sources) { + topLevelFolder = folderNameFromFileName(destination) + } + + for _, source := range sources { + err := t.writeWalk(source, topLevelFolder) + if err != nil { + return fmt.Errorf("walking %s: %v", source, err) + } + } + + return nil +} + +// Unarchive unpacks the .tar file at source to destination. +// Destination will be treated as a folder name. 
+func (t *Tar) Unarchive(source, destination string) error { + if !fileExists(destination) && t.MkdirAll { + err := mkdir(destination) + if err != nil { + return fmt.Errorf("preparing destination: %v", err) + } + } + + // if the files in the archive do not all share a common + // root, then make sure we extract to a single subfolder + // rather than potentially littering the destination... + if t.ImplicitTopLevelFolder { + var err error + destination, err = t.addTopLevelFolder(source, destination) + if err != nil { + return fmt.Errorf("scanning source archive: %v", err) + } + } + + file, err := os.Open(source) + if err != nil { + return fmt.Errorf("opening source archive: %v", err) + } + defer file.Close() + + err = t.Open(file, 0) + if err != nil { + return fmt.Errorf("opening tar archive for reading: %v", err) + } + defer t.Close() + + for { + err := t.untarNext(destination) + if err == io.EOF { + break + } + if err != nil { + if t.ContinueOnError { + log.Printf("[ERROR] Reading file in tar archive: %v", err) + continue + } + return fmt.Errorf("reading file in tar archive: %v", err) + } + } + + return nil +} + +// addTopLevelFolder scans the files contained inside +// the tarball named sourceArchive and returns a modified +// destination if all the files do not share the same +// top-level folder. 
+func (t *Tar) addTopLevelFolder(sourceArchive, destination string) (string, error) { + file, err := os.Open(sourceArchive) + if err != nil { + return "", fmt.Errorf("opening source archive: %v", err) + } + defer file.Close() + + tr := tar.NewReader(file) + + var files []string + for { + hdr, err := tr.Next() + if err == io.EOF { + break + } + if err != nil { + return "", fmt.Errorf("scanning tarball's file listing: %v", err) + } + files = append(files, hdr.Name) + } + + if multipleTopLevels(files) { + destination = filepath.Join(destination, folderNameFromFileName(sourceArchive)) + } + + return destination, nil +} + +func (t *Tar) untarNext(to string) error { + f, err := t.Read() + if err != nil { + return err // don't wrap error; calling loop must break on io.EOF + } + header, ok := f.Header.(*tar.Header) + if !ok { + return fmt.Errorf("expected header to be *tar.Header but was %T", f.Header) + } + return t.untarFile(f, filepath.Join(to, header.Name)) +} + +func (t *Tar) untarFile(f File, to string) error { + // do not overwrite existing files, if configured + if !f.IsDir() && !t.OverwriteExisting && fileExists(to) { + return fmt.Errorf("file already exists: %s", to) + } + + hdr, ok := f.Header.(*tar.Header) + if !ok { + return fmt.Errorf("expected header to be *tar.Header but was %T", f.Header) + } + + switch hdr.Typeflag { + case tar.TypeDir: + return mkdir(to) + case tar.TypeReg, tar.TypeRegA, tar.TypeChar, tar.TypeBlock, tar.TypeFifo: + return writeNewFile(to, f, f.Mode()) + case tar.TypeSymlink: + return writeNewSymbolicLink(to, hdr.Linkname) + case tar.TypeLink: + return writeNewHardLink(to, filepath.Join(to, hdr.Linkname)) + case tar.TypeXGlobalHeader: + return nil // ignore the pax global header from git-generated tarballs + default: + return fmt.Errorf("%s: unknown type flag: %c", hdr.Name, hdr.Typeflag) + } +} + +func (t *Tar) writeWalk(source, topLevelFolder string) error { + sourceAbs, err := filepath.Abs(source) + if err != nil { + return 
fmt.Errorf("getting absolute path: %v", err) + } + sourceInfo, err := os.Stat(sourceAbs) + if err != nil { + return fmt.Errorf("%s: stat: %v", source, err) + } + + var baseDir string + if topLevelFolder != "" { + baseDir = topLevelFolder + } + if sourceInfo.IsDir() { + baseDir = path.Join(baseDir, sourceInfo.Name()) + } + + return filepath.Walk(source, func(fpath string, info os.FileInfo, err error) error { + handleErr := func(err error) error { + if t.ContinueOnError { + log.Printf("[ERROR] Walking %s: %v", fpath, err) + return nil + } + return err + } + if err != nil { + return handleErr(fmt.Errorf("traversing %s: %v", fpath, err)) + } + if info == nil { + return handleErr(fmt.Errorf("no file info")) + } + + name := source + if source != fpath { + name, err = filepath.Rel(source, fpath) + if err != nil { + return handleErr(err) + } + } + + nameInArchive := path.Join(baseDir, filepath.ToSlash(name)) + + file, err := os.Open(fpath) + if err != nil { + return handleErr(fmt.Errorf("%s: opening: %v", fpath, err)) + } + defer file.Close() + + err = t.Write(File{ + FileInfo: FileInfo{ + FileInfo: info, + CustomName: nameInArchive, + }, + ReadCloser: file, + }) + if err != nil { + return handleErr(fmt.Errorf("%s: writing: %s", fpath, err)) + } + + return nil + }) +} + +// Create opens t for writing a tar archive to out. +func (t *Tar) Create(out io.Writer) error { + if t.tw != nil { + return fmt.Errorf("tar archive is already created for writing") + } + t.tw = tar.NewWriter(out) + return nil +} + +// Write writes f to t, which must have been opened for writing first. 
+func (t *Tar) Write(f File) error { + if t.tw == nil { + return fmt.Errorf("tar archive was not created for writing first") + } + if f.FileInfo == nil { + return fmt.Errorf("no file info") + } + if f.FileInfo.Name() == "" { + return fmt.Errorf("missing file name") + } + if f.ReadCloser == nil { + return fmt.Errorf("%s: no way to read file contents", f.Name()) + } + + hdr, err := tar.FileInfoHeader(f, f.Name()) + if err != nil { + return fmt.Errorf("%s: making header: %v", f.Name(), err) + } + + err = t.tw.WriteHeader(hdr) + if err != nil { + return fmt.Errorf("%s: writing header: %v", hdr.Name, err) + } + + if f.IsDir() { + return nil + } + + if hdr.Typeflag == tar.TypeReg { + _, err := io.Copy(t.tw, f) + if err != nil { + return fmt.Errorf("%s: copying contents: %v", f.Name(), err) + } + } + + return nil +} + +// Open opens t for reading an archive from in. +// The size parameter is not needed. +func (t *Tar) Open(in io.Reader, size int64) error { + if t.tr != nil { + return fmt.Errorf("tar archive is already open for reading") + } + t.tr = tar.NewReader(in) + return nil +} + +// Read reads the next file from t, which must have +// already been opened for reading. If there are no +// more files, the error is io.EOF. The File must +// be closed when finished reading from it. +func (t *Tar) Read() (File, error) { + if t.tr == nil { + return File{}, fmt.Errorf("tar archive is not open") + } + + hdr, err := t.tr.Next() + if err != nil { + return File{}, err // don't wrap error; preserve io.EOF + } + + file := File{ + FileInfo: hdr.FileInfo(), + Header: hdr, + ReadCloser: ReadFakeCloser{t.tr}, + } + + return file, nil +} + +// Close closes the tar archive(s) opened by Create and Open. +func (t *Tar) Close() error { + if t.tr != nil { + t.tr = nil + } + if t.tw != nil { + tw := t.tw + t.tw = nil + return tw.Close() + } + return nil +} + +// Walk calls walkFn for each visited item in archive. 
+func (t *Tar) Walk(archive string, walkFn WalkFunc) error { + file, err := os.Open(archive) + if err != nil { + return fmt.Errorf("opening archive file: %v", err) + } + defer file.Close() + + tr := tar.NewReader(file) + + for { + hdr, err := tr.Next() + if err == io.EOF { + break + } + if err != nil { + if t.ContinueOnError { + log.Printf("[ERROR] Opening next file: %v", err) + continue + } + return fmt.Errorf("opening next file: %v", err) + } + err = walkFn(File{ + FileInfo: hdr.FileInfo(), + Header: hdr, + ReadCloser: ReadFakeCloser{tr}, + }) + if err != nil { + if err == ErrStopWalk { + break + } + if t.ContinueOnError { + log.Printf("[ERROR] Walking %s: %v", hdr.Name, err) + continue + } + return fmt.Errorf("walking %s: %v", hdr.Name, err) + } + } + + return nil +} + +// Extract extracts a single file from the tar archive. +// If the target is a directory, the entire folder will +// be extracted into destination. +func (t *Tar) Extract(source, target, destination string) error { + // target refers to a path inside the archive, which should be clean also + target = path.Clean(target) + + // if the target ends up being a directory, then + // we will continue walking and extracting files + // until we are no longer within that directory + var targetDirPath string + + return t.Walk(source, func(f File) error { + th, ok := f.Header.(*tar.Header) + if !ok { + return fmt.Errorf("expected header to be *tar.Header but was %T", f.Header) + } + + // importantly, cleaning the path strips tailing slash, + // which must be appended to folders within the archive + name := path.Clean(th.Name) + if f.IsDir() && target == name { + targetDirPath = path.Dir(name) + } + + if within(target, th.Name) { + // either this is the exact file we want, or is + // in the directory we want to extract + + // build the filename we will extract to + end, err := filepath.Rel(targetDirPath, th.Name) + if err != nil { + return fmt.Errorf("relativizing paths: %v", err) + } + joined := 
filepath.Join(destination, end) + + err = t.untarFile(f, joined) + if err != nil { + return fmt.Errorf("extracting file %s: %v", th.Name, err) + } + + // if our target was not a directory, stop walk + if targetDirPath == "" { + return ErrStopWalk + } + } else if targetDirPath != "" { + // finished walking the entire directory + return ErrStopWalk + } + + return nil + }) +} + +// Compile-time checks to ensure type implements desired interfaces. +var ( + _ = Reader(new(Tar)) + _ = Writer(new(Tar)) + _ = Archiver(new(Tar)) + _ = Unarchiver(new(Tar)) + _ = Walker(new(Tar)) + _ = Extractor(new(Tar)) +) + +// DefaultTar is a convenient Tar archiver ready to use. +var DefaultTar = &Tar{ + MkdirAll: true, +} diff --git a/archive/zip.go b/archive/zip.go index 2fde3be1..fb944d0c 100644 --- a/archive/zip.go +++ b/archive/zip.go @@ -313,13 +313,18 @@ func (z *Zip) Write(f File) error { } // Open opens z for reading an archive from in, -// which is expected to have the given size. -func (z *Zip) Open(in io.ReaderAt, size int64) error { +// which is expected to have the given size and +// which must be an io.ReaderAt. 
+func (z *Zip) Open(in io.Reader, size int64) error { + inRdrAt, ok := in.(io.ReaderAt) + if !ok { + return fmt.Errorf("reader must be io.ReaderAt") + } if z.zr != nil { return fmt.Errorf("zip archive is already open for reading") } var err error - z.zr, err = zip.NewReader(in, size) + z.zr, err = zip.NewReader(inRdrAt, size) if err != nil { return fmt.Errorf("creating reader: %v", err) } From 132b9c7798f9a61a493eb5da918ab0bfae162f84 Mon Sep 17 00:00:00 2001 From: Matthew Holt Date: Tue, 6 Nov 2018 00:47:59 -0700 Subject: [PATCH 03/18] Implement .tar.gz --- archive/archive.go | 6 ++- archive/tar.go | 71 ++++++++++++++++++++++++----- archive/targz.go | 109 +++++++++++++++++++++++++++++++++++++++++++++ archive/zip.go | 12 ++++- 4 files changed, 185 insertions(+), 13 deletions(-) create mode 100644 archive/targz.go diff --git a/archive/archive.go b/archive/archive.go index bf184655..27cff3f0 100644 --- a/archive/archive.go +++ b/archive/archive.go @@ -201,5 +201,9 @@ func multipleTopLevels(paths []string) bool { func folderNameFromFileName(filename string) string { base := filepath.Base(filename) - return strings.TrimSuffix(base, filepath.Ext(base)) + firstDot := strings.Index(base, ".") + if firstDot > -1 { + return base[:firstDot] + } + return base } diff --git a/archive/tar.go b/archive/tar.go index 0e3d2a46..fc8aef84 100644 --- a/archive/tar.go +++ b/archive/tar.go @@ -45,6 +45,10 @@ type Tar struct { tw *tar.Writer tr *tar.Reader + + readerWrapFn func(io.Reader) (io.Reader, error) + writerWrapFn func(io.Writer) (io.Writer, error) + cleanupWrapFn func() } // Archive creates a .tar file at destination containing @@ -53,13 +57,23 @@ type Tar struct { // or directories. Regular files are stored at the 'root' // of the archive, and directories are recursively added. 
func (t *Tar) Archive(sources []string, destination string) error { - if !strings.HasSuffix(destination, ".tar") { + if t.writerWrapFn == nil && !strings.HasSuffix(destination, ".tar") { return fmt.Errorf("output filename must have .tar extension") } if !t.OverwriteExisting && fileExists(destination) { return fmt.Errorf("file already exists: %s", destination) } + // make the folder to contain the resulting archive + // if it does not already exist + destDir := filepath.Dir(destination) + if t.MkdirAll && !fileExists(destDir) { + err := mkdir(destDir) + if err != nil { + return fmt.Errorf("making folder for destination: %v", err) + } + } + out, err := os.Create(destination) if err != nil { return fmt.Errorf("creating %s: %v", destination, err) @@ -148,7 +162,20 @@ func (t *Tar) addTopLevelFolder(sourceArchive, destination string) (string, erro } defer file.Close() - tr := tar.NewReader(file) + // if the reader is to be wrapped, ensure we do that now + // or we will not be able to read the archive successfully + reader := io.Reader(file) + if t.readerWrapFn != nil { + reader, err = t.readerWrapFn(reader) + if err != nil { + return "", fmt.Errorf("wrapping reader: %v", err) + } + } + if t.cleanupWrapFn != nil { + defer t.cleanupWrapFn() + } + + tr := tar.NewReader(reader) var files []string for { @@ -277,6 +304,17 @@ func (t *Tar) Create(out io.Writer) error { if t.tw != nil { return fmt.Errorf("tar archive is already created for writing") } + + // wrapping writers allows us to output + // compressed tarballs, for example + if t.writerWrapFn != nil { + var err error + out, err = t.writerWrapFn(out) + if err != nil { + return fmt.Errorf("wrapping writer: %v", err) + } + } + t.tw = tar.NewWriter(out) return nil } @@ -326,6 +364,14 @@ func (t *Tar) Open(in io.Reader, size int64) error { if t.tr != nil { return fmt.Errorf("tar archive is already open for reading") } + // wrapping readers allows us to open compressed tarballs + if t.readerWrapFn != nil { + var err error + 
in, err = t.readerWrapFn(in) + if err != nil { + return fmt.Errorf("wrapping file reader: %v", err) + } + } t.tr = tar.NewReader(in) return nil } @@ -355,6 +401,9 @@ func (t *Tar) Read() (File, error) { // Close closes the tar archive(s) opened by Create and Open. func (t *Tar) Close() error { + if t.cleanupWrapFn != nil { + t.cleanupWrapFn() + } if t.tr != nil { t.tr = nil } @@ -374,10 +423,14 @@ func (t *Tar) Walk(archive string, walkFn WalkFunc) error { } defer file.Close() - tr := tar.NewReader(file) + err = t.Open(file, 0) + if err != nil { + return fmt.Errorf("opening archive: %v", err) + } + defer t.Close() for { - hdr, err := tr.Next() + f, err := t.Read() if err == io.EOF { break } @@ -388,20 +441,16 @@ func (t *Tar) Walk(archive string, walkFn WalkFunc) error { } return fmt.Errorf("opening next file: %v", err) } - err = walkFn(File{ - FileInfo: hdr.FileInfo(), - Header: hdr, - ReadCloser: ReadFakeCloser{tr}, - }) + err = walkFn(f) if err != nil { if err == ErrStopWalk { break } if t.ContinueOnError { - log.Printf("[ERROR] Walking %s: %v", hdr.Name, err) + log.Printf("[ERROR] Walking %s: %v", f.Name(), err) continue } - return fmt.Errorf("walking %s: %v", hdr.Name, err) + return fmt.Errorf("walking %s: %v", f.Name(), err) } } diff --git a/archive/targz.go b/archive/targz.go new file mode 100644 index 00000000..d2ea64fb --- /dev/null +++ b/archive/targz.go @@ -0,0 +1,109 @@ +package archive + +import ( + "compress/gzip" + "fmt" + "io" + "strings" +) + +// TarGz facilitates gzip compression +// (RFC 1952) of tarball archives. +type TarGz struct { + *Tar + + // The compression level to use, as described + // in the compress/gzip package. + CompressionLevel int +} + +// Archive creates a gzip-compressed tar file at +// destination containing the files listed in sources. +// The destination must end with ".tar.gz" or ".tgz". +// File paths can be those of regular files or +// directories. 
Regular files are stored at the 'root' +// of the archive, and directories are recursively added. +func (tgz *TarGz) Archive(sources []string, destination string) error { + if !strings.HasSuffix(destination, ".tar.gz") && + !strings.HasSuffix(destination, ".tgz") { + return fmt.Errorf("output filename must have .tar.gz or .tgz extension") + } + tgz.wrapWriter() + return tgz.Tar.Archive(sources, destination) +} + +// Unarchive unpacks the gzip-compressed tarball +// at source to destination. Destination will be +// treated as a folder name. +func (tgz *TarGz) Unarchive(source, destination string) error { + tgz.wrapReader() + return tgz.Tar.Unarchive(source, destination) +} + +// Walk calls walkFn for each visited item in archive. +func (tgz *TarGz) Walk(archive string, walkFn WalkFunc) error { + tgz.wrapReader() + return tgz.Tar.Walk(archive, walkFn) +} + +// Create opens tgz for writing a gzip-compressed +// tar archive to out. +func (tgz *TarGz) Create(out io.Writer) error { + tgz.wrapWriter() + return tgz.Create(out) +} + +// Open opens t for reading an archive from in. +// The size parameter is not needed. +func (tgz *TarGz) Open(in io.Reader, size int64) error { + tgz.wrapReader() + return tgz.Tar.Open(in, size) +} + +// Extract extracts a single file from the tar archive. +// If the target is a directory, the entire folder will +// be extracted into destination. 
+func (tgz *TarGz) Extract(source, target, destination string) error { + tgz.wrapReader() + return tgz.Tar.Extract(source, target, destination) +} + +func (tgz *TarGz) wrapWriter() { + var gzw *gzip.Writer + tgz.Tar.writerWrapFn = func(w io.Writer) (io.Writer, error) { + var err error + gzw, err = gzip.NewWriterLevel(w, tgz.CompressionLevel) + return gzw, err + } + tgz.Tar.cleanupWrapFn = func() { + gzw.Close() + } +} + +func (tgz *TarGz) wrapReader() { + var gzr *gzip.Reader + tgz.Tar.readerWrapFn = func(r io.Reader) (io.Reader, error) { + var err error + gzr, err = gzip.NewReader(r) + return gzr, err + } + tgz.Tar.cleanupWrapFn = func() { + gzr.Close() + } +} + +// Compile-time checks to ensure type implements desired interfaces. +var ( + _ = Reader(new(TarGz)) + _ = Writer(new(TarGz)) + _ = Archiver(new(TarGz)) + _ = Unarchiver(new(TarGz)) + _ = Walker(new(TarGz)) + _ = Extractor(new(TarGz)) +) + +// DefaultTarGz is a convenient TarGz archiver ready to use. +var DefaultTarGz = &TarGz{ + CompressionLevel: gzip.DefaultCompression, + Tar: DefaultTar, +} diff --git a/archive/zip.go b/archive/zip.go index fb944d0c..97609741 100644 --- a/archive/zip.go +++ b/archive/zip.go @@ -16,7 +16,7 @@ import ( // See https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT. type Zip struct { // The compression level to use, as described - // in the compress/flat package. + // in the compress/flate package. 
CompressionLevel int // Whether to overwrite existing files; if false, @@ -72,6 +72,16 @@ func (z *Zip) Archive(sources []string, destination string) error { return fmt.Errorf("file already exists: %s", destination) } + // make the folder to contain the resulting archive + // if it does not already exist + destDir := filepath.Dir(destination) + if z.MkdirAll && !fileExists(destDir) { + err := mkdir(destDir) + if err != nil { + return fmt.Errorf("making folder for destination: %v", err) + } + } + out, err := os.Create(destination) if err != nil { return fmt.Errorf("creating %s: %v", destination, err) From 0d66ce6b77f970dd4c6afe4cf2f91fda2f8ec2a6 Mon Sep 17 00:00:00 2001 From: Matthew Holt Date: Tue, 6 Nov 2018 08:52:38 -0700 Subject: [PATCH 04/18] Implement xz; use faster lib for decompression --- archive/tar.go | 13 +++--- archive/targz.go | 21 +++++----- archive/tarxz.go | 103 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 119 insertions(+), 18 deletions(-) create mode 100644 archive/tarxz.go diff --git a/archive/tar.go b/archive/tar.go index fc8aef84..25c05bcb 100644 --- a/archive/tar.go +++ b/archive/tar.go @@ -51,11 +51,10 @@ type Tar struct { cleanupWrapFn func() } -// Archive creates a .tar file at destination containing -// the files listed in sources. The destination must end -// with ".tar". File paths can be those of regular files -// or directories. Regular files are stored at the 'root' -// of the archive, and directories are recursively added. +// Archive creates a tarball file at destination containing +// the files listed in sources. The destination must end with +// ".tar". File paths can be those of regular files or +// directories; directories will be recursively added. 
func (t *Tar) Archive(sources []string, destination string) error { if t.writerWrapFn == nil && !strings.HasSuffix(destination, ".tar") { return fmt.Errorf("output filename must have .tar extension") @@ -358,8 +357,8 @@ func (t *Tar) Write(f File) error { return nil } -// Open opens t for reading an archive from in. -// The size parameter is not needed. +// Open opens t for reading an archive from +// in. The size parameter is not used. func (t *Tar) Open(in io.Reader, size int64) error { if t.tr != nil { return fmt.Errorf("tar archive is already open for reading") diff --git a/archive/targz.go b/archive/targz.go index d2ea64fb..bc9f6531 100644 --- a/archive/targz.go +++ b/archive/targz.go @@ -17,12 +17,11 @@ type TarGz struct { CompressionLevel int } -// Archive creates a gzip-compressed tar file at -// destination containing the files listed in sources. -// The destination must end with ".tar.gz" or ".tgz". -// File paths can be those of regular files or -// directories. Regular files are stored at the 'root' -// of the archive, and directories are recursively added. +// Archive creates a compressed tar file at destination +// containing the files listed in sources. The destination +// must end with ".tar.gz" or ".tgz". File paths can be +// those of regular files or directories; directories will +// be recursively added. func (tgz *TarGz) Archive(sources []string, destination string) error { if !strings.HasSuffix(destination, ".tar.gz") && !strings.HasSuffix(destination, ".tgz") { @@ -32,8 +31,8 @@ func (tgz *TarGz) Archive(sources []string, destination string) error { return tgz.Tar.Archive(sources, destination) } -// Unarchive unpacks the gzip-compressed tarball -// at source to destination. Destination will be +// Unarchive unpacks the compressed tarball at +// source to destination. Destination will be // treated as a folder name. 
 func (tgz *TarGz) Unarchive(source, destination string) error {
 	tgz.wrapReader()
@@ -46,15 +45,15 @@ func (tgz *TarGz) Walk(archive string, walkFn WalkFunc) error {
 	return tgz.Tar.Walk(archive, walkFn)
 }
 
-// Create opens tgz for writing a gzip-compressed
+// Create opens tgz for writing a compressed
 // tar archive to out.
 func (tgz *TarGz) Create(out io.Writer) error {
 	tgz.wrapWriter()
 	return tgz.Create(out)
 }
 
-// Open opens t for reading an archive from in.
-// The size parameter is not needed.
+// Open opens t for reading a compressed archive from
+// in. The size parameter is not used.
 func (tgz *TarGz) Open(in io.Reader, size int64) error {
 	tgz.wrapReader()
 	return tgz.Tar.Open(in, size)
diff --git a/archive/tarxz.go b/archive/tarxz.go
new file mode 100644
index 00000000..8f061107
--- /dev/null
+++ b/archive/tarxz.go
@@ -0,0 +1,103 @@
+package archive
+
+import (
+	"fmt"
+	"io"
+	"strings"
+
+	"github.com/ulikunitz/xz"
+	fastxz "github.com/xi2/xz"
+)
+
+// TarXz facilitates xz compression
+// (https://tukaani.org/xz/format.html)
+// of tarball archives.
+type TarXz struct {
+	*Tar
+}
+
+// Archive creates a compressed tar file at destination
+// containing the files listed in sources. The destination
+// must end with ".tar.xz" or ".txz". File paths can be
+// those of regular files or directories; directories will
+// be recursively added.
+func (txz *TarXz) Archive(sources []string, destination string) error {
+	if !strings.HasSuffix(destination, ".tar.xz") &&
+		!strings.HasSuffix(destination, ".txz") {
+		return fmt.Errorf("output filename must have .tar.xz or .txz extension")
+	}
+	txz.wrapWriter()
+	return txz.Tar.Archive(sources, destination)
+}
+
+// Unarchive unpacks the compressed tarball at
+// source to destination. Destination will be
+// treated as a folder name.
+func (txz *TarXz) Unarchive(source, destination string) error {
+	txz.wrapReader()
+	return txz.Tar.Unarchive(source, destination)
+}
+
+// Walk calls walkFn for each visited item in archive.
+func (txz *TarXz) Walk(archive string, walkFn WalkFunc) error {
+	txz.wrapReader()
+	return txz.Tar.Walk(archive, walkFn)
+}
+
+// Create opens txz for writing a compressed
+// tar archive to out.
+func (txz *TarXz) Create(out io.Writer) error {
+	txz.wrapWriter()
+	return txz.Tar.Create(out)
+}
+
+// Open opens t for reading a compressed archive from
+// in. The size parameter is not used.
+func (txz *TarXz) Open(in io.Reader, size int64) error {
+	txz.wrapReader()
+	return txz.Tar.Open(in, size)
+}
+
+// Extract extracts a single file from the tar archive.
+// If the target is a directory, the entire folder will
+// be extracted into destination.
+func (txz *TarXz) Extract(source, target, destination string) error {
+	txz.wrapReader()
+	return txz.Tar.Extract(source, target, destination)
+}
+
+func (txz *TarXz) wrapWriter() {
+	var xzw *xz.Writer
+	txz.Tar.writerWrapFn = func(w io.Writer) (io.Writer, error) {
+		var err error
+		xzw, err = xz.NewWriter(w)
+		return xzw, err
+	}
+	txz.Tar.cleanupWrapFn = func() {
+		xzw.Close()
+	}
+}
+
+func (txz *TarXz) wrapReader() {
+	var xzr *fastxz.Reader
+	txz.Tar.readerWrapFn = func(r io.Reader) (io.Reader, error) {
+		var err error
+		xzr, err = fastxz.NewReader(r, 0)
+		return xzr, err
+	}
+}
+
+// Compile-time checks to ensure type implements desired interfaces.
+var (
+	_ = Reader(new(TarXz))
+	_ = Writer(new(TarXz))
+	_ = Archiver(new(TarXz))
+	_ = Unarchiver(new(TarXz))
+	_ = Walker(new(TarXz))
+	_ = Extractor(new(TarXz))
+)
+
+// DefaultTarXz is a convenient TarXz archiver ready to use.
+var DefaultTarXz = &TarXz{ + Tar: DefaultTar, +} From cdc9a4c39107cae139bf9962f5dfc37ad37ebc60 Mon Sep 17 00:00:00 2001 From: Matthew Holt Date: Tue, 6 Nov 2018 09:04:49 -0700 Subject: [PATCH 05/18] Implement bzip2 tarballs --- archive/tar.go | 2 +- archive/tarbz2.go | 110 ++++++++++++++++++++++++++++++++++++++++++++++ archive/targz.go | 2 +- archive/tarxz.go | 2 +- archive/zip.go | 2 +- 5 files changed, 114 insertions(+), 4 deletions(-) create mode 100644 archive/tarbz2.go diff --git a/archive/tar.go b/archive/tar.go index 25c05bcb..e3f204e0 100644 --- a/archive/tar.go +++ b/archive/tar.go @@ -520,7 +520,7 @@ var ( _ = Extractor(new(Tar)) ) -// DefaultTar is a convenient Tar archiver ready to use. +// DefaultTar is a convenient archiver ready to use. var DefaultTar = &Tar{ MkdirAll: true, } diff --git a/archive/tarbz2.go b/archive/tarbz2.go new file mode 100644 index 00000000..212a39ff --- /dev/null +++ b/archive/tarbz2.go @@ -0,0 +1,110 @@ +package archive + +import ( + "fmt" + "io" + "strings" + + "github.com/dsnet/compress/bzip2" +) + +// TarBz2 facilitates bzip2 compression +// (https://github.com/dsnet/compress/blob/master/doc/bzip2-format.pdf) +// of tarball archives. +type TarBz2 struct { + *Tar + + CompressionLevel int +} + +// Archive creates a compressed tar file at destination +// containing the files listed in sources. The destination +// must end with ".tar.gz" or ".tbz2". File paths can be +// those of regular files or directories; directories will +// be recursively added. +func (tbz2 *TarBz2) Archive(sources []string, destination string) error { + if !strings.HasSuffix(destination, ".tar.bz2") && + !strings.HasSuffix(destination, ".tbz2") { + return fmt.Errorf("output filename must have .tar.bz2 or .tbz2 extension") + } + tbz2.wrapWriter() + return tbz2.Tar.Archive(sources, destination) +} + +// Unarchive unpacks the compressed tarball at +// source to destination. Destination will be +// treated as a folder name. 
+func (tbz2 *TarBz2) Unarchive(source, destination string) error {
+	tbz2.wrapReader()
+	return tbz2.Tar.Unarchive(source, destination)
+}
+
+// Walk calls walkFn for each visited item in archive.
+func (tbz2 *TarBz2) Walk(archive string, walkFn WalkFunc) error {
+	tbz2.wrapReader()
+	return tbz2.Tar.Walk(archive, walkFn)
+}
+
+// Create opens tbz2 for writing a compressed
+// tar archive to out.
+func (tbz2 *TarBz2) Create(out io.Writer) error {
+	tbz2.wrapWriter()
+	return tbz2.Tar.Create(out)
+}
+
+// Open opens t for reading a compressed archive from
+// in. The size parameter is not used.
+func (tbz2 *TarBz2) Open(in io.Reader, size int64) error {
+	tbz2.wrapReader()
+	return tbz2.Tar.Open(in, size)
+}
+
+// Extract extracts a single file from the tar archive.
+// If the target is a directory, the entire folder will
+// be extracted into destination.
+func (tbz2 *TarBz2) Extract(source, target, destination string) error {
+	tbz2.wrapReader()
+	return tbz2.Tar.Extract(source, target, destination)
+}
+
+func (tbz2 *TarBz2) wrapWriter() {
+	var bz2w *bzip2.Writer
+	tbz2.Tar.writerWrapFn = func(w io.Writer) (io.Writer, error) {
+		var err error
+		bz2w, err = bzip2.NewWriter(w, &bzip2.WriterConfig{
+			Level: tbz2.CompressionLevel,
+		})
+		return bz2w, err
+	}
+	tbz2.Tar.cleanupWrapFn = func() {
+		bz2w.Close()
+	}
+}
+
+func (tbz2 *TarBz2) wrapReader() {
+	var bz2r *bzip2.Reader
+	tbz2.Tar.readerWrapFn = func(r io.Reader) (io.Reader, error) {
+		var err error
+		bz2r, err = bzip2.NewReader(r, nil)
+		return bz2r, err
+	}
+	tbz2.Tar.cleanupWrapFn = func() {
+		bz2r.Close()
+	}
+}
+
+// Compile-time checks to ensure type implements desired interfaces.
+var (
+	_ = Reader(new(TarBz2))
+	_ = Writer(new(TarBz2))
+	_ = Archiver(new(TarBz2))
+	_ = Unarchiver(new(TarBz2))
+	_ = Walker(new(TarBz2))
+	_ = Extractor(new(TarBz2))
+)
+
+// DefaultTarBz2 is a convenient archiver ready to use.
+var DefaultTarBz2 = &TarBz2{ + CompressionLevel: bzip2.DefaultCompression, + Tar: DefaultTar, +} diff --git a/archive/targz.go b/archive/targz.go index bc9f6531..4ee2792a 100644 --- a/archive/targz.go +++ b/archive/targz.go @@ -101,7 +101,7 @@ var ( _ = Extractor(new(TarGz)) ) -// DefaultTarGz is a convenient TarGz archiver ready to use. +// DefaultTarGz is a convenient archiver ready to use. var DefaultTarGz = &TarGz{ CompressionLevel: gzip.DefaultCompression, Tar: DefaultTar, diff --git a/archive/tarxz.go b/archive/tarxz.go index 8f061107..3dcc415b 100644 --- a/archive/tarxz.go +++ b/archive/tarxz.go @@ -97,7 +97,7 @@ var ( _ = Extractor(new(TarXz)) ) -// DefaultTarXz is a convenient TarXz archiver ready to use. +// DefaultTarXz is a convenient archiver ready to use. var DefaultTarXz = &TarXz{ Tar: DefaultTar, } diff --git a/archive/zip.go b/archive/zip.go index 97609741..0bf3ecfe 100644 --- a/archive/zip.go +++ b/archive/zip.go @@ -526,7 +526,7 @@ var compressedFormats = map[string]struct{}{ ".zipx": {}, } -// DefaultZip is a convenient Zip archiver ready to use. +// DefaultZip is a convenient archiver ready to use. var DefaultZip = &Zip{ CompressionLevel: flate.DefaultCompression, MkdirAll: true, From 066db1754f9120d8ceec6236dbaddc6a9eb2a4c2 Mon Sep 17 00:00:00 2001 From: Matthew Holt Date: Tue, 6 Nov 2018 09:14:36 -0700 Subject: [PATCH 06/18] Implement tar.lz4 --- archive/tarbz2.go | 2 +- archive/tarlz4.go | 105 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 106 insertions(+), 1 deletion(-) create mode 100644 archive/tarlz4.go diff --git a/archive/tarbz2.go b/archive/tarbz2.go index 212a39ff..696d2cea 100644 --- a/archive/tarbz2.go +++ b/archive/tarbz2.go @@ -19,7 +19,7 @@ type TarBz2 struct { // Archive creates a compressed tar file at destination // containing the files listed in sources. The destination -// must end with ".tar.gz" or ".tbz2". File paths can be +// must end with ".tar.bz2" or ".tbz2". 
File paths can be
 // those of regular files or directories; directories will
 // be recursively added.
 func (tbz2 *TarBz2) Archive(sources []string, destination string) error {
diff --git a/archive/tarlz4.go b/archive/tarlz4.go
new file mode 100644
index 00000000..633416f1
--- /dev/null
+++ b/archive/tarlz4.go
@@ -0,0 +1,105 @@
+package archive
+
+import (
+	"fmt"
+	"io"
+	"strings"
+
+	"github.com/pierrec/lz4"
+)
+
+// TarLz4 facilitates lz4 compression
+// (https://github.com/lz4/lz4/tree/master/doc)
+// of tarball archives.
+type TarLz4 struct {
+	*Tar
+
+	// The compression level to use when writing.
+	// Minimum 0 (fast compression), maximum 12
+	// (most space savings).
+	CompressionLevel int
+}
+
+// Archive creates a compressed tar file at destination
+// containing the files listed in sources. The destination
+// must end with ".tar.lz4" or ".tlz4". File paths can be
+// those of regular files or directories; directories will
+// be recursively added.
+func (tlz4 *TarLz4) Archive(sources []string, destination string) error {
+	if !strings.HasSuffix(destination, ".tar.lz4") &&
+		!strings.HasSuffix(destination, ".tlz4") {
+		return fmt.Errorf("output filename must have .tar.lz4 or .tlz4 extension")
+	}
+	tlz4.wrapWriter()
+	return tlz4.Tar.Archive(sources, destination)
+}
+
+// Unarchive unpacks the compressed tarball at
+// source to destination. Destination will be
+// treated as a folder name.
+func (tlz4 *TarLz4) Unarchive(source, destination string) error {
+	tlz4.wrapReader()
+	return tlz4.Tar.Unarchive(source, destination)
+}
+
+// Walk calls walkFn for each visited item in archive.
+func (tlz4 *TarLz4) Walk(archive string, walkFn WalkFunc) error {
+	tlz4.wrapReader()
+	return tlz4.Tar.Walk(archive, walkFn)
+}
+
+// Create opens tlz4 for writing a compressed
+// tar archive to out.
+func (tlz4 *TarLz4) Create(out io.Writer) error {
+	tlz4.wrapWriter()
+	return tlz4.Tar.Create(out)
+}
+
+// Open opens t for reading a compressed archive from
+// in. The size parameter is not used.
+func (tlz4 *TarLz4) Open(in io.Reader, size int64) error {
+	tlz4.wrapReader()
+	return tlz4.Tar.Open(in, size)
+}
+
+// Extract extracts a single file from the tar archive.
+// If the target is a directory, the entire folder will
+// be extracted into destination.
+func (tlz4 *TarLz4) Extract(source, target, destination string) error {
+	tlz4.wrapReader()
+	return tlz4.Tar.Extract(source, target, destination)
+}
+
+func (tlz4 *TarLz4) wrapWriter() {
+	var lz4w *lz4.Writer
+	tlz4.Tar.writerWrapFn = func(w io.Writer) (io.Writer, error) {
+		lz4w = lz4.NewWriter(w)
+		lz4w.Header.CompressionLevel = tlz4.CompressionLevel
+		return lz4w, nil
+	}
+	tlz4.Tar.cleanupWrapFn = func() {
+		lz4w.Close()
+	}
+}
+
+func (tlz4 *TarLz4) wrapReader() {
+	tlz4.Tar.readerWrapFn = func(r io.Reader) (io.Reader, error) {
+		return lz4.NewReader(r), nil
+	}
+}
+
+// Compile-time checks to ensure type implements desired interfaces.
+var (
+	_ = Reader(new(TarLz4))
+	_ = Writer(new(TarLz4))
+	_ = Archiver(new(TarLz4))
+	_ = Unarchiver(new(TarLz4))
+	_ = Walker(new(TarLz4))
+	_ = Extractor(new(TarLz4))
+)
+
+// DefaultTarLz4 is a convenient archiver ready to use.
+var DefaultTarLz4 = &TarLz4{ + CompressionLevel: 9, // https://github.com/lz4/lz4/blob/1b819bfd633ae285df2dfe1b0589e1ec064f2873/lib/lz4hc.h#L48 + Tar: DefaultTar, +} From 4d7927b0967e76641dd8b4c35492aa3ef2e99529 Mon Sep 17 00:00:00 2001 From: Matthew Holt Date: Tue, 6 Nov 2018 09:21:05 -0700 Subject: [PATCH 07/18] Implement tar.sz --- archive/tarsz.go | 98 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) create mode 100644 archive/tarsz.go diff --git a/archive/tarsz.go b/archive/tarsz.go new file mode 100644 index 00000000..de9052b2 --- /dev/null +++ b/archive/tarsz.go @@ -0,0 +1,98 @@ +package archive + +import ( + "fmt" + "io" + "strings" + + "github.com/golang/snappy" +) + +// TarSz facilitates Snappy compression +// (https://github.com/google/snappy) +// of tarball archives. +type TarSz struct { + *Tar +} + +// Archive creates a compressed tar file at destination +// containing the files listed in sources. The destination +// must end with ".tar.sz" or ".tsz". File paths can be +// those of regular files or directories; directories will +// be recursively added. +func (tsz *TarSz) Archive(sources []string, destination string) error { + if !strings.HasSuffix(destination, ".tar.sz") && + !strings.HasSuffix(destination, ".tsz") { + return fmt.Errorf("output filename must have .tar.sz or .tsz extension") + } + tsz.wrapWriter() + return tsz.Tar.Archive(sources, destination) +} + +// Unarchive unpacks the compressed tarball at +// source to destination. Destination will be +// treated as a folder name. +func (tsz *TarSz) Unarchive(source, destination string) error { + tsz.wrapReader() + return tsz.Tar.Unarchive(source, destination) +} + +// Walk calls walkFn for each visited item in archive. +func (tsz *TarSz) Walk(archive string, walkFn WalkFunc) error { + tsz.wrapReader() + return tsz.Tar.Walk(archive, walkFn) +} + +// Create opens tsz for writing a compressed +// tar archive to out. 
+func (tsz *TarSz) Create(out io.Writer) error {
+	tsz.wrapWriter()
+	return tsz.Tar.Create(out)
+}
+
+// Open opens t for reading a compressed archive from
+// in. The size parameter is not used.
+func (tsz *TarSz) Open(in io.Reader, size int64) error {
+	tsz.wrapReader()
+	return tsz.Tar.Open(in, size)
+}
+
+// Extract extracts a single file from the tar archive.
+// If the target is a directory, the entire folder will
+// be extracted into destination.
+func (tsz *TarSz) Extract(source, target, destination string) error {
+	tsz.wrapReader()
+	return tsz.Tar.Extract(source, target, destination)
+}
+
+func (tsz *TarSz) wrapWriter() {
+	var sw *snappy.Writer
+	tsz.Tar.writerWrapFn = func(w io.Writer) (io.Writer, error) {
+		sw = snappy.NewWriter(w)
+		return sw, nil
+	}
+	tsz.Tar.cleanupWrapFn = func() {
+		sw.Close()
+	}
+}
+
+func (tsz *TarSz) wrapReader() {
+	tsz.Tar.readerWrapFn = func(r io.Reader) (io.Reader, error) {
+		return snappy.NewReader(r), nil
+	}
+}
+
+// Compile-time checks to ensure type implements desired interfaces.
+var (
+	_ = Reader(new(TarSz))
+	_ = Writer(new(TarSz))
+	_ = Archiver(new(TarSz))
+	_ = Unarchiver(new(TarSz))
+	_ = Walker(new(TarSz))
+	_ = Extractor(new(TarSz))
+)
+
+// DefaultTarSz is a convenient archiver ready to use.
+var DefaultTarSz = &TarSz{
+	Tar: DefaultTar,
+}

From 5c97d9198584bd4af28f6964093bd198c15578cd Mon Sep 17 00:00:00 2001
From: Matthew Holt
Date: Tue, 6 Nov 2018 09:51:10 -0700
Subject: [PATCH 08/18] Implement rar (reading only)

---
 archive/rar.go | 328 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 328 insertions(+)
 create mode 100644 archive/rar.go

diff --git a/archive/rar.go b/archive/rar.go
new file mode 100644
index 00000000..28690612
--- /dev/null
+++ b/archive/rar.go
@@ -0,0 +1,328 @@
+package archive
+
+import (
+	"fmt"
+	"io"
+	"log"
+	"os"
+	"path"
+	"path/filepath"
+	"time"
+
+	"github.com/nwaples/rardecode"
+)
+
+// Rar provides facilities for reading RAR archives.
+// See https://www.rarlab.com/technote.htm. +type Rar struct { + // Whether to overwrite existing files; if false, + // an error is returned if the file exists. + OverwriteExisting bool + + // Whether to make all the directories necessary + // to create a rar archive in the desired path. + MkdirAll bool + + // A single top-level folder can be implicitly + // created by the Unarchive method if the files + // to be extracted from the archive do not all + // have a common root. This roughly mimics the + // behavior of archival tools integrated into OS + // file browsers which create a subfolder to + // avoid unexpectedly littering the destination + // folder with potentially many files, causing a + // problematic cleanup/organization situation. + // This feature is available for both creation + // and extraction of archives, but may be slightly + // inefficient with lots and lots of files, + // especially on extraction. + ImplicitTopLevelFolder bool + + // If true, errors encountered during reading + // or writing a single file will be logged and + // the operation will continue on remaining files. + ContinueOnError bool + + // The password to open archives (optional). + Password string + + rr *rardecode.Reader +} + +// Unarchive unpacks the .rar file at source to destination. +// Destination will be treated as a folder name. +func (r *Rar) Unarchive(source, destination string) error { + if !fileExists(destination) && r.MkdirAll { + err := mkdir(destination) + if err != nil { + return fmt.Errorf("preparing destination: %v", err) + } + } + + // if the files in the archive do not all share a common + // root, then make sure we extract to a single subfolder + // rather than potentially littering the destination... 
+ if r.ImplicitTopLevelFolder { + var err error + destination, err = r.addTopLevelFolder(source, destination) + if err != nil { + return fmt.Errorf("scanning source archive: %v", err) + } + } + + file, err := os.Open(source) + if err != nil { + return fmt.Errorf("opening source archive: %v", err) + } + defer file.Close() + + err = r.Open(file, 0) + if err != nil { + return fmt.Errorf("opening rar archive for reading: %v", err) + } + defer r.Close() + + for { + err := r.unrarNext(destination) + if err == io.EOF { + break + } + if err != nil { + if r.ContinueOnError { + log.Printf("[ERROR] Reading file in rar archive: %v", err) + continue + } + return fmt.Errorf("reading file in rar archive: %v", err) + } + } + + return nil +} + +// addTopLevelFolder scans the files contained inside +// the tarball named sourceArchive and returns a modified +// destination if all the files do not share the same +// top-level folder. +func (r *Rar) addTopLevelFolder(sourceArchive, destination string) (string, error) { + file, err := os.Open(sourceArchive) + if err != nil { + return "", fmt.Errorf("opening source archive: %v", err) + } + defer file.Close() + + rr, err := rardecode.NewReader(file, r.Password) + if err != nil { + return "", fmt.Errorf("creating archive reader: %v", err) + } + + var files []string + for { + hdr, err := rr.Next() + if err == io.EOF { + break + } + if err != nil { + return "", fmt.Errorf("scanning tarball's file listing: %v", err) + } + files = append(files, hdr.Name) + } + + if multipleTopLevels(files) { + destination = filepath.Join(destination, folderNameFromFileName(sourceArchive)) + } + + return destination, nil +} + +func (r *Rar) unrarNext(to string) error { + f, err := r.Read() + if err != nil { + return err // don't wrap error; calling loop must break on io.EOF + } + header, ok := f.Header.(*rardecode.FileHeader) + if !ok { + return fmt.Errorf("expected header to be *rardecode.FileHeader but was %T", f.Header) + } + return r.unrarFile(f, 
filepath.Join(to, header.Name)) +} + +func (r *Rar) unrarFile(f File, to string) error { + // do not overwrite existing files, if configured + if !f.IsDir() && !r.OverwriteExisting && fileExists(to) { + return fmt.Errorf("file already exists: %s", to) + } + + hdr, ok := f.Header.(*rardecode.FileHeader) + if !ok { + return fmt.Errorf("expected header to be *rardecode.FileHeader but was %T", f.Header) + } + + // if files come before their containing folders, then we must + // create their folders before writing the file + err := mkdir(filepath.Dir(to)) + if err != nil { + return fmt.Errorf("making parent directories: %v", err) + } + + return writeNewFile(to, r.rr, hdr.Mode()) +} + +// Open opens t for reading an archive from +// in. The size parameter is not used. +func (r *Rar) Open(in io.Reader, size int64) error { + if r.rr != nil { + return fmt.Errorf("rar archive is already open for reading") + } + var err error + r.rr, err = rardecode.NewReader(in, r.Password) + return err +} + +// Read reads the next file from t, which must have +// already been opened for reading. If there are no +// more files, the error is io.EOF. The File must +// be closed when finished reading from it. +func (r *Rar) Read() (File, error) { + if r.rr == nil { + return File{}, fmt.Errorf("rar archive is not open") + } + + hdr, err := r.rr.Next() + if err != nil { + return File{}, err // don't wrap error; preserve io.EOF + } + + file := File{ + FileInfo: rarFileInfo{hdr}, + Header: hdr, + ReadCloser: ReadFakeCloser{r.rr}, + } + + return file, nil +} + +// Close closes the rar archive(s) opened by Create and Open. +func (r *Rar) Close() error { + return nil +} + +// Walk calls walkFn for each visited item in archive. 
+func (r *Rar) Walk(archive string, walkFn WalkFunc) error { + file, err := os.Open(archive) + if err != nil { + return fmt.Errorf("opening archive file: %v", err) + } + defer file.Close() + + err = r.Open(file, 0) + if err != nil { + return fmt.Errorf("opening archive: %v", err) + } + defer r.Close() + + for { + f, err := r.Read() + if err == io.EOF { + break + } + if err != nil { + if r.ContinueOnError { + log.Printf("[ERROR] Opening next file: %v", err) + continue + } + return fmt.Errorf("opening next file: %v", err) + } + err = walkFn(f) + if err != nil { + if err == ErrStopWalk { + break + } + if r.ContinueOnError { + log.Printf("[ERROR] Walking %s: %v", f.Name(), err) + continue + } + return fmt.Errorf("walking %s: %v", f.Name(), err) + } + } + + return nil +} + +// Extract extracts a single file from the rar archive. +// If the target is a directory, the entire folder will +// be extracted into destination. +func (r *Rar) Extract(source, target, destination string) error { + // target refers to a path inside the archive, which should be clean also + target = path.Clean(target) + + // if the target ends up being a directory, then + // we will continue walking and extracting files + // until we are no longer within that directory + var targetDirPath string + + return r.Walk(source, func(f File) error { + th, ok := f.Header.(*rardecode.FileHeader) + if !ok { + return fmt.Errorf("expected header to be *rardecode.FileHeader but was %T", f.Header) + } + + // importantly, cleaning the path strips tailing slash, + // which must be appended to folders within the archive + name := path.Clean(th.Name) + if f.IsDir() && target == name { + targetDirPath = path.Dir(name) + } + + if within(target, th.Name) { + // either this is the exact file we want, or is + // in the directory we want to extract + + // build the filename we will extract to + end, err := filepath.Rel(targetDirPath, th.Name) + if err != nil { + return fmt.Errorf("relativizing paths: %v", err) + } + joined 
:= filepath.Join(destination, end) + + err = r.unrarFile(f, joined) + if err != nil { + return fmt.Errorf("extracting file %s: %v", th.Name, err) + } + + // if our target was not a directory, stop walk + if targetDirPath == "" { + return ErrStopWalk + } + } else if targetDirPath != "" { + // finished walking the entire directory + return ErrStopWalk + } + + return nil + }) +} + +type rarFileInfo struct { + fh *rardecode.FileHeader +} + +func (rfi rarFileInfo) Name() string { return rfi.fh.Name } +func (rfi rarFileInfo) Size() int64 { return rfi.fh.UnPackedSize } +func (rfi rarFileInfo) Mode() os.FileMode { return rfi.fh.Mode() } +func (rfi rarFileInfo) ModTime() time.Time { return rfi.fh.ModificationTime } +func (rfi rarFileInfo) IsDir() bool { return rfi.fh.IsDir } +func (rfi rarFileInfo) Sys() interface{} { return nil } + +// Compile-time checks to ensure type implements desired interfaces. +var ( + _ = Reader(new(Rar)) + _ = Unarchiver(new(Rar)) + _ = Walker(new(Rar)) + _ = Extractor(new(Rar)) + _ = os.FileInfo(rarFileInfo{}) +) + +// DefaultRar is a convenient archiver ready to use. 
+var DefaultRar = &Rar{ + MkdirAll: true, +} From 35c2dd73106fb534312a515cd8f0010de728d63d Mon Sep 17 00:00:00 2001 From: Matthew Holt Date: Tue, 6 Nov 2018 12:17:59 -0700 Subject: [PATCH 09/18] Implement CLI --- archive/cmd/arc/main.go | 289 ++++++++++++++++++++++++++++++++++++++++ archive/rar.go | 2 + archive/tar.go | 2 + archive/tarbz2.go | 2 + archive/targz.go | 2 + archive/tarlz4.go | 2 + archive/tarsz.go | 2 + archive/tarxz.go | 2 + archive/zip.go | 6 + 9 files changed, 309 insertions(+) create mode 100644 archive/cmd/arc/main.go diff --git a/archive/cmd/arc/main.go b/archive/cmd/arc/main.go new file mode 100644 index 00000000..7d8e4dfc --- /dev/null +++ b/archive/cmd/arc/main.go @@ -0,0 +1,289 @@ +package main + +import ( + "archive/tar" + "archive/zip" + "bytes" + "compress/flate" + "flag" + "fmt" + "os" + "path/filepath" + "strings" + + "github.com/nwaples/rardecode" + + "github.com/mholt/archiver/archive" +) + +var ( + compressionLevel int + overwriteExisting bool + mkdirAll bool + selectiveCompression bool + implicitTopLevelFolder bool + continueOnError bool +) + +func init() { + flag.IntVar(&compressionLevel, "level", flate.DefaultCompression, "Compression level") + flag.BoolVar(&overwriteExisting, "overwrite", false, "Overwrite existing files") + flag.BoolVar(&mkdirAll, "mkdirs", false, "Make all necessary directories") + flag.BoolVar(&selectiveCompression, "smart", true, "Only compress files which are not already compressed (zip only)") + flag.BoolVar(&implicitTopLevelFolder, "folder-safe", true, "If an archive does not have a single top-level folder, create one implicitly") + flag.BoolVar(&continueOnError, "allow-errors", true, "Log errors and continue processing") +} + +func main() { + if len(os.Args) == 2 && + (os.Args[1] == "-h" || os.Args[1] == "--help" || os.Args[1] == "help") { + buf := new(bytes.Buffer) + flag.CommandLine.SetOutput(buf) + buf.WriteString(usage) + flag.CommandLine.PrintDefaults() + fmt.Println(buf.String()) + os.Exit(0) + } + 
if len(os.Args) < 3 { + fatal(usage) + } + flag.Parse() + + // figure out which file format we're working with + var ext string + archiveName := flag.Arg(1) + for _, format := range supportedFormats { + if strings.HasSuffix(archiveName, format) { + ext = format + break + } + } + + // configure an archiver + var iface interface{} + mytar := &archive.Tar{ + OverwriteExisting: overwriteExisting, + MkdirAll: mkdirAll, + ImplicitTopLevelFolder: implicitTopLevelFolder, + ContinueOnError: continueOnError, + } + switch ext { + case ".rar": + iface = &archive.Rar{ + OverwriteExisting: overwriteExisting, + MkdirAll: mkdirAll, + ImplicitTopLevelFolder: implicitTopLevelFolder, + ContinueOnError: continueOnError, + Password: os.Getenv("ARCHIVE_PASSWORD"), + } + + case ".tar": + iface = mytar + + case ".tbz2": + fallthrough + case ".tar.bz2": + iface = &archive.TarBz2{ + Tar: mytar, + } + + case ".tgz": + fallthrough + case ".tar.gz": + iface = &archive.TarGz{ + Tar: mytar, + CompressionLevel: compressionLevel, + } + + case ".tlz4": + fallthrough + case ".tar.lz4": + iface = &archive.TarLz4{ + Tar: mytar, + CompressionLevel: compressionLevel, + } + + case ".tsz": + fallthrough + case ".tar.sz": + iface = &archive.TarSz{ + Tar: mytar, + } + + case ".txz": + fallthrough + case ".tar.xz": + iface = &archive.TarXz{ + Tar: mytar, + } + + case ".zip": + iface = archive.Zip{ + CompressionLevel: compressionLevel, + OverwriteExisting: overwriteExisting, + MkdirAll: mkdirAll, + SelectiveCompression: selectiveCompression, + ImplicitTopLevelFolder: implicitTopLevelFolder, + ContinueOnError: continueOnError, + } + + default: + archiveExt := filepath.Ext(archiveName) + if archiveExt == "" { + fatal("format missing (use file extension to specify archive/compression format)") + } else { + fatalf("unsupported format '%s'", archiveExt) + } + } + + var err error + + switch flag.Arg(0) { + case "archive": + a, ok := iface.(archive.Archiver) + if !ok { + fatalf("the archive command does not support 
the %s format", iface) + } + err = a.Archive(flag.Args()[2:], flag.Arg(1)) + + case "unarchive": + a, ok := iface.(archive.Unarchiver) + if !ok { + fatalf("the unarchive command does not support the %s format", iface) + } + err = a.Unarchive(flag.Arg(1), flag.Arg(2)) + + case "extract": + e, ok := iface.(archive.Extractor) + if !ok { + fatalf("the unarchive command does not support the %s format", iface) + } + err = e.Extract(flag.Arg(1), flag.Arg(2), flag.Arg(3)) + + case "ls": + w, ok := iface.(archive.Walker) + if !ok { + fatalf("the unarchive command does not support the %s format", iface) + } + + var count int + err = w.Walk(flag.Arg(1), func(f archive.File) error { + count++ + switch h := f.Header.(type) { + case *zip.FileHeader: + fmt.Printf("%s\t%d\t%d\t%s\t%s\n", + f.Mode(), + h.Method, + f.Size(), + f.ModTime(), + h.Name, + ) + case *tar.Header: + fmt.Printf("%s\t%s\t%s\t%d\t%s\t%s\n", + f.Mode(), + h.Uname, + h.Gname, + f.Size(), + f.ModTime(), + h.Name, + ) + + case *rardecode.FileHeader: + fmt.Printf("%s\t%d\t%d\t%s\t%s\n", + f.Mode(), + int(h.HostOS), + f.Size(), + f.ModTime(), + h.Name, + ) + + default: + fmt.Printf("%s\t%d\t%s\t?/%s\n", + f.Mode(), + f.Size(), + f.ModTime(), + f.Name(), // we don't know full path from this + ) + } + return nil + }) + + fmt.Printf("total %d", count) + + default: + fatalf("unrecognized command: %s", flag.Arg(0)) + } + if err != nil { + fatal(err) + } +} + +func fatal(v ...interface{}) { + fmt.Fprintln(os.Stderr, v...) + os.Exit(1) +} + +func fatalf(s string, v ...interface{}) { + fmt.Fprintf(os.Stderr, s+"\n", v...) + os.Exit(1) +} + +// supportedFormats is the list of recognized +// file extensions. They are in an ordered slice +// because ordering is important, since some +// extensions can be substrings of others. 
+var supportedFormats = []string{ + ".tar.bz2", + ".tar.gz", + ".tar.lz4", + ".tar.sz", + ".tar.xz", + ".rar", + ".tar", + ".zip", + // TODO: add compression formats +} + +const usage = `Usage: arc {archive|unarchive|extract|ls|help} [files...] + archive + Create a new archive file. List the files/folders + to include in the archive; at least one required. + unarchive + Extract an archive file. Provide the archive to + open and the destination folder to extract into. + extract + Extract a single file or folder (recursively) from + an archive. First argument is the source archive, + second is the file to extract (exact path within the + archive is required), and third is destination. + ls + List the contents of the archive. + help + Display this help text. Also -h or --help. + + SPECIFYING THE ARCHIVE FORMAT + The format of the archive is determined by its + file extension. Supported extensions: + .zip + .tar + .tar.gz + .tgz + .tar.bz2 + .tbz2 + .tar.xz + .txz + .tar.lz4 + .tlz4 + .tar.sz + .tsz + .rar (open only) + + PASSWORD-PROTECTED RAR FILES + Export the ARCHIVE_PASSWORD environment variable + to be able to open password-protected RAR archives. + + GLOBAL FLAG REFERENCE + The following global flags may be used before the + sub-command (some flags are format-specific): + +` diff --git a/archive/rar.go b/archive/rar.go index 28690612..8bfc99c7 100644 --- a/archive/rar.go +++ b/archive/rar.go @@ -302,6 +302,8 @@ func (r *Rar) Extract(source, target, destination string) error { }) } +func (r *Rar) String() string { return "rar" } + type rarFileInfo struct { fh *rardecode.FileHeader } diff --git a/archive/tar.go b/archive/tar.go index e3f204e0..245437da 100644 --- a/archive/tar.go +++ b/archive/tar.go @@ -510,6 +510,8 @@ func (t *Tar) Extract(source, target, destination string) error { }) } +func (t *Tar) String() string { return "tar" } + // Compile-time checks to ensure type implements desired interfaces. 
var ( _ = Reader(new(Tar)) diff --git a/archive/tarbz2.go b/archive/tarbz2.go index 696d2cea..a9f3c9a4 100644 --- a/archive/tarbz2.go +++ b/archive/tarbz2.go @@ -93,6 +93,8 @@ func (tbz2 *TarBz2) wrapReader() { } } +func (tbz2 *TarBz2) String() string { return "tar.bz2" } + // Compile-time checks to ensure type implements desired interfaces. var ( _ = Reader(new(TarBz2)) diff --git a/archive/targz.go b/archive/targz.go index 4ee2792a..fb90ccc2 100644 --- a/archive/targz.go +++ b/archive/targz.go @@ -91,6 +91,8 @@ func (tgz *TarGz) wrapReader() { } } +func (tgz *TarGz) String() string { return "tar.gz" } + // Compile-time checks to ensure type implements desired interfaces. var ( _ = Reader(new(TarGz)) diff --git a/archive/tarlz4.go b/archive/tarlz4.go index 633416f1..7db6ac74 100644 --- a/archive/tarlz4.go +++ b/archive/tarlz4.go @@ -88,6 +88,8 @@ func (tlz4 *TarLz4) wrapReader() { } } +func (tlz4 *TarLz4) String() string { return "tar.lz4" } + // Compile-time checks to ensure type implements desired interfaces. var ( _ = Reader(new(TarLz4)) diff --git a/archive/tarsz.go b/archive/tarsz.go index de9052b2..3d1c6ed9 100644 --- a/archive/tarsz.go +++ b/archive/tarsz.go @@ -82,6 +82,8 @@ func (tsz *TarSz) wrapReader() { } } +func (tsz *TarSz) String() string { return "tar.sz" } + // Compile-time checks to ensure type implements desired interfaces. var ( _ = Reader(new(TarSz)) diff --git a/archive/tarxz.go b/archive/tarxz.go index 3dcc415b..fdc7919a 100644 --- a/archive/tarxz.go +++ b/archive/tarxz.go @@ -87,6 +87,8 @@ func (txz *TarXz) wrapReader() { } } +func (txz *TarXz) String() string { return "tar.xz" } + // Compile-time checks to ensure type implements desired interfaces. 
var ( _ = Reader(new(TarXz)) diff --git a/archive/zip.go b/archive/zip.go index 0bf3ecfe..9ba9be18 100644 --- a/archive/zip.go +++ b/archive/zip.go @@ -481,6 +481,8 @@ func (z *Zip) Extract(source, target, destination string) error { }) } +func (z *Zip) String() string { return "zip" } + // Compile-time checks to ensure type implements desired interfaces. var ( _ = Reader(new(Zip)) @@ -508,7 +510,9 @@ var compressedFormats = map[string]struct{}{ ".jpeg": {}, ".jpg": {}, ".lz": {}, + ".lz4": {}, ".lzma": {}, + ".m4v": {}, ".mov": {}, ".mp3": {}, ".mp4": {}, @@ -517,8 +521,10 @@ var compressedFormats = map[string]struct{}{ ".png": {}, ".pptx": {}, ".rar": {}, + ".sz": {}, ".tbz2": {}, ".tgz": {}, + ".tsz": {}, ".txz": {}, ".xlsx": {}, ".xz": {}, From 6d0352208c05ca30b777fdf5b0ca88810588d5d2 Mon Sep 17 00:00:00 2001 From: Matthew Holt Date: Tue, 6 Nov 2018 13:19:14 -0700 Subject: [PATCH 10/18] Implement compression formats and CLI subcommands --- archive/archive.go | 12 ++ archive/bz2.go | 54 +++++++ archive/cmd/arc/main.go | 291 ++++++++++++++++++++++++++------------ archive/filecompressor.go | 66 +++++++++ archive/gz.go | 51 +++++++ archive/lz4.go | 46 ++++++ archive/sz.go | 43 ++++++ archive/xz.go | 50 +++++++ 8 files changed, 522 insertions(+), 91 deletions(-) create mode 100644 archive/bz2.go create mode 100644 archive/filecompressor.go create mode 100644 archive/gz.go create mode 100644 archive/lz4.go create mode 100644 archive/sz.go create mode 100644 archive/xz.go diff --git a/archive/archive.go b/archive/archive.go index 27cff3f0..99601f77 100644 --- a/archive/archive.go +++ b/archive/archive.go @@ -101,6 +101,18 @@ type WalkFunc func(f File) error // ErrStopWalk signals Walk to break without error. var ErrStopWalk = fmt.Errorf("walk stopped") +// Compressor compresses to out what it reads from in. +// It also ensures a compatible or matching file extension. 
+type Compressor interface { + Compress(in io.Reader, out io.Writer) error + CheckExt(filename string) error +} + +// Decompressor decompresses to out what it reads from in. +type Decompressor interface { + Decompress(in io.Reader, out io.Writer) error +} + func fileExists(name string) bool { _, err := os.Stat(name) return !os.IsNotExist(err) diff --git a/archive/bz2.go b/archive/bz2.go new file mode 100644 index 00000000..d29366b1 --- /dev/null +++ b/archive/bz2.go @@ -0,0 +1,54 @@ +package archive + +import ( + "fmt" + "io" + "path/filepath" + + "github.com/dsnet/compress/bzip2" +) + +// Bz2 facilitates bzip2 compression. +type Bz2 struct { + CompressionLevel int +} + +// Compress reads in, compresses it, and writes it to out. +func (bz *Bz2) Compress(in io.Reader, out io.Writer) error { + w, err := bzip2.NewWriter(out, &bzip2.WriterConfig{ + Level: bz.CompressionLevel, + }) + if err != nil { + return err + } + defer w.Close() + _, err = io.Copy(w, in) + return err +} + +// Decompress reads in, decompresses it, and writes it to out. +func (bz *Bz2) Decompress(in io.Reader, out io.Writer) error { + r, err := bzip2.NewReader(in, nil) + if err != nil { + return err + } + defer r.Close() + _, err = io.Copy(out, r) + return err +} + +// CheckExt ensures the file extension matches the format. +func (bz *Bz2) CheckExt(filename string) error { + if filepath.Ext(filename) != ".bz2" { + return fmt.Errorf("filename must have a .bz2 extension") + } + return nil +} + +func (bz *Bz2) String() string { return "bz2" } + +// Compile-time checks to ensure type implements desired interfaces. 
+var ( + _ = Compressor(new(Bz2)) + _ = Decompressor(new(Bz2)) +) diff --git a/archive/cmd/arc/main.go b/archive/cmd/arc/main.go index 7d8e4dfc..2d848262 100644 --- a/archive/cmd/arc/main.go +++ b/archive/cmd/arc/main.go @@ -35,25 +35,164 @@ func init() { } func main() { - if len(os.Args) == 2 && + if len(os.Args) >= 2 && (os.Args[1] == "-h" || os.Args[1] == "--help" || os.Args[1] == "help") { - buf := new(bytes.Buffer) - flag.CommandLine.SetOutput(buf) - buf.WriteString(usage) - flag.CommandLine.PrintDefaults() - fmt.Println(buf.String()) + fmt.Println(usageString()) os.Exit(0) } if len(os.Args) < 3 { - fatal(usage) + fatal(usageString()) } flag.Parse() + subcommand := flag.Arg(0) + + // get the format we're working with + iface, err := getFormat(subcommand) + if err != nil { + fatal(err) + } + + // run the desired command + switch subcommand { + case "archive": + a, ok := iface.(archive.Archiver) + if !ok { + fatalf("the archive command does not support the %s format", iface) + } + err = a.Archive(flag.Args()[2:], flag.Arg(1)) + + case "unarchive": + a, ok := iface.(archive.Unarchiver) + if !ok { + fatalf("the unarchive command does not support the %s format", iface) + } + err = a.Unarchive(flag.Arg(1), flag.Arg(2)) + + case "extract": + e, ok := iface.(archive.Extractor) + if !ok { + fatalf("the extract command does not support the %s format", iface) + } + err = e.Extract(flag.Arg(1), flag.Arg(2), flag.Arg(3)) + + case "ls": + w, ok := iface.(archive.Walker) + if !ok { + fatalf("the ls command does not support the %s format", iface) + } + + var count int + err = w.Walk(flag.Arg(1), func(f archive.File) error { + count++ + switch h := f.Header.(type) { + case *zip.FileHeader: + fmt.Printf("%s\t%d\t%d\t%s\t%s\n", + f.Mode(), + h.Method, + f.Size(), + f.ModTime(), + h.Name, + ) + case *tar.Header: + fmt.Printf("%s\t%s\t%s\t%d\t%s\t%s\n", + f.Mode(), + h.Uname, + h.Gname, + f.Size(), + f.ModTime(), + h.Name, + ) + + case *rardecode.FileHeader: + 
fmt.Printf("%s\t%d\t%d\t%s\t%s\n", + f.Mode(), + int(h.HostOS), + f.Size(), + f.ModTime(), + h.Name, + ) + + default: + fmt.Printf("%s\t%d\t%s\t?/%s\n", + f.Mode(), + f.Size(), + f.ModTime(), + f.Name(), // we don't know full path from this + ) + } + return nil + }) + + fmt.Printf("total %d", count) + + case "compress": + c, ok := iface.(archive.Compressor) + if !ok { + fatalf("the compress command does not support the %s format", iface) + } + fc := archive.FileCompressor{Compressor: c} + + in := flag.Arg(1) + out := flag.Arg(2) + + var deleteWhenDone bool + if cs, ok := c.(fmt.Stringer); ok && out == cs.String() { + out = in + "." + out + deleteWhenDone = true + } + + err = fc.CompressFile(in, out) + if err == nil && deleteWhenDone { + err = os.Remove(in) + } + + case "decompress": + c, ok := iface.(archive.Decompressor) + if !ok { + fatalf("the compress command does not support the %s format", iface) + } + fc := archive.FileCompressor{Decompressor: c} + + in := flag.Arg(1) + out := flag.Arg(2) + + var deleteWhenDone bool + if cs, ok := c.(fmt.Stringer); ok && out == "" { + out = strings.TrimSuffix(in, "."+cs.String()) + deleteWhenDone = true + } + + err = fc.DecompressFile(in, out) + if err == nil && deleteWhenDone { + err = os.Remove(in) + } + + default: + fatalf("unrecognized command: %s", flag.Arg(0)) + } + if err != nil { + fatal(err) + } +} + +func getFormat(subcommand string) (interface{}, error) { + formatPos := 1 + if subcommand == "compress" { + formatPos = 2 + } + // figure out which file format we're working with var ext string - archiveName := flag.Arg(1) + archiveName := flag.Arg(formatPos) for _, format := range supportedFormats { - if strings.HasSuffix(archiveName, format) { + // match by extension, or, in the case of 'compress', + // check the format without the leading dot; it allows + // a shortcut to specify a format while replacing + // the original file on disk + if strings.HasSuffix(archiveName, format) || + (subcommand == "compress" && + 
archiveName == strings.TrimPrefix(format, ".")) { ext = format break } @@ -67,6 +206,7 @@ func main() { ImplicitTopLevelFolder: implicitTopLevelFolder, ContinueOnError: continueOnError, } + switch ext { case ".rar": iface = &archive.Rar{ @@ -118,7 +258,7 @@ func main() { } case ".zip": - iface = archive.Zip{ + iface = &archive.Zip{ CompressionLevel: compressionLevel, OverwriteExisting: overwriteExisting, MkdirAll: mkdirAll, @@ -127,95 +267,36 @@ func main() { ContinueOnError: continueOnError, } - default: - archiveExt := filepath.Ext(archiveName) - if archiveExt == "" { - fatal("format missing (use file extension to specify archive/compression format)") - } else { - fatalf("unsupported format '%s'", archiveExt) - } - } - - var err error - - switch flag.Arg(0) { - case "archive": - a, ok := iface.(archive.Archiver) - if !ok { - fatalf("the archive command does not support the %s format", iface) - } - err = a.Archive(flag.Args()[2:], flag.Arg(1)) - - case "unarchive": - a, ok := iface.(archive.Unarchiver) - if !ok { - fatalf("the unarchive command does not support the %s format", iface) + case ".gz": + iface = &archive.Gz{ + CompressionLevel: compressionLevel, } - err = a.Unarchive(flag.Arg(1), flag.Arg(2)) - case "extract": - e, ok := iface.(archive.Extractor) - if !ok { - fatalf("the unarchive command does not support the %s format", iface) + case ".bz2": + iface = &archive.Bz2{ + CompressionLevel: compressionLevel, } - err = e.Extract(flag.Arg(1), flag.Arg(2), flag.Arg(3)) - case "ls": - w, ok := iface.(archive.Walker) - if !ok { - fatalf("the unarchive command does not support the %s format", iface) + case ".lz4": + iface = &archive.Lz4{ + CompressionLevel: compressionLevel, } - var count int - err = w.Walk(flag.Arg(1), func(f archive.File) error { - count++ - switch h := f.Header.(type) { - case *zip.FileHeader: - fmt.Printf("%s\t%d\t%d\t%s\t%s\n", - f.Mode(), - h.Method, - f.Size(), - f.ModTime(), - h.Name, - ) - case *tar.Header: - 
fmt.Printf("%s\t%s\t%s\t%d\t%s\t%s\n", - f.Mode(), - h.Uname, - h.Gname, - f.Size(), - f.ModTime(), - h.Name, - ) - - case *rardecode.FileHeader: - fmt.Printf("%s\t%d\t%d\t%s\t%s\n", - f.Mode(), - int(h.HostOS), - f.Size(), - f.ModTime(), - h.Name, - ) - - default: - fmt.Printf("%s\t%d\t%s\t?/%s\n", - f.Mode(), - f.Size(), - f.ModTime(), - f.Name(), // we don't know full path from this - ) - } - return nil - }) + case ".sz": + iface = &archive.Snappy{} - fmt.Printf("total %d", count) + case ".xz": + iface = &archive.Xz{} default: - fatalf("unrecognized command: %s", flag.Arg(0)) - } - if err != nil { - fatal(err) + archiveExt := filepath.Ext(archiveName) + if archiveExt == "" { + return nil, fmt.Errorf("format missing (use file extension to specify archive/compression format)") + } + return nil, fmt.Errorf("unsupported format '%s'", archiveExt) } + + return iface, nil } func fatal(v ...interface{}) { @@ -228,6 +309,14 @@ func fatalf(s string, v ...interface{}) { os.Exit(1) } +func usageString() string { + buf := new(bytes.Buffer) + buf.WriteString(usage) + flag.CommandLine.SetOutput(buf) + flag.CommandLine.PrintDefaults() + return buf.String() +} + // supportedFormats is the list of recognized // file extensions. They are in an ordered slice // because ordering is important, since some @@ -241,7 +330,11 @@ var supportedFormats = []string{ ".rar", ".tar", ".zip", - // TODO: add compression formats + ".gz", + ".bz2", + ".lz4", + ".sz", + ".xz", } const usage = `Usage: arc {archive|unarchive|extract|ls|help} [files...] @@ -277,10 +370,26 @@ const usage = `Usage: arc {archive|unarchive|extract|ls|help} [fi .tar.sz .tsz .rar (open only) + .bz2 + .gz + .lz4 + .sz + .xz + + (DE)COMPRESSING SINGLE FILES + Some formats are compression-only, and can be used + with the compress and decompress commands on a + single file; they do not bundle multiple files. 
+ + To replace a file when compressing, specify the + source file name for the first argument, and the + compression format (without leading dot) for the + second argument. To replace a file when decompressing, + specify only the source file and no destination. PASSWORD-PROTECTED RAR FILES Export the ARCHIVE_PASSWORD environment variable - to be able to open password-protected RAR archives. + to be able to open password-protected rar archives. GLOBAL FLAG REFERENCE The following global flags may be used before the diff --git a/archive/filecompressor.go b/archive/filecompressor.go new file mode 100644 index 00000000..e9a04062 --- /dev/null +++ b/archive/filecompressor.go @@ -0,0 +1,66 @@ +package archive + +import ( + "fmt" + "os" +) + +// FileCompressor can compress and decompress single files. +type FileCompressor struct { + Compressor + Decompressor + + OverwriteExisting bool +} + +// CompressFile reads the source file and compresses it to destination. +// The destination must have a matching extension. +func (fc FileCompressor) CompressFile(source, destination string) error { + if err := fc.CheckExt(destination); err != nil { + return err + } + if fc.Compressor == nil { + return fmt.Errorf("no compressor specified") + } + if !fc.OverwriteExisting && fileExists(destination) { + return fmt.Errorf("file exists: %s", destination) + } + + in, err := os.Open(source) + if err != nil { + return err + } + defer in.Close() + + out, err := os.Create(destination) + if err != nil { + return err + } + defer out.Close() + + return fc.Compress(in, out) +} + +// DecompressFile reads the source file and decompresses it to destination. 
+func (fc FileCompressor) DecompressFile(source, destination string) error { + if fc.Decompressor == nil { + return fmt.Errorf("no decompressor specified") + } + if !fc.OverwriteExisting && fileExists(destination) { + return fmt.Errorf("file exists: %s", destination) + } + + in, err := os.Open(source) + if err != nil { + return err + } + defer in.Close() + + out, err := os.Create(destination) + if err != nil { + return err + } + defer out.Close() + + return fc.Decompress(in, out) +} diff --git a/archive/gz.go b/archive/gz.go new file mode 100644 index 00000000..64bd9fce --- /dev/null +++ b/archive/gz.go @@ -0,0 +1,51 @@ +package archive + +import ( + "compress/gzip" + "fmt" + "io" + "path/filepath" +) + +// Gz facilitates gzip compression. +type Gz struct { + CompressionLevel int +} + +// Compress reads in, compresses it, and writes it to out. +func (gz *Gz) Compress(in io.Reader, out io.Writer) error { + w, err := gzip.NewWriterLevel(out, gz.CompressionLevel) + if err != nil { + return err + } + defer w.Close() + _, err = io.Copy(w, in) + return err +} + +// Decompress reads in, decompresses it, and writes it to out. +func (gz *Gz) Decompress(in io.Reader, out io.Writer) error { + r, err := gzip.NewReader(in) + if err != nil { + return err + } + defer r.Close() + _, err = io.Copy(out, r) + return err +} + +// CheckExt ensures the file extension matches the format. +func (gz *Gz) CheckExt(filename string) error { + if filepath.Ext(filename) != ".gz" { + return fmt.Errorf("filename must have a .gz extension") + } + return nil +} + +func (gz *Gz) String() string { return "gz" } + +// Compile-time checks to ensure type implements desired interfaces. 
+var ( + _ = Compressor(new(Gz)) + _ = Decompressor(new(Gz)) +) diff --git a/archive/lz4.go b/archive/lz4.go new file mode 100644 index 00000000..ccdd621e --- /dev/null +++ b/archive/lz4.go @@ -0,0 +1,46 @@ +package archive + +import ( + "fmt" + "io" + "path/filepath" + + "github.com/pierrec/lz4" +) + +// Lz4 facilitates LZ4 compression. +type Lz4 struct { + CompressionLevel int +} + +// Compress reads in, compresses it, and writes it to out. +func (lz *Lz4) Compress(in io.Reader, out io.Writer) error { + w := lz4.NewWriter(out) + w.Header.CompressionLevel = lz.CompressionLevel + defer w.Close() + _, err := io.Copy(w, in) + return err +} + +// Decompress reads in, decompresses it, and writes it to out. +func (lz *Lz4) Decompress(in io.Reader, out io.Writer) error { + r := lz4.NewReader(in) + _, err := io.Copy(out, r) + return err +} + +// CheckExt ensures the file extension matches the format. +func (lz *Lz4) CheckExt(filename string) error { + if filepath.Ext(filename) != ".lz4" { + return fmt.Errorf("filename must have a .lz4 extension") + } + return nil +} + +func (lz *Lz4) String() string { return "lz4" } + +// Compile-time checks to ensure type implements desired interfaces. +var ( + _ = Compressor(new(Lz4)) + _ = Decompressor(new(Lz4)) +) diff --git a/archive/sz.go b/archive/sz.go new file mode 100644 index 00000000..2fa475ea --- /dev/null +++ b/archive/sz.go @@ -0,0 +1,43 @@ +package archive + +import ( + "fmt" + "io" + "path/filepath" + + "github.com/golang/snappy" +) + +// Snappy facilitates Snappy compression. +type Snappy struct{} + +// Compress reads in, compresses it, and writes it to out. +func (s *Snappy) Compress(in io.Reader, out io.Writer) error { + w := snappy.NewWriter(out) + defer w.Close() + _, err := io.Copy(w, in) + return err +} + +// Decompress reads in, decompresses it, and writes it to out. 
+func (s *Snappy) Decompress(in io.Reader, out io.Writer) error { + r := snappy.NewReader(in) + _, err := io.Copy(out, r) + return err +} + +// CheckExt ensures the file extension matches the format. +func (s *Snappy) CheckExt(filename string) error { + if filepath.Ext(filename) != ".sz" { + return fmt.Errorf("filename must have a .sz extension") + } + return nil +} + +func (s *Snappy) String() string { return "sz" } + +// Compile-time checks to ensure type implements desired interfaces. +var ( + _ = Compressor(new(Snappy)) + _ = Decompressor(new(Snappy)) +) diff --git a/archive/xz.go b/archive/xz.go new file mode 100644 index 00000000..c638c1c8 --- /dev/null +++ b/archive/xz.go @@ -0,0 +1,50 @@ +package archive + +import ( + "fmt" + "io" + "path/filepath" + + "github.com/ulikunitz/xz" + fastxz "github.com/xi2/xz" +) + +// Xz facilitates XZ compression. +type Xz struct{} + +// Compress reads in, compresses it, and writes it to out. +func (x *Xz) Compress(in io.Reader, out io.Writer) error { + w, err := xz.NewWriter(out) + if err != nil { + return err + } + defer w.Close() + _, err = io.Copy(w, in) + return err +} + +// Decompress reads in, decompresses it, and writes it to out. +func (x *Xz) Decompress(in io.Reader, out io.Writer) error { + r, err := fastxz.NewReader(in, 0) + if err != nil { + return err + } + _, err = io.Copy(out, r) + return err +} + +// CheckExt ensures the file extension matches the format. +func (x *Xz) CheckExt(filename string) error { + if filepath.Ext(filename) != ".xz" { + return fmt.Errorf("filename must have a .xz extension") + } + return nil +} + +func (x *Xz) String() string { return "xz" } + +// Compile-time checks to ensure type implements desired interfaces. 
+var ( + _ = Compressor(new(Xz)) + _ = Decompressor(new(Xz)) +) From 779b64ce54c5682495ab3b75e43b223a2c9677e5 Mon Sep 17 00:00:00 2001 From: Matthew Holt Date: Tue, 6 Nov 2018 13:27:46 -0700 Subject: [PATCH 11/18] Keep original package name (archiver) --- {archive => archiver}/archive.go | 2 +- {archive => archiver}/archive_test.go | 2 +- {archive => archiver}/bz2.go | 2 +- {archive => archiver}/cmd/arc/main.go | 47 ++++++++++++------------- {archive => archiver}/filecompressor.go | 2 +- {archive => archiver}/gz.go | 2 +- {archive => archiver}/lz4.go | 2 +- {archive => archiver}/rar.go | 2 +- {archive => archiver}/sz.go | 2 +- {archive => archiver}/tar.go | 2 +- {archive => archiver}/tarbz2.go | 2 +- {archive => archiver}/targz.go | 2 +- {archive => archiver}/tarlz4.go | 2 +- {archive => archiver}/tarsz.go | 2 +- {archive => archiver}/tarxz.go | 2 +- {archive => archiver}/xz.go | 2 +- {archive => archiver}/zip.go | 2 +- 17 files changed, 39 insertions(+), 40 deletions(-) rename {archive => archiver}/archive.go (99%) rename {archive => archiver}/archive_test.go (99%) rename {archive => archiver}/bz2.go (98%) rename {archive => archiver}/cmd/arc/main.go (91%) rename {archive => archiver}/filecompressor.go (98%) rename {archive => archiver}/gz.go (98%) rename {archive => archiver}/lz4.go (98%) rename {archive => archiver}/rar.go (99%) rename {archive => archiver}/sz.go (98%) rename {archive => archiver}/tar.go (99%) rename {archive => archiver}/tarbz2.go (99%) rename {archive => archiver}/targz.go (99%) rename {archive => archiver}/tarlz4.go (99%) rename {archive => archiver}/tarsz.go (99%) rename {archive => archiver}/tarxz.go (99%) rename {archive => archiver}/xz.go (98%) rename {archive => archiver}/zip.go (99%) diff --git a/archive/archive.go b/archiver/archive.go similarity index 99% rename from archive/archive.go rename to archiver/archive.go index 99601f77..0e8170aa 100644 --- a/archive/archive.go +++ b/archiver/archive.go @@ -1,4 +1,4 @@ -package archive 
+package archiver import ( "fmt" diff --git a/archive/archive_test.go b/archiver/archive_test.go similarity index 99% rename from archive/archive_test.go rename to archiver/archive_test.go index 2fc46feb..937a380d 100644 --- a/archive/archive_test.go +++ b/archiver/archive_test.go @@ -1,4 +1,4 @@ -package archive +package archiver import "testing" diff --git a/archive/bz2.go b/archiver/bz2.go similarity index 98% rename from archive/bz2.go rename to archiver/bz2.go index d29366b1..5a914bc3 100644 --- a/archive/bz2.go +++ b/archiver/bz2.go @@ -1,4 +1,4 @@ -package archive +package archiver import ( "fmt" diff --git a/archive/cmd/arc/main.go b/archiver/cmd/arc/main.go similarity index 91% rename from archive/cmd/arc/main.go rename to archiver/cmd/arc/main.go index 2d848262..bb973f57 100644 --- a/archive/cmd/arc/main.go +++ b/archiver/cmd/arc/main.go @@ -11,9 +11,8 @@ import ( "path/filepath" "strings" + "github.com/mholt/archiver/archiver" "github.com/nwaples/rardecode" - - "github.com/mholt/archiver/archive" ) var ( @@ -56,34 +55,34 @@ func main() { // run the desired command switch subcommand { case "archive": - a, ok := iface.(archive.Archiver) + a, ok := iface.(archiver.Archiver) if !ok { fatalf("the archive command does not support the %s format", iface) } err = a.Archive(flag.Args()[2:], flag.Arg(1)) case "unarchive": - a, ok := iface.(archive.Unarchiver) + a, ok := iface.(archiver.Unarchiver) if !ok { fatalf("the unarchive command does not support the %s format", iface) } err = a.Unarchive(flag.Arg(1), flag.Arg(2)) case "extract": - e, ok := iface.(archive.Extractor) + e, ok := iface.(archiver.Extractor) if !ok { fatalf("the extract command does not support the %s format", iface) } err = e.Extract(flag.Arg(1), flag.Arg(2), flag.Arg(3)) case "ls": - w, ok := iface.(archive.Walker) + w, ok := iface.(archiver.Walker) if !ok { fatalf("the ls command does not support the %s format", iface) } var count int - err = w.Walk(flag.Arg(1), func(f archive.File) error { + 
err = w.Walk(flag.Arg(1), func(f archiver.File) error { count++ switch h := f.Header.(type) { case *zip.FileHeader: @@ -127,11 +126,11 @@ func main() { fmt.Printf("total %d", count) case "compress": - c, ok := iface.(archive.Compressor) + c, ok := iface.(archiver.Compressor) if !ok { fatalf("the compress command does not support the %s format", iface) } - fc := archive.FileCompressor{Compressor: c} + fc := archiver.FileCompressor{Compressor: c} in := flag.Arg(1) out := flag.Arg(2) @@ -148,11 +147,11 @@ func main() { } case "decompress": - c, ok := iface.(archive.Decompressor) + c, ok := iface.(archiver.Decompressor) if !ok { fatalf("the compress command does not support the %s format", iface) } - fc := archive.FileCompressor{Decompressor: c} + fc := archiver.FileCompressor{Decompressor: c} in := flag.Arg(1) out := flag.Arg(2) @@ -200,7 +199,7 @@ func getFormat(subcommand string) (interface{}, error) { // configure an archiver var iface interface{} - mytar := &archive.Tar{ + mytar := &archiver.Tar{ OverwriteExisting: overwriteExisting, MkdirAll: mkdirAll, ImplicitTopLevelFolder: implicitTopLevelFolder, @@ -209,7 +208,7 @@ func getFormat(subcommand string) (interface{}, error) { switch ext { case ".rar": - iface = &archive.Rar{ + iface = &archiver.Rar{ OverwriteExisting: overwriteExisting, MkdirAll: mkdirAll, ImplicitTopLevelFolder: implicitTopLevelFolder, @@ -223,14 +222,14 @@ func getFormat(subcommand string) (interface{}, error) { case ".tbz2": fallthrough case ".tar.bz2": - iface = &archive.TarBz2{ + iface = &archiver.TarBz2{ Tar: mytar, } case ".tgz": fallthrough case ".tar.gz": - iface = &archive.TarGz{ + iface = &archiver.TarGz{ Tar: mytar, CompressionLevel: compressionLevel, } @@ -238,7 +237,7 @@ func getFormat(subcommand string) (interface{}, error) { case ".tlz4": fallthrough case ".tar.lz4": - iface = &archive.TarLz4{ + iface = &archiver.TarLz4{ Tar: mytar, CompressionLevel: compressionLevel, } @@ -246,19 +245,19 @@ func getFormat(subcommand string) 
(interface{}, error) { case ".tsz": fallthrough case ".tar.sz": - iface = &archive.TarSz{ + iface = &archiver.TarSz{ Tar: mytar, } case ".txz": fallthrough case ".tar.xz": - iface = &archive.TarXz{ + iface = &archiver.TarXz{ Tar: mytar, } case ".zip": - iface = &archive.Zip{ + iface = &archiver.Zip{ CompressionLevel: compressionLevel, OverwriteExisting: overwriteExisting, MkdirAll: mkdirAll, @@ -268,25 +267,25 @@ func getFormat(subcommand string) (interface{}, error) { } case ".gz": - iface = &archive.Gz{ + iface = &archiver.Gz{ CompressionLevel: compressionLevel, } case ".bz2": - iface = &archive.Bz2{ + iface = &archiver.Bz2{ CompressionLevel: compressionLevel, } case ".lz4": - iface = &archive.Lz4{ + iface = &archiver.Lz4{ CompressionLevel: compressionLevel, } case ".sz": - iface = &archive.Snappy{} + iface = &archiver.Snappy{} case ".xz": - iface = &archive.Xz{} + iface = &archiver.Xz{} default: archiveExt := filepath.Ext(archiveName) diff --git a/archive/filecompressor.go b/archiver/filecompressor.go similarity index 98% rename from archive/filecompressor.go rename to archiver/filecompressor.go index e9a04062..df881468 100644 --- a/archive/filecompressor.go +++ b/archiver/filecompressor.go @@ -1,4 +1,4 @@ -package archive +package archiver import ( "fmt" diff --git a/archive/gz.go b/archiver/gz.go similarity index 98% rename from archive/gz.go rename to archiver/gz.go index 64bd9fce..7325c0f5 100644 --- a/archive/gz.go +++ b/archiver/gz.go @@ -1,4 +1,4 @@ -package archive +package archiver import ( "compress/gzip" diff --git a/archive/lz4.go b/archiver/lz4.go similarity index 98% rename from archive/lz4.go rename to archiver/lz4.go index ccdd621e..5291791a 100644 --- a/archive/lz4.go +++ b/archiver/lz4.go @@ -1,4 +1,4 @@ -package archive +package archiver import ( "fmt" diff --git a/archive/rar.go b/archiver/rar.go similarity index 99% rename from archive/rar.go rename to archiver/rar.go index 8bfc99c7..6124aa3f 100644 --- a/archive/rar.go +++ b/archiver/rar.go 
@@ -1,4 +1,4 @@ -package archive +package archiver import ( "fmt" diff --git a/archive/sz.go b/archiver/sz.go similarity index 98% rename from archive/sz.go rename to archiver/sz.go index 2fa475ea..9e9fcd19 100644 --- a/archive/sz.go +++ b/archiver/sz.go @@ -1,4 +1,4 @@ -package archive +package archiver import ( "fmt" diff --git a/archive/tar.go b/archiver/tar.go similarity index 99% rename from archive/tar.go rename to archiver/tar.go index 245437da..cedc2ce2 100644 --- a/archive/tar.go +++ b/archiver/tar.go @@ -1,4 +1,4 @@ -package archive +package archiver import ( "archive/tar" diff --git a/archive/tarbz2.go b/archiver/tarbz2.go similarity index 99% rename from archive/tarbz2.go rename to archiver/tarbz2.go index a9f3c9a4..2b44bf4b 100644 --- a/archive/tarbz2.go +++ b/archiver/tarbz2.go @@ -1,4 +1,4 @@ -package archive +package archiver import ( "fmt" diff --git a/archive/targz.go b/archiver/targz.go similarity index 99% rename from archive/targz.go rename to archiver/targz.go index fb90ccc2..513e71ed 100644 --- a/archive/targz.go +++ b/archiver/targz.go @@ -1,4 +1,4 @@ -package archive +package archiver import ( "compress/gzip" diff --git a/archive/tarlz4.go b/archiver/tarlz4.go similarity index 99% rename from archive/tarlz4.go rename to archiver/tarlz4.go index 7db6ac74..10be5f26 100644 --- a/archive/tarlz4.go +++ b/archiver/tarlz4.go @@ -1,4 +1,4 @@ -package archive +package archiver import ( "fmt" diff --git a/archive/tarsz.go b/archiver/tarsz.go similarity index 99% rename from archive/tarsz.go rename to archiver/tarsz.go index 3d1c6ed9..4533c3df 100644 --- a/archive/tarsz.go +++ b/archiver/tarsz.go @@ -1,4 +1,4 @@ -package archive +package archiver import ( "fmt" diff --git a/archive/tarxz.go b/archiver/tarxz.go similarity index 99% rename from archive/tarxz.go rename to archiver/tarxz.go index fdc7919a..c1d27ea9 100644 --- a/archive/tarxz.go +++ b/archiver/tarxz.go @@ -1,4 +1,4 @@ -package archive +package archiver import ( "fmt" diff --git 
a/archive/xz.go b/archiver/xz.go similarity index 98% rename from archive/xz.go rename to archiver/xz.go index c638c1c8..f5f5b81e 100644 --- a/archive/xz.go +++ b/archiver/xz.go @@ -1,4 +1,4 @@ -package archive +package archiver import ( "fmt" diff --git a/archive/zip.go b/archiver/zip.go similarity index 99% rename from archive/zip.go rename to archiver/zip.go index 9ba9be18..c9af4765 100644 --- a/archive/zip.go +++ b/archiver/zip.go @@ -1,4 +1,4 @@ -package archive +package archiver import ( "archive/zip" From b3459cff3daa4735805466c71e8a6a030f54df2f Mon Sep 17 00:00:00 2001 From: Matthew Holt Date: Tue, 6 Nov 2018 13:55:49 -0700 Subject: [PATCH 12/18] Update README --- README.md | 117 +++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 85 insertions(+), 32 deletions(-) diff --git a/README.md b/README.md index f2d94f00..783ff9b2 100644 --- a/README.md +++ b/README.md @@ -1,28 +1,58 @@ archiver [![archiver GoDoc](https://img.shields.io/badge/reference-godoc-blue.svg?style=flat-square)](https://godoc.org/github.com/mholt/archiver) [![Linux Build Status](https://img.shields.io/travis/mholt/archiver.svg?style=flat-square&label=linux+build)](https://travis-ci.org/mholt/archiver) [![Windows Build Status](https://img.shields.io/appveyor/ci/mholt/archiver.svg?style=flat-square&label=windows+build)](https://ci.appveyor.com/project/mholt/archiver) ======== -Package archiver makes it trivially easy to make and extract common archive formats such as .zip, and .tar.gz. Simply name the input and output file(s). +Introducing **Archiver 3.0** - a cross-platform, multi-format archive utility and Go library. A powerful and flexible library meets an elegant CLI in this generic replacement for several of platform-specific, format-specific archive utilities. + +## Features + +Package archiver makes it trivially easy to make and extract common archive formats such as zip and tarball (and its compressed variants). Simply name the input and output file(s). 
The `arc` command runs the same on all platforms and has no external dependencies (not even libc). It is powered by the Go standard library and several third-party, pure-Go libraries. Files are put into the root of the archive; directories are recursively added, preserving structure. -The `archiver` command runs the same cross-platform and has no external dependencies (not even libc); powered by the Go standard library, [dsnet/compress](https://github.com/dsnet/compress), [nwaples/rardecode](https://github.com/nwaples/rardecode), and [ulikunitz/xz](https://github.com/ulikunitz/xz). Enjoy! +- Make whole archives from a list of files +- Open whole archives to a folder +- Extract specific files/folders from archives +- Stream files in and out of archives without needing actual files on disk +- Traverse archive contents without loading them +- Compress files +- Decompress files +- Streaming compression and decompression +- Several archive and compression formats supported + +### Format-dependent features -Supported formats/extensions: +- Optionally create a top-level folder to avoid littering a directory or archive root with files +- Toggle overwrite existing files +- Adjust compression level +- Zip: store (not compress) already-compressed files +- Make all necessary directories +- Open password-protected RAR archives +- Optionally continue with other files after an error + +### Supported archive formats - .zip - .tar -- .tar.gz & .tgz -- .tar.bz2 & .tbz2 -- .tar.xz & .txz -- .tar.lz4 & .tlz4 -- .tar.sz & .tsz +- .tar.gz or .tgz +- .tar.bz2 or .tbz2 +- .tar.xz or .txz +- .tar.lz4 or .tlz4 +- .tar.sz or .tsz - .rar (open only) +### Supported compression formats + +- bzip2 +- gzip +- lz4 +- snappy +- xz + ## Install ```bash -go get github.com/mholt/archiver/cmd/archiver +go get -u github.com/mholt/archiver/cmd/archiver ``` Or download binaries from the [releases](https://github.com/mholt/archiver/releases) page. 
@@ -30,70 +60,93 @@ Or download binaries from the [releases](https://github.com/mholt/archiver/relea ## Command Use -Make a new archive: +### Make new archive ```bash -$ archiver make [archive name] [input files...] +# Syntax: arc archive [archive name] [input files...] +$ arc archive test.tar.gz file1.txt images/file2.jpg folder/subfolder ``` (At least one input file is required.) -To extract an archive: +### Extract entire archive ```bash -$ archiver open [archive name] [destination] +# Syntax: arc unarchive [archive name] [destination] +$ arc unarchive test.tar.gz ``` (The destination path is optional; default is current directory.) -The archive name must end with a supported file extension—this is how it knows what kind of archive to make. Run `archiver -h` for more help. +The archive name must end with a supported file extension—this is how it knows what kind of archive to make. Run `arc help` for more help. +### List archive contents -## Library Use - -```go -import "github.com/mholt/archiver" +```bash +# Syntax: arc ls [archive name] +$ arc ls caddy_dist.tar.gz +drwxr-xr-x matt staff 0 2018-09-19 15:47:18 -0600 MDT dist/ +-rw-r--r-- matt staff 6148 2017-08-07 18:34:22 -0600 MDT dist/.DS_Store +-rw-r--r-- matt staff 22481 2018-09-19 15:47:18 -0600 MDT dist/CHANGES.txt +-rw-r--r-- matt staff 17189 2018-09-19 15:47:18 -0600 MDT dist/EULA.txt +-rw-r--r-- matt staff 25261 2016-03-07 16:32:00 -0700 MST dist/LICENSES.txt +-rw-r--r-- matt staff 1017 2018-09-19 15:47:18 -0600 MDT dist/README.txt +-rw-r--r-- matt staff 288 2016-03-21 11:52:38 -0600 MDT dist/gitcookie.sh.enc +... 
``` -Create a .zip file: +### Extract a specific file or folder from an archive -```go -err := archiver.Zip.Make("output.zip", []string{"file.txt", "folder"}) +```bash +# Syntax: arc extract [archive name] [path in archive] [destination on disk] +$ arc extract test.tar.gz foo/hello.txt extracted/hello.txt ``` -Extract a .zip file: +### Compress a single file -```go -err := archiver.Zip.Open("input.zip", "output_folder") +```bash +# Syntax: arc compress [input file] [output file] +$ arc compress test.txt compressed_test.txt.gz +$ arc compress test.txt gz ``` -Working with other file formats is exactly the same, but with [their own Archiver implementations](https://godoc.org/github.com/mholt/archiver#Archiver). +For convenience, if the output file is simply a compression format (without leading dot), the output file name will be the same as the input name but with the format extension appended, and the input file will be deleted if successful. +### Decompress a single file +```bash +# Syntax: arc decompress [input file] [output file] +$ arc decompress test.txt.gz original_test.txt +$ arc decompress test.txt.gz +``` -## FAQ +For convenience, if the output file is not specified, it will have the same name as the input, but with the compression extension stripped from the end, and the input file will be deleted if successful. -#### Can I list a file in one folder to go into a different folder in the archive? +### Flags -No. This works just like your OS would make an archive in the file explorer: organize your input files to mirror the structure you want in the archive. +Flags are specified before the subcommand. Use `arc help` or `arc -h` to get usage help and a description of flags with their default values. +## Library Use + +```go +import "github.com/mholt/archiver" +``` -#### Can it add files to an existing archive? 
+The archiver package allows you to easily create and open archives, walk their contents, extract specific files, compress and decompress files, and even stream archives in and out using pure io.Reader and io.Writer interfaces, without ever needing to touch the disk. See [package godoc documentation](https://godoc.org/github.com/mholt/archiver) to learn how to do this -- it's really slick! -Nope. This is a simple tool; it just makes new archives or extracts existing ones. ## Project Values This project has a few principle-based goals that guide its development: -- **Do one thing really well.** That is creating and opening archive files. It is not meant to be a replacement for specific archive format tools like tar, zip, etc. that have lots of features and customizability. (Some customizability is OK, but not to the extent that it becomes complicated or error-prone.) +- **Do our thing really well.** Our thing is creating, opening, inspecting, compressing, and streaming archive files. It is not meant to be a replacement for specific archive format tools like tar, zip, etc. that have lots of features and customizability. (Some customizability is OK, but not to the extent that it becomes overly complicated or error-prone.) - **Have good tests.** Changes should be covered by tests. - **Limit dependencies.** Keep the package lightweight. -- **Pure Go.** This means no cgo or other external/system dependencies. This package should be able to stand on its own and cross-compile easily to any platform. +- **Pure Go.** This means no cgo or other external/system dependencies. This package should be able to stand on its own and cross-compile easily to any platform -- and that includes its library dependencies. - **Idiomatic Go.** Keep interfaces small, variable names semantic, vet shows no errors, the linter is generally quiet, etc. 
From e54b7d7a097ca6bf028ecd127b325409cb80c594 Mon Sep 17 00:00:00 2001 From: Matthew Holt Date: Tue, 6 Nov 2018 19:59:09 -0700 Subject: [PATCH 13/18] tar/zip: Fix a couple bugs and edge cases The archivers' walks now skip the output archive, if contained in one of the source directory trees. For zip files, it would fill the disk and for tar files it would be included in the archive, about 10 KB of it... Tar now closes the Reader/Writer wrapper last, after the inner tar writer/reader has been closed. Otherwise result is a corrupted archive that could be opened by archiver, but not by the OS. --- archiver/archive.go | 5 +++++ archiver/cmd/arc/main.go | 6 +++++- archiver/tar.go | 40 +++++++++++++++++++++++++++------------- archiver/zip.go | 29 ++++++++++++++++++++--------- 4 files changed, 57 insertions(+), 23 deletions(-) diff --git a/archiver/archive.go b/archiver/archive.go index 0e8170aa..d44067d0 100644 --- a/archiver/archive.go +++ b/archiver/archive.go @@ -187,6 +187,8 @@ func within(parent, sub string) bool { return !strings.Contains(rel, "..") } +// multipleTopLevels returns true if the paths do not +// share a common top-level folder. func multipleTopLevels(paths []string) bool { if len(paths) < 2 { return false @@ -211,6 +213,9 @@ func multipleTopLevels(paths []string) bool { return false } +// folderNameFromFileName returns a name for a folder +// that is suitable based on the filename, which will +// be stripped of its extensions. func folderNameFromFileName(filename string) string { base := filepath.Base(filename) firstDot := strings.Index(base, ".") diff --git a/archiver/cmd/arc/main.go b/archiver/cmd/arc/main.go index bb973f57..19ca4d9e 100644 --- a/archiver/cmd/arc/main.go +++ b/archiver/cmd/arc/main.go @@ -336,7 +336,7 @@ var supportedFormats = []string{ ".xz", } -const usage = `Usage: arc {archive|unarchive|extract|ls|help} [files...] +const usage = `Usage: arc {archive|unarchive|extract|ls|compress|decompress|help} [arguments...] 
archive Create a new archive file. List the files/folders to include in the archive; at least one required. @@ -350,6 +350,10 @@ const usage = `Usage: arc {archive|unarchive|extract|ls|help} [fi archive is required), and third is destination. ls List the contents of the archive. + compress + Compresses a file, destination optional. + decompress + Decompresses a file, destination optional. help Display this help text. Also -h or --help. diff --git a/archiver/tar.go b/archiver/tar.go index cedc2ce2..7fc1ea5f 100644 --- a/archiver/tar.go +++ b/archiver/tar.go @@ -91,7 +91,7 @@ func (t *Tar) Archive(sources []string, destination string) error { } for _, source := range sources { - err := t.writeWalk(source, topLevelFolder) + err := t.writeWalk(source, topLevelFolder, destination) if err != nil { return fmt.Errorf("walking %s: %v", source, err) } @@ -234,7 +234,7 @@ func (t *Tar) untarFile(f File, to string) error { } } -func (t *Tar) writeWalk(source, topLevelFolder string) error { +func (t *Tar) writeWalk(source, topLevelFolder, destination string) error { sourceAbs, err := filepath.Abs(source) if err != nil { return fmt.Errorf("getting absolute path: %v", err) @@ -243,6 +243,10 @@ func (t *Tar) writeWalk(source, topLevelFolder string) error { if err != nil { return fmt.Errorf("%s: stat: %v", source, err) } + destAbs, err := filepath.Abs(destination) + if err != nil { + return fmt.Errorf("%s: getting absolute path of destination %s: %v", source, destination, err) + } var baseDir string if topLevelFolder != "" { @@ -267,14 +271,20 @@ func (t *Tar) writeWalk(source, topLevelFolder string) error { return handleErr(fmt.Errorf("no file info")) } - name := source - if source != fpath { - name, err = filepath.Rel(source, fpath) - if err != nil { - return handleErr(err) - } + // make sure we do not copy our output file into itself + fpathAbs, err := filepath.Abs(fpath) + if err != nil { + return handleErr(fmt.Errorf("%s: getting absolute path: %v", fpath, err)) + } + if 
within(fpathAbs, destAbs) { + return nil } + // build the name to be used in the archive + name, err := filepath.Rel(source, fpath) + if err != nil { + return handleErr(err) + } nameInArchive := path.Join(baseDir, filepath.ToSlash(name)) file, err := os.Open(fpath) @@ -400,18 +410,22 @@ func (t *Tar) Read() (File, error) { // Close closes the tar archive(s) opened by Create and Open. func (t *Tar) Close() error { - if t.cleanupWrapFn != nil { - t.cleanupWrapFn() - } + var err error if t.tr != nil { t.tr = nil } if t.tw != nil { tw := t.tw t.tw = nil - return tw.Close() + err = tw.Close() } - return nil + // make sure cleanup of "Reader/Writer wrapper" + // (say that ten times fast) happens AFTER the + // underlying stream is closed + if t.cleanupWrapFn != nil { + t.cleanupWrapFn() + } + return err } // Walk calls walkFn for each visited item in archive. diff --git a/archiver/zip.go b/archiver/zip.go index c9af4765..a0d8b65f 100644 --- a/archiver/zip.go +++ b/archiver/zip.go @@ -100,7 +100,7 @@ func (z *Zip) Archive(sources []string, destination string) error { } for _, source := range sources { - err := z.writeWalk(source, topLevelFolder) + err := z.writeWalk(source, topLevelFolder, destination) if err != nil { return fmt.Errorf("walking %s: %v", source, err) } @@ -193,7 +193,7 @@ func (z *Zip) extractFile(f File, to string) error { return writeNewFile(to, f, f.Mode()) } -func (z *Zip) writeWalk(source, topLevelFolder string) error { +func (z *Zip) writeWalk(source, topLevelFolder, destination string) error { sourceAbs, err := filepath.Abs(source) if err != nil { return fmt.Errorf("getting absolute path: %v", err) @@ -202,6 +202,10 @@ func (z *Zip) writeWalk(source, topLevelFolder string) error { if err != nil { return fmt.Errorf("%s: stat: %v", source, err) } + destAbs, err := filepath.Abs(destination) + if err != nil { + return fmt.Errorf("%s: getting absolute path of destination %s: %v", source, destination, err) + } var baseDir string if topLevelFolder != "" { 
@@ -223,17 +227,24 @@ func (z *Zip) writeWalk(source, topLevelFolder string) error { return handleErr(fmt.Errorf("traversing %s: %v", fpath, err)) } if info == nil { - return handleErr(fmt.Errorf("no file info")) + return handleErr(fmt.Errorf("%s: no file info", fpath)) } - name := source - if source != fpath { - name, err = filepath.Rel(source, fpath) - if err != nil { - return handleErr(err) - } + // make sure we do not copy the output file into the output + // file; that results in an infinite loop and disk exhaustion! + fpathAbs, err := filepath.Abs(fpath) + if err != nil { + return handleErr(fmt.Errorf("%s: getting absolute path: %v", fpath, err)) + } + if within(fpathAbs, destAbs) { + return nil } + // build the name to be used within the archive + name, err := filepath.Rel(source, fpath) + if err != nil { + return handleErr(err) + } nameInArchive := path.Join(baseDir, filepath.ToSlash(name)) file, err := os.Open(fpath) From 9b3eda1e1ae94a76a9ef23183a645be0ca93c4b2 Mon Sep 17 00:00:00 2001 From: Matthew Holt Date: Tue, 6 Nov 2018 20:24:20 -0700 Subject: [PATCH 14/18] rar: Add support for multi-volume archives --- archiver/rar.go | 45 +++++++++++++++++++++++++++++++++------------ 1 file changed, 33 insertions(+), 12 deletions(-) diff --git a/archiver/rar.go b/archiver/rar.go index 6124aa3f..1356eba8 100644 --- a/archiver/rar.go +++ b/archiver/rar.go @@ -46,11 +46,13 @@ type Rar struct { // The password to open archives (optional). Password string - rr *rardecode.Reader + rr *rardecode.Reader // underlying stream reader + rc *rardecode.ReadCloser // supports multi-volume archives (files only) } // Unarchive unpacks the .rar file at source to destination. -// Destination will be treated as a folder name. +// Destination will be treated as a folder name. It supports +// multi-volume archives. 
func (r *Rar) Unarchive(source, destination string) error { if !fileExists(destination) && r.MkdirAll { err := mkdir(destination) @@ -70,13 +72,7 @@ func (r *Rar) Unarchive(source, destination string) error { } } - file, err := os.Open(source) - if err != nil { - return fmt.Errorf("opening source archive: %v", err) - } - defer file.Close() - - err = r.Open(file, 0) + err := r.OpenFile(source) if err != nil { return fmt.Errorf("opening rar archive for reading: %v", err) } @@ -110,14 +106,14 @@ func (r *Rar) addTopLevelFolder(sourceArchive, destination string) (string, erro } defer file.Close() - rr, err := rardecode.NewReader(file, r.Password) + rc, err := rardecode.NewReader(file, r.Password) if err != nil { return "", fmt.Errorf("creating archive reader: %v", err) } var files []string for { - hdr, err := rr.Next() + hdr, err := rc.Next() if err == io.EOF { break } @@ -167,6 +163,22 @@ func (r *Rar) unrarFile(f File, to string) error { return writeNewFile(to, r.rr, hdr.Mode()) } +// OpenFile opens filename for reading. This method supports +// multi-volume archives, whereas Open does not (but Open +// supports any stream, not just files). +func (r *Rar) OpenFile(filename string) error { + if r.rr != nil { + return fmt.Errorf("rar archive is already open for reading") + } + var err error + r.rc, err = rardecode.OpenReader(filename, r.Password) + if err != nil { + return err + } + r.rr = &r.rc.Reader + return nil +} + // Open opens t for reading an archive from // in. The size parameter is not used. func (r *Rar) Open(in io.Reader, size int64) error { @@ -203,7 +215,16 @@ func (r *Rar) Read() (File, error) { // Close closes the rar archive(s) opened by Create and Open. func (r *Rar) Close() error { - return nil + var err error + if r.rc != nil { + rc := r.rc + r.rc = nil + err = rc.Close() + } + if r.rr != nil { + r.rr = nil + } + return err } // Walk calls walkFn for each visited item in archive. 
From fea250ac6eacd56f90a82fbe2481cfdbb9a1bbd1 Mon Sep 17 00:00:00 2001 From: Matthew Holt Date: Tue, 6 Nov 2018 21:06:08 -0700 Subject: [PATCH 15/18] Clarify opinion about zip-slip and give recommendation --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 783ff9b2..7a63fe16 100644 --- a/README.md +++ b/README.md @@ -134,6 +134,7 @@ import "github.com/mholt/archiver" The archiver package allows you to easily create and open archives, walk their contents, extract specific files, compress and decompress files, and even stream archives in and out using pure io.Reader and io.Writer interfaces, without ever needing to touch the disk. See [package godoc documentation](https://godoc.org/github.com/mholt/archiver) to learn how to do this -- it's really slick! +**Security note: This package does NOT attempt to mitigate zip-slip attacks.** It is [extremely difficult](https://github.com/rubyzip/rubyzip/pull/376) [to do properly](https://github.com/mholt/archiver/pull/65#issuecomment-395988244) and [seemingly impossible to mitigate effectively across platforms](https://github.com/golang/go/issues/20126). [Attempted fixes have broken processing of legitimate files in production](https://github.com/mholt/archiver/pull/70#issuecomment-423267320), rendering the program unusable. Our recommendation instead is to inspect the contents of an untrusted archive before extracting it (this package provides `Walkers`) and decide if you want to proceed with extraction. 
## Project Values From 48dff643ced6863febbb9539eadf7ead09bb7330 Mon Sep 17 00:00:00 2001 From: Matthew Holt Date: Wed, 7 Nov 2018 12:12:33 -0700 Subject: [PATCH 16/18] Implement Match methods on tar, rar, and zip formats; port tests --- archiver/archive_test.go | 121 ----------- archiver/{archive.go => archiver.go} | 8 + archiver/archiver_test.go | 290 +++++++++++++++++++++++++++ archiver/rar.go | 24 +++ archiver/tar.go | 61 ++++++ archiver/zip.go | 22 ++ 6 files changed, 405 insertions(+), 121 deletions(-) delete mode 100644 archiver/archive_test.go rename archiver/{archive.go => archiver.go} (95%) create mode 100644 archiver/archiver_test.go diff --git a/archiver/archive_test.go b/archiver/archive_test.go deleted file mode 100644 index 937a380d..00000000 --- a/archiver/archive_test.go +++ /dev/null @@ -1,121 +0,0 @@ -package archiver - -import "testing" - -func TestWithin(t *testing.T) { - for i, tc := range []struct { - path1, path2 string - expect bool - }{ - { - path1: "/foo", - path2: "/foo/bar", - expect: true, - }, - { - path1: "/foo", - path2: "/foobar/asdf", - expect: false, - }, - { - path1: "/foobar/", - path2: "/foobar/asdf", - expect: true, - }, - { - path1: "/foobar/asdf", - path2: "/foobar", - expect: false, - }, - { - path1: "/foobar/asdf", - path2: "/foobar/", - expect: false, - }, - { - path1: "/", - path2: "/asdf", - expect: true, - }, - { - path1: "/asdf", - path2: "/asdf", - expect: true, - }, - { - path1: "/", - path2: "/", - expect: true, - }, - } { - actual := within(tc.path1, tc.path2) - if actual != tc.expect { - t.Errorf("Test %d: [%s %s] Expected %t but got %t", i, tc.path1, tc.path2, tc.expect, actual) - } - } -} - -func TestMultipleTopLevels(t *testing.T) { - for i, tc := range []struct { - set []string - expect bool - }{ - { - set: []string{}, - expect: false, - }, - { - set: []string{"/foo"}, - expect: false, - }, - { - set: []string{"/foo", "/foo/bar"}, - expect: false, - }, - { - set: []string{"/foo", "/bar"}, - expect: true, - }, 
- { - set: []string{"/foo", "/foobar"}, - expect: true, - }, - { - set: []string{"foo", "foo/bar"}, - expect: false, - }, - { - set: []string{"foo", "/foo/bar"}, - expect: false, - }, - { - set: []string{"../foo", "foo/bar"}, - expect: true, - }, - { - set: []string{`C:\foo\bar`, `C:\foo\bar\zee`}, - expect: false, - }, - { - set: []string{`C:\`, `C:\foo\bar`}, - expect: false, - }, - { - set: []string{`D:\foo`, `E:\foo`}, - expect: true, - }, - { - set: []string{`D:\foo`, `D:\foo\bar`, `C:\foo`}, - expect: true, - }, - { - set: []string{"/foo", "/", "/bar"}, - expect: true, - }, - } { - actual := multipleTopLevels(tc.set) - if actual != tc.expect { - t.Errorf("Test %d: %v: Expected %t but got %t", i, tc.set, tc.expect, actual) - } - } -} diff --git a/archiver/archive.go b/archiver/archiver.go similarity index 95% rename from archiver/archive.go rename to archiver/archiver.go index d44067d0..68c53d2a 100644 --- a/archiver/archive.go +++ b/archiver/archiver.go @@ -113,6 +113,14 @@ type Decompressor interface { Decompress(in io.Reader, out io.Writer) error } +// Matcher is a type that can return whether the given +// file appears to match the implementation's format. +// Implementations should return the file's read position +// to where it was when the method was called. 
+type Matcher interface { + Match(*os.File) (bool, error) +} + func fileExists(name string) bool { _, err := os.Stat(name) return !os.IsNotExist(err) diff --git a/archiver/archiver_test.go b/archiver/archiver_test.go new file mode 100644 index 00000000..dddb0bf9 --- /dev/null +++ b/archiver/archiver_test.go @@ -0,0 +1,290 @@ +package archiver + +import ( + "bytes" + "fmt" + "io" + "io/ioutil" + "os" + "path/filepath" + "testing" +) + +func TestWithin(t *testing.T) { + for i, tc := range []struct { + path1, path2 string + expect bool + }{ + { + path1: "/foo", + path2: "/foo/bar", + expect: true, + }, + { + path1: "/foo", + path2: "/foobar/asdf", + expect: false, + }, + { + path1: "/foobar/", + path2: "/foobar/asdf", + expect: true, + }, + { + path1: "/foobar/asdf", + path2: "/foobar", + expect: false, + }, + { + path1: "/foobar/asdf", + path2: "/foobar/", + expect: false, + }, + { + path1: "/", + path2: "/asdf", + expect: true, + }, + { + path1: "/asdf", + path2: "/asdf", + expect: true, + }, + { + path1: "/", + path2: "/", + expect: true, + }, + { + path1: "/foo/bar/daa", + path2: "/foo", + expect: false, + }, + { + path1: "/foo/", + path2: "/foo/bar/daa", + expect: true, + }, + } { + actual := within(tc.path1, tc.path2) + if actual != tc.expect { + t.Errorf("Test %d: [%s %s] Expected %t but got %t", i, tc.path1, tc.path2, tc.expect, actual) + } + } +} + +func TestMultipleTopLevels(t *testing.T) { + for i, tc := range []struct { + set []string + expect bool + }{ + { + set: []string{}, + expect: false, + }, + { + set: []string{"/foo"}, + expect: false, + }, + { + set: []string{"/foo", "/foo/bar"}, + expect: false, + }, + { + set: []string{"/foo", "/bar"}, + expect: true, + }, + { + set: []string{"/foo", "/foobar"}, + expect: true, + }, + { + set: []string{"foo", "foo/bar"}, + expect: false, + }, + { + set: []string{"foo", "/foo/bar"}, + expect: false, + }, + { + set: []string{"../foo", "foo/bar"}, + expect: true, + }, + { + set: []string{`C:\foo\bar`, 
`C:\foo\bar\zee`}, + expect: false, + }, + { + set: []string{`C:\`, `C:\foo\bar`}, + expect: false, + }, + { + set: []string{`D:\foo`, `E:\foo`}, + expect: true, + }, + { + set: []string{`D:\foo`, `D:\foo\bar`, `C:\foo`}, + expect: true, + }, + { + set: []string{"/foo", "/", "/bar"}, + expect: true, + }, + } { + actual := multipleTopLevels(tc.set) + if actual != tc.expect { + t.Errorf("Test %d: %v: Expected %t but got %t", i, tc.set, tc.expect, actual) + } + } +} + +func TestArchiveUnarchive(t *testing.T) { + for _, af := range archiveFormats { + au, ok := af.(archiverUnarchiver) + if !ok { + t.Errorf("%s (%T): not an Archiver and Unarchiver", af, af) + continue + } + testArchiveUnarchive(t, au) + } +} + +func testArchiveUnarchive(t *testing.T, au archiverUnarchiver) { + auStr := fmt.Sprintf("%s", au) + + tmp, err := ioutil.TempDir("", "archiver_test") + if err != nil { + t.Fatalf("[%s] %v", auStr, err) + } + defer os.RemoveAll(tmp) + + // Test creating archive + outfile := filepath.Join(tmp, "archiver_test."+auStr) + err = au.Archive([]string{"testdata"}, outfile) + if err != nil { + t.Fatalf("[%s] making archive: didn't expect an error, but got: %v", auStr, err) + } + + // Test format matching (TODO: Make this its own test, out of band with the archive/unarchive tests) + //testMatching(t, au, outfile) // TODO: Disabled until we can finish implementing this for compressed tar formats + + // Test extracting archive + dest := filepath.Join(tmp, "extraction_test_"+auStr) + os.Mkdir(dest, 0755) + err = au.Unarchive(outfile, dest) + if err != nil { + t.Fatalf("[%s] extracting archive [%s -> %s]: didn't expect an error, but got: %v", auStr, outfile, dest, err) + } + + // Check that what was extracted is what was compressed + symmetricTest(t, auStr, dest) +} + +// testMatching tests that au can match the format of archiveFile. 
+func testMatching(t *testing.T, au archiverUnarchiver, archiveFile string) { + m, ok := au.(Matcher) + if !ok { + t.Logf("[NOTICE] %T (%s) is not a Matcher", au, au) + return + } + + file, err := os.Open(archiveFile) + if err != nil { + t.Fatalf("[%s] opening file for matching: %v", au, err) + } + defer file.Close() + + tmpBuf := make([]byte, 2048) + io.ReadFull(file, tmpBuf) + + matched, err := m.Match(file) + if err != nil { + t.Fatalf("%s (%T): testing matching: got error, expected none: %v", m, m, err) + } + if !matched { + t.Fatalf("%s (%T): format should have matched, but didn't", m, m) + } +} + +// symmetricTest compares the contents of a destination directory to the contents +// of the test corpus and tests that they are equal. +func symmetricTest(t *testing.T, formatName, dest string) { + var expectedFileCount int + filepath.Walk("testdata", func(fpath string, info os.FileInfo, err error) error { + expectedFileCount++ + return nil + }) + + // If outputs equals inputs, we're good; traverse output files + // and compare file names, file contents, and file count. 
+ var actualFileCount int + filepath.Walk(dest, func(fpath string, info os.FileInfo, err error) error { + if fpath == dest { + return nil + } + actualFileCount++ + + origPath, err := filepath.Rel(dest, fpath) + if err != nil { + t.Fatalf("[%s] %s: Error inducing original file path: %v", formatName, fpath, err) + } + + if info.IsDir() { + // stat dir instead of read file + _, err = os.Stat(origPath) + if err != nil { + t.Fatalf("[%s] %s: Couldn't stat original directory (%s): %v", formatName, + fpath, origPath, err) + } + return nil + } + + expectedFileInfo, err := os.Stat(origPath) + if err != nil { + t.Fatalf("[%s] %s: Error obtaining original file info: %v", formatName, fpath, err) + } + expected, err := ioutil.ReadFile(origPath) + if err != nil { + t.Fatalf("[%s] %s: Couldn't open original file (%s) from disk: %v", formatName, + fpath, origPath, err) + } + + actualFileInfo, err := os.Stat(fpath) + if err != nil { + t.Fatalf("[%s] %s: Error obtaining actual file info: %v", formatName, fpath, err) + } + actual, err := ioutil.ReadFile(fpath) + if err != nil { + t.Fatalf("[%s] %s: Couldn't open new file from disk: %v", formatName, fpath, err) + } + + if actualFileInfo.Mode() != expectedFileInfo.Mode() { + t.Fatalf("[%s] %s: File mode differed between on disk and compressed", formatName, + expectedFileInfo.Mode().String()+" : "+actualFileInfo.Mode().String()) + } + if !bytes.Equal(expected, actual) { + t.Fatalf("[%s] %s: File contents differed between on disk and compressed", formatName, origPath) + } + + return nil + }) + + if got, want := actualFileCount, expectedFileCount; got != want { + t.Fatalf("[%s] Expected %d resulting files, got %d", formatName, want, got) + } +} + +var archiveFormats = []interface{}{ + DefaultZip, + DefaultTar, + DefaultTarBz2, + DefaultTarGz, + DefaultTarLz4, + DefaultTarSz, + DefaultTarXz, +} + +type archiverUnarchiver interface { + Archiver + Unarchiver +} diff --git a/archiver/rar.go b/archiver/rar.go index 1356eba8..ba55fecf 100644 
--- a/archiver/rar.go +++ b/archiver/rar.go @@ -1,6 +1,7 @@ package archiver import ( + "bytes" "fmt" "io" "log" @@ -323,6 +324,28 @@ func (r *Rar) Extract(source, target, destination string) error { }) } +// Match returns true if the format of file matches this +// type's format. It should not affect reader position. +func (*Rar) Match(file *os.File) (bool, error) { + currentPos, err := file.Seek(0, io.SeekCurrent) + if err != nil { + return false, err + } + _, err = file.Seek(0, 0) + if err != nil { + return false, err + } + defer file.Seek(currentPos, io.SeekStart) + + buf := make([]byte, 8) + if n, err := file.Read(buf); err != nil || n < 8 { + return false, nil + } + hasTarHeader := bytes.Equal(buf[:7], []byte("Rar!\x1a\x07\x00")) || // ver 1.5 + bytes.Equal(buf, []byte("Rar!\x1a\x07\x01\x00")) // ver 5.0 + return hasTarHeader, nil +} + func (r *Rar) String() string { return "rar" } type rarFileInfo struct { @@ -342,6 +365,7 @@ var ( _ = Unarchiver(new(Rar)) _ = Walker(new(Rar)) _ = Extractor(new(Rar)) + _ = Matcher(new(Rar)) _ = os.FileInfo(rarFileInfo{}) ) diff --git a/archiver/tar.go b/archiver/tar.go index 7fc1ea5f..dd9cf0d2 100644 --- a/archiver/tar.go +++ b/archiver/tar.go @@ -2,12 +2,14 @@ package archiver import ( "archive/tar" + "bytes" "fmt" "io" "log" "os" "path" "path/filepath" + "strconv" "strings" ) @@ -524,8 +526,66 @@ func (t *Tar) Extract(source, target, destination string) error { }) } +// Match returns true if the format of file matches this +// type's format. It should not affect reader position. 
+func (*Tar) Match(file *os.File) (bool, error) { + currentPos, err := file.Seek(0, io.SeekCurrent) + if err != nil { + return false, err + } + _, err = file.Seek(0, 0) + if err != nil { + return false, err + } + defer file.Seek(currentPos, io.SeekStart) + + buf := make([]byte, tarBlockSize) + if _, err = io.ReadFull(file, buf); err != nil { + return false, nil + } + return hasTarHeader(buf), nil +} + +// hasTarHeader checks passed bytes has a valid tar header or not. buf must +// contain at least 512 bytes and if not, it always returns false. +func hasTarHeader(buf []byte) bool { + if len(buf) < tarBlockSize { + return false + } + + b := buf[148:156] + b = bytes.Trim(b, " \x00") // clean up all spaces and null bytes + if len(b) == 0 { + return false // unknown format + } + hdrSum, err := strconv.ParseUint(string(b), 8, 64) + if err != nil { + return false + } + + // According to the go official archive/tar, Sun tar uses signed byte + // values so this calcs both signed and unsigned + var usum uint64 + var sum int64 + for i, c := range buf { + if 148 <= i && i < 156 { + c = ' ' // checksum field itself is counted as branks + } + usum += uint64(uint8(c)) + sum += int64(int8(c)) + } + + if hdrSum != usum && int64(hdrSum) != sum { + return false // invalid checksum + } + + return true +} + func (t *Tar) String() string { return "tar" } +const tarBlockSize = 512 + // Compile-time checks to ensure type implements desired interfaces. var ( _ = Reader(new(Tar)) @@ -534,6 +594,7 @@ var ( _ = Unarchiver(new(Tar)) _ = Walker(new(Tar)) _ = Extractor(new(Tar)) + _ = Matcher(new(Tar)) ) // DefaultTar is a convenient archiver ready to use. 
diff --git a/archiver/zip.go b/archiver/zip.go index a0d8b65f..9828c630 100644 --- a/archiver/zip.go +++ b/archiver/zip.go @@ -2,6 +2,7 @@ package archiver import ( "archive/zip" + "bytes" "compress/flate" "fmt" "io" @@ -492,6 +493,26 @@ func (z *Zip) Extract(source, target, destination string) error { }) } +// Match returns true if the format of file matches this +// type's format. It should not affect reader position. +func (*Zip) Match(file *os.File) (bool, error) { + currentPos, err := file.Seek(0, io.SeekCurrent) + if err != nil { + return false, err + } + _, err = file.Seek(0, 0) + if err != nil { + return false, err + } + defer file.Seek(currentPos, io.SeekStart) + + buf := make([]byte, 4) + if n, err := file.Read(buf); err != nil || n < 4 { + return false, nil + } + return bytes.Equal(buf, []byte("PK\x03\x04")), nil +} + func (z *Zip) String() string { return "zip" } // Compile-time checks to ensure type implements desired interfaces. @@ -502,6 +523,7 @@ var ( _ = Unarchiver(new(Zip)) _ = Walker(new(Zip)) _ = Extractor(new(Zip)) + _ = Matcher(new(Zip)) ) // compressedFormats is a (non-exhaustive) set of lowercased From f50eb754f1d26fb51c4674d9a5debbed9f27dfac Mon Sep 17 00:00:00 2001 From: Matthew Holt Date: Wed, 7 Nov 2018 12:13:16 -0700 Subject: [PATCH 17/18] Oops; copy testdata into this package --- archiver/testdata/already-compressed.jpg | Bin 0 -> 8944 bytes archiver/testdata/proverbs/extra/proverb3.txt | 2 ++ archiver/testdata/proverbs/proverb1.txt | 2 ++ archiver/testdata/proverbs/proverb2.txt | 2 ++ archiver/testdata/quote1.txt | 2 ++ 5 files changed, 8 insertions(+) create mode 100644 archiver/testdata/already-compressed.jpg create mode 100644 archiver/testdata/proverbs/extra/proverb3.txt create mode 100644 archiver/testdata/proverbs/proverb1.txt create mode 100644 archiver/testdata/proverbs/proverb2.txt create mode 100644 archiver/testdata/quote1.txt diff --git a/archiver/testdata/already-compressed.jpg b/archiver/testdata/already-compressed.jpg 
new file mode 100644 index 0000000000000000000000000000000000000000..3d599f8065e9b339f6be888e9f7ce941825b4c98 GIT binary patch literal 8944 zcmb7pS5y;F(``ZxB^Y{@UP6`L!4L>ldIE%|^p13uCcT7ErGpBgh2D{>ROv|Xpi}`t z1pyUAFW-0n|KUE~IWKe8UUSwtYoE1dX3xLnf1dylT}>TL01yZO0B;xI-x@$20HL6y zq9lha|G{owL_kRNZxujEc6MiZ?!`@|B=n3NE9vd6n@cTP!7WB~+UxgJG0% zM4{}hjfR0kX+G28!I9t*Q#R({C@&-K0F$4m`Kycifz=l^O%+qUI33E>PkBtWb)E~0 z79f`$yqq~wV`9*dSIeC21Cq0M2HC58(cxO5RMe&J7EL#Xe2{jb-2|tdD@4C>{wtR@ z=xOOu#Z!S3c!3j+*%yM?3m4QhO%2OfPI{nPf#oj#yt}-EE+ui!PcW&n@*D9kPn{!{ zTQa2;j?x)~FbRhl`z#5V>?E<~+O#X@RvyK=jaSVtTSp8jwFgVM$kw*|HWzu6h81~O zNirD4=d6`CY>cTTO$LmqZ6H|xTIat_yEXCyuAVNIi-mOG_Cvhd6WlWL2aX>CDI z_{%dnqar;bDykek&wGCa=&FI?5>rzhn>ck6aG_k)5VIhXfyq?aHy=|eB6ImB$MJ)S zhL&jPMpfLleJ6Lh#W9>P zMwS;BnQPZv{it~0@o2!>$lZGIoXjAhvce;~EI3Ejk4LfgB{FN)m6vVQwOMD-It3kv+6<`xJcfPf;!uAh46s#QfZd9$njOVE4z~D zagUkG*zsU?>s+r+D{uQ4(@ZzNk)SxvEMTQ;BKXDQ51l9ieVSmaSBsD3dq>?J6{*dt zH@+ye?)<=2tNSZeesRcni3C(rUv(MzUjM^k~+j$vZvE34DSv{qoY!&;}&7F6g+wkx*Sx_1+? zsl4%6KgCCWHT9W|mENI7_f5T1&6VYkg>A{T*QPals8IJ#5l@E@yUx#gu!T_R=JJxQ z{PgTj_7e9z2SHc)_pLiGk2}sSCb>S$_w3O%Rz++kp)bz^b-R(z8>fz^ zX8K-K?b#>vnEC$KLXPZMqXbO5wP zWhR~X%ll;FqR7iol_*R6(#xujHUHV)UAe1y*-90Xz7;jLnVPPP#qQ|YvWEAy`D^kP z1{3qmxF0X7^@jY_Oa%jN@hJW>+vlu2k3TQ2zY2hwq7}KPynN|}L|M20Opb#cE6!4_pG#l9NPAQ#m|jUf}-1)kMjE~8;qM{@PFGTk?$@D{6A z{?)9;cKr3T?!rPfsEHHJ{-GqrPU7z0-F3F{E{~M=!B{vYHc;@6^|eyE=epc(gXML#f!qT+34n5fnLryYs7_wmQI%^m^NRt%HM?XEV z!@;?=r?y5@-IWDuQsmZdBR!-ZFqzc4M+eWB%&s!;eya|OOVj1(ep@hO5LV1&@Buk2C%$_ z7c6q)HkQ6@dnu_ z;QD>o)g7sa)I=mfFR?4B>_niFZ_6PT-qoq|^Gnfye}KO8YvFUd0_-DkW^ClQx&}f~ z&o@Zd$BjFWV4NIB+;`sLHH>9AEICaX_kF4uR?<@f(_VVrw~gZX^IGjv^$pp%D^-PC z{0`^E(NM1JVB0&-BWQloKfthQ{X?~VLW=&))tAje$EPhs!wrEYyg?)P)u(SBFI~$W z_Q337e#HSl{mRXk6N|Pw!9VvZF~yHBf}@l%nOG+haK@wWH=sbH(I2fFVPymO=Iz)@ zFFVI~@6}ZKx1{|2TSy*H=RP2vzGCjtl#D-GK3JhFbcmCej?9WjTW1F6`Q$sC>VIU7 zH(Y!d!LZ|3DB7M=aVN|D;jaO{H$Hakf5qo9#2|IF^WD3|%PWlOo1L99b?s;{Yw8GO 
z$5Not?Vf2x#YK3<_p8TR{$Jh2Pk$INFfijBAmCPINB-K`R^bU<$vbyJquFWdDN4=y^}J9tZpaTJ(9Jp4tulxZ!^&> zE-1KI=O>gSS~h(@yVxUBX>xTZAf$`X?dYeQNP{Hn%Avjca9uyx>uU#X*7OfQsF%v>er9Rp~~-Se38J^jMclB zPFj7Zch$u-C8ck4`*IHUtC-F?eeh$MWKC6e z1{QxgKWoutil+KsRg51%1b+>*T0R9WlVDKmDAWD5;SFO;FaM;h7YV<9j~wd%1AMI= zC@-FW%OMi&8!Dtn+4+%tjaxyHBxwHaJ6{DXhg&a+y?sw|qTv_R3+IKNMhC@Qj^g1x zdQ--XZ6ExHy&UBb@1NmH5l}NP0XeuMzy0--(@Jcf?9TQX)AFBi4ps%W;U**UvbK!n z6$Zh9j)a{W-t2>|2r&Gq8S=ZY=O{;e$+2|aJ zzY0YtjnhFxlz$5S1El&q4r~8KzrbGM>jiY#Rra4h-)ODduGsJ^(IDYcgv^6mDHeSl zF9inVjMIJjdyblmSp6Hh+?FpjG43*+vtuOhRV)vZL|6)2u|j|s!=}$#J)S#{{dCF- zt)bM5v<_=&)Yy7jK00<+Tqyiu_4C{Y!co34H|F;4dZu4_P$J5r>qlcV$hEWyUwS9u zA3JmennRnwyM_EkS=CCST$)(ksnV%ZJqWdvUWJ*3^_1bB$s@<~m~s8ww`&VwfxeZF3LZA;OBkK|njGv_VQOc68Ae?UcIgSkdutY)$<1 ztWut(slazWd!VSG!4>F3vDzN~^f;i>K+QN@jb_QiDNuz3TSCj1=rKM~wYWR6^VRgk zuT>-b0j9bq%D!vNw?+-qY4%;juHlJHBQ`>@{pCoqCn`H(z`rdS%lw#6{r=dRuIRlC zTGo5XR*haHZ6OYA@E5s8s$pHjKha+rglsYiu+eF#*I{docgOZvY9G{6+%wCQ-FtG= z39P^HQcLo{xCgkWM9CSiH8jG6Nz+pemWrqg)Rf;hBWk}=?|mp2ZJlZdU9OxaNsObg0`Xm8LJ!3gC_sgD@g3nm_uG6dIK3+V}0-sK)5zHxW~*P!5vmwY3qARF1Z#*Zms{_AYTL;r zPw+Ip`{o|c6R^Q5gPhqlrZl}_q^wN7TCn&FvJ#}CCcI_!!AScjk+p}dEQQ8<$J}3+ zG|^Qp-QyzPYa2cONE*gZfbQ+mv`kLOtyY}~4+e?+14wUv5w~dmswi$F?qN;TJkn?} zY(l20I^@x8S($(oRjv|YKG^RHnsl}Be%iWbSIvM*!I^7E0)@3jrvZG)#dbu^WLCe& zCw0x%&ksjjRsyfyt{5_%2=2g#22inoU3c*#d~5gC*la4(I%F=jO3T2H8qIncCh{lW zPz{3fO}*P{9WFiHY|J(oV_Y2nJd38HKSGr+=n9*7z(~617jQ{E*FvS-{{W+R8vX&y zRLc=1L5{Hs))~k=X2v~9hZXmQ7>q}GX=EB?u)(jvd{^$NpwXl$TDWapzHkqd8#>7- zwGqE2ybPjdul7U=28M+0fjnIbgYL>WIGANa>!uOG#y^OGR<3OY9;1gW+a{476M?$a zXDH%lZmy==v$NN*2VY@G%P(gu;%(;arE%{_;f(7dFtK0?y2b_xssut(|0fo~X+_QG zR4XNWp5{k)`DKmrq%G4Mr?MLaOjsFtTl$>vyM7+<(-ZFpE(GY{MTW13qZUHSHsfCR zGK|(TgX>Ub;(0pd+caO`^@0U^5UKPT?Qcr*e3{jZ1T$!ZT|pA4$c*Kc!o1U06k2JOf z9v#&9{brft!8$UnFeh_xSM2154m}r};=EhyNc9pp_D6RS@;k8<4Vo zMqE`ER(jkGZPx|jt035mw4?1hV+A(s4D!K-!MEjvQ+!l z+@PjNu}cFJy?+=$ZdvxKH@NBqfwFG1ugpD1j+=-&2&m47oZY+p(#5^+@+2B2xJKSJ 
zWNdxPysX8&vIX?BI6+g;-qyf!5CD*nfQXQogydG#`ELz;EBtUt5Yng_L%8jTI3;}} zX~j*_i~D$BaQns?zb#@qsi^-ciAnAQOuss`Lq3H>p9OA76Z+xz+XGbo0qR3fUmVDp zXcPLC)l(%6zkT(ynJ6x)pNNKP%YWXRUa4M1WyF6-Up;)!IWYX_Axs)s`tdC8$rcqI zG<%8ihVG|?a{Q0sdY>_>r#bnp6$TYo8tt2;){`UmPy-VZa1Os3hS!iNY$ zva2T&&wu|l%6dqB<-@a-5E8FIGabPCx~V2Ze^Kq|m5mlPkW%T$knsKsVt=&u1YjvZ zEkh21|8)5hylC=~4^-m`S8ydQ#Qh{A)F0!`e+4)$oGfE=`9Wel`70&*TH}?A{3yJ8 z*nsUli`+c!lWC{W-v@Lo;Q4nn1%PJLw*LVQe`N@I{z7a*(uDWkgyN2I4%@pBdx36mSegCzY)cI?4O;sB2hZC~QAg zJ$uAk540udCWhHrPx5@Uu}#X0{E|Zop-QnXotiQQv@Iv+r{K5jvAt>zSNshh$$l67 zB%hxF>Jl0B*}J$(brRPCr}IzJkKmlE&Zf)}%R$1;N=52~O)Y6N1=OP*tD3!-EGo3b zr|7F#kYV%cb~o2kcmwj0mauUg4WpFy&#l_MW$&`w*xVRxD!DU#dF*IPpRWtCE~LzM z3HD7q!8cK(Go#)Cx%yFqOlhoB%rz%HtA`p)^Vy>*P0d%M`$Ia=FJ5kYmJOpB<9_uxgD+tF1iqUEyIgj{FVITWm3vd`IQ=?tju__nW_5GMupGSTlf*mE$LBUoRyNH?jQ%RMxp(De9FGm3Ibtj5i|bx<7KlYyzXR{9EvOGf^AR9^LKSfN zMezFe$W6`|+gKy)N6whY0v~El`LWz5X!RlTS|8FdP&pK;93WoOnWKveAmj@DDH+FRo3AsVq9fiAf9i z2GVl|>Qm-TbN(@V?Gzvh0&vfQSo=8_5$W&y`^OfLeik=orU%@RDfgz z^rlow*Ch67HO2*{Who}ZZ-3|0_LJAGIK}{7&B~q!f9JP@M0gf*BRDl#S7xTei>ltl z_J71?89>T(D?rc^(AcZl3HH9I6TLI}D`ZCgVu<(jkE`|G2nWtU;p`ih#K99ZYs&)c zgv$R7x0z|`s;%K6@Ley3RQ`ML=AqB|4B69jQ*qiP#|Dl(Bkp$yayO_H-TW_uBmH8f zQ%+3ri3R7+PVzukG#7yGcFCZ5xsyUagrGX@9q-#DX? 
zDxsiU!4^L;@h)4>^*XHU^~O(h5jb8I$IU5aG7*7_5Myn*Mj~cf)*R_ZJCIkyO?sB0 za_8!dWZyrpoX|ai%N?K);WT`8XY=eyN~F#FJL)$=BMSgF?W58J6*_u^u~7UB)iK5C zD><|-WyE4Axac3il|`m2OZP(PboY%3jPSRX_zzRa%%cc809;HCWUjH_rA|6(mKGt$ zU#9#*%B0*q|GWN?94j}Wzg_1}AUIkFaFjUCnt&ucZaZv>a;WOA(x~brPpAU1)0e>FjnFMJa>IjF=N@4jBHyI zs|r}rWCH3H((k#4kI*`jGHq#aE;#vC`A!s~=;54C5u~7rB6^`yXkA08Yb(mQt+G}f zCsm}+5UPEYWW5>YxepKSfW)X zigaEe_M76P=TKrr8_pD2&IAv1`EW}h(qzYSz^2WDj50A4Z01lCd5~}6V3H4$1S_{8 zF5HCb(Yc3yk6XReI`Lv;Y6mcSKU^sbQfMS+W_Pzmh^1ITeKHN$va=?g#{HTi2~Cjt`tunb)|9q%2YBo> zj=BrZKJz=P3g1z11Q~H#pgiF`E@hhuTE_c`N_atDGmorOMDajU zks65(q;KX{!Xk5{BGfL*+W;RAA{Y)-q|b_F%ps-iEPTgVR!;s$56Dj-ZaC=-*{X5P zXVHERE6JPjV|XN)Tjpxy8_msE5;K8E**tXuNyk$6W~^`z;_k}eL`OyC3Q7`|A|{E@ z&iw%$$aa`UHwTjGGP{aYVO&+TuDBFHN07JDP*5*nJtO3tU?(#TtW<~e@fRDNttfn$ z(X+{AC6Y{AvW~7wGU*aSyB(EH%B(01Q1&t=0NFItSlcxssCL(JmSnE;EHNOqQ`#Bh z+52u4ARBq}m@1)I1d*7phJ8X*B`zD2Nk+4YXUHX53h?ltu8QlJ@pcz;-YcMXR0~OD z6?AB7w?2-1;+i`h1B2L8hkWGB6mK}PWNba50!B4LwxkT-agn`(a$kpZ?1am} zAyjnw$xtUIUc{43id8a_4B93;y|u=O1fM~-{CSq&Mf*zoO(8=C0#QiUSl8TFH5AmG7;vAJg1 zQ&%(lFuEMr!`t?O^L9*pYyTsk|EGNb03p-_oHXJRFf}ft|Mm$4K;V7Axf#~*mxqRO zm@Q&?A*k{hhsp!ofnkn&Ha0Xk4}?pKpH78L7^n3hjS=bxTDgdU^W_vi*ZnvQ6mR;T zd3e@dq=)T{R-|c5f8gG1*nMf2QFzY1>Hl?>ZZZ^ zHw(*BAnyhBic=Vp5cddTW8F-KK@N0f)59h5nMPeuUoC1XIzC=(>Gnp1BXDjXdW5?9_s8JfaVE?5^AxkF?TH8^1Zw z$gmi4%v#uHs~Qqd4fkOEphsdQ)Ew`OuM1?zrW?sOeI5`W^F(fFK`I6-@nMGqMUfsgk{Fk|dc53ID?hg=7X?jw;0 z3xCNEu}Q*T>#Ah}$IBX>o+wu`pzSxrvWK$P&yqwMy&P7a)~M7p-e`z4dUo5gFIv3C zL590@N{!iT_0KkSxL-2kJYPetHC#1 zUQNT?e{nQ)M2R%(v2Eil!iR@+L@{C{^2uPjq_@>FENy2pHe}>PkD!OpSu;i+8a6G$ z?@h*Y3n4gNSMQ(i34RSW{Dw9);cyB^$|{a8sgr4TXQ3<5>A$MPROD=01q>tA;}Z^} zOs_!mQzZP7XWPrgS``Vk8ZW7LU(l4SfWO}(zeyZ4lBP*44Uut^vHQG}O8g2tpQx5# z%xrP~L)M++Se1C37na-2Sn)mahL2h+8j&xWuX~?`&GnOxig}{pT6f%X6g#2$cFRZPaeb~sE7v!S~T5KqmHYVKm1$yKTerK ATmS$7 literal 0 HcmV?d00001 diff --git a/archiver/testdata/proverbs/extra/proverb3.txt b/archiver/testdata/proverbs/extra/proverb3.txt new file mode 100644 index 00000000..4a4768d4 --- /dev/null 
+++ b/archiver/testdata/proverbs/extra/proverb3.txt @@ -0,0 +1,2 @@ +"interface{} says nothing." + - Rob Pike \ No newline at end of file diff --git a/archiver/testdata/proverbs/proverb1.txt b/archiver/testdata/proverbs/proverb1.txt new file mode 100644 index 00000000..88da02ef --- /dev/null +++ b/archiver/testdata/proverbs/proverb1.txt @@ -0,0 +1,2 @@ +"Channels orchestrate; mutexes serialize." + - Rob Pike \ No newline at end of file diff --git a/archiver/testdata/proverbs/proverb2.txt b/archiver/testdata/proverbs/proverb2.txt new file mode 100644 index 00000000..8e075027 --- /dev/null +++ b/archiver/testdata/proverbs/proverb2.txt @@ -0,0 +1,2 @@ +"A little copying is better than a little dependency." + - Rob Pike \ No newline at end of file diff --git a/archiver/testdata/quote1.txt b/archiver/testdata/quote1.txt new file mode 100644 index 00000000..1c34480d --- /dev/null +++ b/archiver/testdata/quote1.txt @@ -0,0 +1,2 @@ +"Go has generics; they're called interfaces." + - Matt Holt \ No newline at end of file From d48ce61eb2c501388e99ee300b8c7e622c7cfc88 Mon Sep 17 00:00:00 2001 From: Matthew Holt Date: Wed, 7 Nov 2018 12:55:25 -0700 Subject: [PATCH 18/18] Finally replace old package with new one --- archiver.go | 199 ++++-- archiver/archiver.go | 234 ------- archiver/archiver_test.go | 290 -------- archiver/rar.go | 375 ---------- archiver/tar.go | 603 ---------------- archiver/tarbz2.go | 112 --- archiver/targz.go | 110 --- archiver/tarlz4.go | 107 --- archiver/tarsz.go | 100 --- archiver/tarxz.go | 105 --- archiver/testdata/already-compressed.jpg | Bin 8944 -> 0 bytes archiver/testdata/proverbs/extra/proverb3.txt | 2 - archiver/testdata/proverbs/proverb1.txt | 2 - archiver/testdata/proverbs/proverb2.txt | 2 - archiver/testdata/quote1.txt | 2 - archiver/zip.go | 573 --------------- archiver_test.go | 325 +++++---- archiver/bz2.go => bz2.go | 0 {archiver/cmd => cmd}/arc/main.go | 0 cmd/archiver/main.go | 94 --- .../filecompressor.go => filecompressor.go | 0 
archiver/gz.go => gz.go | 0 archiver/lz4.go => lz4.go | 0 rar.go | 385 +++++++++-- archiver/sz.go => sz.go | 0 tar.go | 653 ++++++++++++++---- tarbz2.go | 156 +++-- targz.go | 150 ++-- tarlz4.go | 139 ++-- tarsz.go | 134 ++-- tarxz.go | 150 ++-- archiver/xz.go => xz.go | 0 zip.go | 587 ++++++++++++---- 33 files changed, 2013 insertions(+), 3576 deletions(-) delete mode 100644 archiver/archiver.go delete mode 100644 archiver/archiver_test.go delete mode 100644 archiver/rar.go delete mode 100644 archiver/tar.go delete mode 100644 archiver/tarbz2.go delete mode 100644 archiver/targz.go delete mode 100644 archiver/tarlz4.go delete mode 100644 archiver/tarsz.go delete mode 100644 archiver/tarxz.go delete mode 100644 archiver/testdata/already-compressed.jpg delete mode 100644 archiver/testdata/proverbs/extra/proverb3.txt delete mode 100644 archiver/testdata/proverbs/proverb1.txt delete mode 100644 archiver/testdata/proverbs/proverb2.txt delete mode 100644 archiver/testdata/quote1.txt delete mode 100644 archiver/zip.go rename archiver/bz2.go => bz2.go (100%) rename {archiver/cmd => cmd}/arc/main.go (100%) delete mode 100644 cmd/archiver/main.go rename archiver/filecompressor.go => filecompressor.go (100%) rename archiver/gz.go => gz.go (100%) rename archiver/lz4.go => lz4.go (100%) rename archiver/sz.go => sz.go (100%) rename archiver/xz.go => xz.go (100%) diff --git a/archiver.go b/archiver.go index 6bf20162..68c53d2a 100644 --- a/archiver.go +++ b/archiver.go @@ -3,46 +3,133 @@ package archiver import ( "fmt" "io" - "log" "os" + "path" "path/filepath" "runtime" "strings" ) -// Archiver represent a archive format +// Archiver is a type that can create an archive file +// from a list of source file names. type Archiver interface { - // Match checks supported files - Match(filename string) bool - // Make makes an archive file on disk. - Make(destination string, sources []string) error - // Open extracts an archive file on disk. 
- Open(source, destination string) error - // Write writes an archive to a Writer. - Write(output io.Writer, sources []string) error - // Read reads an archive from a Reader. - Read(input io.Reader, destination string) error -} - -// SupportedFormats contains all supported archive formats -var SupportedFormats = map[string]Archiver{} - -// RegisterFormat adds a supported archive format -func RegisterFormat(name string, format Archiver) { - if _, ok := SupportedFormats[name]; ok { - log.Printf("Format %s already exists, skip!\n", name) - return + Archive(sources []string, destination string) error +} + +// Unarchiver is a type that can extract archive files +// into a folder. +type Unarchiver interface { + Unarchive(source, destination string) error +} + +// Writer can write discrete byte streams of files to +// an output stream. +type Writer interface { + Create(out io.Writer) error + Write(f File) error + Close() error +} + +// Reader can read discrete byte streams of files from +// an input stream. +type Reader interface { + Open(in io.Reader, size int64) error + Read() (File, error) + Close() error +} + +// Extractor can extract a specific file from a source +// archive to a specific destination folder on disk. +type Extractor interface { + Extract(source, target, destination string) error +} + +// File provides methods for accessing information about +// or contents of a file within an archive. +type File struct { + os.FileInfo + + // The original header info; depends on + // type of archive -- could be nil, too. + Header interface{} + + // Allow the file contents to be read (and closed) + io.ReadCloser +} + +// FileInfo is an os.FileInfo but optionally with +// a custom name, useful if dealing with files that +// are not actual files on disk, or which have a +// different name in an archive than on disk. +type FileInfo struct { + os.FileInfo + CustomName string +} + +// Name returns fi.CustomName if not empty; +// otherwise it returns fi.FileInfo.Name(). 
+func (fi FileInfo) Name() string { + if fi.CustomName != "" { + return fi.CustomName } - SupportedFormats[name] = format + return fi.FileInfo.Name() } -// MatchingFormat returns the first archive format that matches -// the given file, or nil if there is no match -func MatchingFormat(fpath string) Archiver { - for _, fmt := range SupportedFormats { - if fmt.Match(fpath) { - return fmt - } +// ReadFakeCloser is an io.Reader that has +// a no-op close method to satisfy the +// io.ReadCloser interface. +type ReadFakeCloser struct { + io.Reader +} + +// Close implements io.Closer. +func (rfc ReadFakeCloser) Close() error { return nil } + +// Walker can walk an archive file and return information +// about each item in the archive. +type Walker interface { + Walk(archive string, walkFn WalkFunc) error +} + +// WalkFunc is called at each item visited by Walk. +// If an error is returned, the walk may continue +// if the Walker is configured to continue on error. +// The sole exception is the error value ErrStopWalk, +// which stops the walk without an actual error. +type WalkFunc func(f File) error + +// ErrStopWalk signals Walk to break without error. +var ErrStopWalk = fmt.Errorf("walk stopped") + +// Compressor compresses to out what it reads from in. +// It also ensures a compatible or matching file extension. +type Compressor interface { + Compress(in io.Reader, out io.Writer) error + CheckExt(filename string) error +} + +// Decompressor decompresses to out what it reads from in. +type Decompressor interface { + Decompress(in io.Reader, out io.Writer) error +} + +// Matcher is a type that can return whether the given +// file appears to match the implementation's format. +// Implementations should return the file's read position +// to where it was when the method was called. 
+type Matcher interface { + Match(*os.File) (bool, error) +} + +func fileExists(name string) bool { + _, err := os.Stat(name) + return !os.IsNotExist(err) +} + +func mkdir(dirPath string) error { + err := os.MkdirAll(dirPath, 0755) + if err != nil { + return fmt.Errorf("%s: making directory: %v", dirPath, err) } return nil } @@ -99,21 +186,49 @@ func writeNewHardLink(fpath string, target string) error { return nil } -func mkdir(dirPath string) error { - err := os.MkdirAll(dirPath, 0755) +// within returns true if sub is within or equal to parent. +func within(parent, sub string) bool { + rel, err := filepath.Rel(parent, sub) if err != nil { - return fmt.Errorf("%s: making directory: %v", dirPath, err) + return false } - return nil + return !strings.Contains(rel, "..") } -func sanitizeExtractPath(filePath string, destination string) error { - // to avoid zip slip (writing outside of the destination), we resolve - // the target path, and make sure it's nested in the intended - // destination, or bail otherwise. - destpath := filepath.Join(destination, filePath) - if !strings.HasPrefix(destpath, filepath.Clean(destination)) { - return fmt.Errorf("%s: illegal file path", filePath) +// multipleTopLevels returns true if the paths do not +// share a common top-level folder. +func multipleTopLevels(paths []string) bool { + if len(paths) < 2 { + return false } - return nil + var lastTop string + for _, p := range paths { + p = strings.TrimPrefix(strings.Replace(p, `\`, "/", -1), "/") + for { + next := path.Dir(p) + if next == "." { + break + } + p = next + } + if lastTop == "" { + lastTop = p + } + if p != lastTop { + return true + } + } + return false +} + +// folderNameFromFileName returns a name for a folder +// that is suitable based on the filename, which will +// be stripped of its extensions. 
+func folderNameFromFileName(filename string) string { + base := filepath.Base(filename) + firstDot := strings.Index(base, ".") + if firstDot > -1 { + return base[:firstDot] + } + return base } diff --git a/archiver/archiver.go b/archiver/archiver.go deleted file mode 100644 index 68c53d2a..00000000 --- a/archiver/archiver.go +++ /dev/null @@ -1,234 +0,0 @@ -package archiver - -import ( - "fmt" - "io" - "os" - "path" - "path/filepath" - "runtime" - "strings" -) - -// Archiver is a type that can create an archive file -// from a list of source file names. -type Archiver interface { - Archive(sources []string, destination string) error -} - -// Unarchiver is a type that can extract archive files -// into a folder. -type Unarchiver interface { - Unarchive(source, destination string) error -} - -// Writer can write discrete byte streams of files to -// an output stream. -type Writer interface { - Create(out io.Writer) error - Write(f File) error - Close() error -} - -// Reader can read discrete byte streams of files from -// an input stream. -type Reader interface { - Open(in io.Reader, size int64) error - Read() (File, error) - Close() error -} - -// Extractor can extract a specific file from a source -// archive to a specific destination folder on disk. -type Extractor interface { - Extract(source, target, destination string) error -} - -// File provides methods for accessing information about -// or contents of a file within an archive. -type File struct { - os.FileInfo - - // The original header info; depends on - // type of archive -- could be nil, too. - Header interface{} - - // Allow the file contents to be read (and closed) - io.ReadCloser -} - -// FileInfo is an os.FileInfo but optionally with -// a custom name, useful if dealing with files that -// are not actual files on disk, or which have a -// different name in an archive than on disk. 
-type FileInfo struct { - os.FileInfo - CustomName string -} - -// Name returns fi.CustomName if not empty; -// otherwise it returns fi.FileInfo.Name(). -func (fi FileInfo) Name() string { - if fi.CustomName != "" { - return fi.CustomName - } - return fi.FileInfo.Name() -} - -// ReadFakeCloser is an io.Reader that has -// a no-op close method to satisfy the -// io.ReadCloser interface. -type ReadFakeCloser struct { - io.Reader -} - -// Close implements io.Closer. -func (rfc ReadFakeCloser) Close() error { return nil } - -// Walker can walk an archive file and return information -// about each item in the archive. -type Walker interface { - Walk(archive string, walkFn WalkFunc) error -} - -// WalkFunc is called at each item visited by Walk. -// If an error is returned, the walk may continue -// if the Walker is configured to continue on error. -// The sole exception is the error value ErrStopWalk, -// which stops the walk without an actual error. -type WalkFunc func(f File) error - -// ErrStopWalk signals Walk to break without error. -var ErrStopWalk = fmt.Errorf("walk stopped") - -// Compressor compresses to out what it reads from in. -// It also ensures a compatible or matching file extension. -type Compressor interface { - Compress(in io.Reader, out io.Writer) error - CheckExt(filename string) error -} - -// Decompressor decompresses to out what it reads from in. -type Decompressor interface { - Decompress(in io.Reader, out io.Writer) error -} - -// Matcher is a type that can return whether the given -// file appears to match the implementation's format. -// Implementations should return the file's read position -// to where it was when the method was called. 
-type Matcher interface { - Match(*os.File) (bool, error) -} - -func fileExists(name string) bool { - _, err := os.Stat(name) - return !os.IsNotExist(err) -} - -func mkdir(dirPath string) error { - err := os.MkdirAll(dirPath, 0755) - if err != nil { - return fmt.Errorf("%s: making directory: %v", dirPath, err) - } - return nil -} - -func writeNewFile(fpath string, in io.Reader, fm os.FileMode) error { - err := os.MkdirAll(filepath.Dir(fpath), 0755) - if err != nil { - return fmt.Errorf("%s: making directory for file: %v", fpath, err) - } - - out, err := os.Create(fpath) - if err != nil { - return fmt.Errorf("%s: creating new file: %v", fpath, err) - } - defer out.Close() - - err = out.Chmod(fm) - if err != nil && runtime.GOOS != "windows" { - return fmt.Errorf("%s: changing file mode: %v", fpath, err) - } - - _, err = io.Copy(out, in) - if err != nil { - return fmt.Errorf("%s: writing file: %v", fpath, err) - } - return nil -} - -func writeNewSymbolicLink(fpath string, target string) error { - err := os.MkdirAll(filepath.Dir(fpath), 0755) - if err != nil { - return fmt.Errorf("%s: making directory for file: %v", fpath, err) - } - - err = os.Symlink(target, fpath) - if err != nil { - return fmt.Errorf("%s: making symbolic link for: %v", fpath, err) - } - - return nil -} - -func writeNewHardLink(fpath string, target string) error { - err := os.MkdirAll(filepath.Dir(fpath), 0755) - if err != nil { - return fmt.Errorf("%s: making directory for file: %v", fpath, err) - } - - err = os.Link(target, fpath) - if err != nil { - return fmt.Errorf("%s: making hard link for: %v", fpath, err) - } - - return nil -} - -// within returns true if sub is within or equal to parent. -func within(parent, sub string) bool { - rel, err := filepath.Rel(parent, sub) - if err != nil { - return false - } - return !strings.Contains(rel, "..") -} - -// multipleTopLevels returns true if the paths do not -// share a common top-level folder. 
-func multipleTopLevels(paths []string) bool { - if len(paths) < 2 { - return false - } - var lastTop string - for _, p := range paths { - p = strings.TrimPrefix(strings.Replace(p, `\`, "/", -1), "/") - for { - next := path.Dir(p) - if next == "." { - break - } - p = next - } - if lastTop == "" { - lastTop = p - } - if p != lastTop { - return true - } - } - return false -} - -// folderNameFromFileName returns a name for a folder -// that is suitable based on the filename, which will -// be stripped of its extensions. -func folderNameFromFileName(filename string) string { - base := filepath.Base(filename) - firstDot := strings.Index(base, ".") - if firstDot > -1 { - return base[:firstDot] - } - return base -} diff --git a/archiver/archiver_test.go b/archiver/archiver_test.go deleted file mode 100644 index dddb0bf9..00000000 --- a/archiver/archiver_test.go +++ /dev/null @@ -1,290 +0,0 @@ -package archiver - -import ( - "bytes" - "fmt" - "io" - "io/ioutil" - "os" - "path/filepath" - "testing" -) - -func TestWithin(t *testing.T) { - for i, tc := range []struct { - path1, path2 string - expect bool - }{ - { - path1: "/foo", - path2: "/foo/bar", - expect: true, - }, - { - path1: "/foo", - path2: "/foobar/asdf", - expect: false, - }, - { - path1: "/foobar/", - path2: "/foobar/asdf", - expect: true, - }, - { - path1: "/foobar/asdf", - path2: "/foobar", - expect: false, - }, - { - path1: "/foobar/asdf", - path2: "/foobar/", - expect: false, - }, - { - path1: "/", - path2: "/asdf", - expect: true, - }, - { - path1: "/asdf", - path2: "/asdf", - expect: true, - }, - { - path1: "/", - path2: "/", - expect: true, - }, - { - path1: "/foo/bar/daa", - path2: "/foo", - expect: false, - }, - { - path1: "/foo/", - path2: "/foo/bar/daa", - expect: true, - }, - } { - actual := within(tc.path1, tc.path2) - if actual != tc.expect { - t.Errorf("Test %d: [%s %s] Expected %t but got %t", i, tc.path1, tc.path2, tc.expect, actual) - } - } -} - -func TestMultipleTopLevels(t *testing.T) { - for 
i, tc := range []struct { - set []string - expect bool - }{ - { - set: []string{}, - expect: false, - }, - { - set: []string{"/foo"}, - expect: false, - }, - { - set: []string{"/foo", "/foo/bar"}, - expect: false, - }, - { - set: []string{"/foo", "/bar"}, - expect: true, - }, - { - set: []string{"/foo", "/foobar"}, - expect: true, - }, - { - set: []string{"foo", "foo/bar"}, - expect: false, - }, - { - set: []string{"foo", "/foo/bar"}, - expect: false, - }, - { - set: []string{"../foo", "foo/bar"}, - expect: true, - }, - { - set: []string{`C:\foo\bar`, `C:\foo\bar\zee`}, - expect: false, - }, - { - set: []string{`C:\`, `C:\foo\bar`}, - expect: false, - }, - { - set: []string{`D:\foo`, `E:\foo`}, - expect: true, - }, - { - set: []string{`D:\foo`, `D:\foo\bar`, `C:\foo`}, - expect: true, - }, - { - set: []string{"/foo", "/", "/bar"}, - expect: true, - }, - } { - actual := multipleTopLevels(tc.set) - if actual != tc.expect { - t.Errorf("Test %d: %v: Expected %t but got %t", i, tc.set, tc.expect, actual) - } - } -} - -func TestArchiveUnarchive(t *testing.T) { - for _, af := range archiveFormats { - au, ok := af.(archiverUnarchiver) - if !ok { - t.Errorf("%s (%T): not an Archiver and Unarchiver", af, af) - continue - } - testArchiveUnarchive(t, au) - } -} - -func testArchiveUnarchive(t *testing.T, au archiverUnarchiver) { - auStr := fmt.Sprintf("%s", au) - - tmp, err := ioutil.TempDir("", "archiver_test") - if err != nil { - t.Fatalf("[%s] %v", auStr, err) - } - defer os.RemoveAll(tmp) - - // Test creating archive - outfile := filepath.Join(tmp, "archiver_test."+auStr) - err = au.Archive([]string{"testdata"}, outfile) - if err != nil { - t.Fatalf("[%s] making archive: didn't expect an error, but got: %v", auStr, err) - } - - // Test format matching (TODO: Make this its own test, out of band with the archive/unarchive tests) - //testMatching(t, au, outfile) // TODO: Disabled until we can finish implementing this for compressed tar formats - - // Test extracting archive - 
dest := filepath.Join(tmp, "extraction_test_"+auStr) - os.Mkdir(dest, 0755) - err = au.Unarchive(outfile, dest) - if err != nil { - t.Fatalf("[%s] extracting archive [%s -> %s]: didn't expect an error, but got: %v", auStr, outfile, dest, err) - } - - // Check that what was extracted is what was compressed - symmetricTest(t, auStr, dest) -} - -// testMatching tests that au can match the format of archiveFile. -func testMatching(t *testing.T, au archiverUnarchiver, archiveFile string) { - m, ok := au.(Matcher) - if !ok { - t.Logf("[NOTICE] %T (%s) is not a Matcher", au, au) - return - } - - file, err := os.Open(archiveFile) - if err != nil { - t.Fatalf("[%s] opening file for matching: %v", au, err) - } - defer file.Close() - - tmpBuf := make([]byte, 2048) - io.ReadFull(file, tmpBuf) - - matched, err := m.Match(file) - if err != nil { - t.Fatalf("%s (%T): testing matching: got error, expected none: %v", m, m, err) - } - if !matched { - t.Fatalf("%s (%T): format should have matched, but didn't", m, m) - } -} - -// symmetricTest compares the contents of a destination directory to the contents -// of the test corpus and tests that they are equal. -func symmetricTest(t *testing.T, formatName, dest string) { - var expectedFileCount int - filepath.Walk("testdata", func(fpath string, info os.FileInfo, err error) error { - expectedFileCount++ - return nil - }) - - // If outputs equals inputs, we're good; traverse output files - // and compare file names, file contents, and file count. 
- var actualFileCount int - filepath.Walk(dest, func(fpath string, info os.FileInfo, err error) error { - if fpath == dest { - return nil - } - actualFileCount++ - - origPath, err := filepath.Rel(dest, fpath) - if err != nil { - t.Fatalf("[%s] %s: Error inducing original file path: %v", formatName, fpath, err) - } - - if info.IsDir() { - // stat dir instead of read file - _, err = os.Stat(origPath) - if err != nil { - t.Fatalf("[%s] %s: Couldn't stat original directory (%s): %v", formatName, - fpath, origPath, err) - } - return nil - } - - expectedFileInfo, err := os.Stat(origPath) - if err != nil { - t.Fatalf("[%s] %s: Error obtaining original file info: %v", formatName, fpath, err) - } - expected, err := ioutil.ReadFile(origPath) - if err != nil { - t.Fatalf("[%s] %s: Couldn't open original file (%s) from disk: %v", formatName, - fpath, origPath, err) - } - - actualFileInfo, err := os.Stat(fpath) - if err != nil { - t.Fatalf("[%s] %s: Error obtaining actual file info: %v", formatName, fpath, err) - } - actual, err := ioutil.ReadFile(fpath) - if err != nil { - t.Fatalf("[%s] %s: Couldn't open new file from disk: %v", formatName, fpath, err) - } - - if actualFileInfo.Mode() != expectedFileInfo.Mode() { - t.Fatalf("[%s] %s: File mode differed between on disk and compressed", formatName, - expectedFileInfo.Mode().String()+" : "+actualFileInfo.Mode().String()) - } - if !bytes.Equal(expected, actual) { - t.Fatalf("[%s] %s: File contents differed between on disk and compressed", formatName, origPath) - } - - return nil - }) - - if got, want := actualFileCount, expectedFileCount; got != want { - t.Fatalf("[%s] Expected %d resulting files, got %d", formatName, want, got) - } -} - -var archiveFormats = []interface{}{ - DefaultZip, - DefaultTar, - DefaultTarBz2, - DefaultTarGz, - DefaultTarLz4, - DefaultTarSz, - DefaultTarXz, -} - -type archiverUnarchiver interface { - Archiver - Unarchiver -} diff --git a/archiver/rar.go b/archiver/rar.go deleted file mode 100644 index 
ba55fecf..00000000 --- a/archiver/rar.go +++ /dev/null @@ -1,375 +0,0 @@ -package archiver - -import ( - "bytes" - "fmt" - "io" - "log" - "os" - "path" - "path/filepath" - "time" - - "github.com/nwaples/rardecode" -) - -// Rar provides facilities for reading RAR archives. -// See https://www.rarlab.com/technote.htm. -type Rar struct { - // Whether to overwrite existing files; if false, - // an error is returned if the file exists. - OverwriteExisting bool - - // Whether to make all the directories necessary - // to create a rar archive in the desired path. - MkdirAll bool - - // A single top-level folder can be implicitly - // created by the Unarchive method if the files - // to be extracted from the archive do not all - // have a common root. This roughly mimics the - // behavior of archival tools integrated into OS - // file browsers which create a subfolder to - // avoid unexpectedly littering the destination - // folder with potentially many files, causing a - // problematic cleanup/organization situation. - // This feature is available for both creation - // and extraction of archives, but may be slightly - // inefficient with lots and lots of files, - // especially on extraction. - ImplicitTopLevelFolder bool - - // If true, errors encountered during reading - // or writing a single file will be logged and - // the operation will continue on remaining files. - ContinueOnError bool - - // The password to open archives (optional). - Password string - - rr *rardecode.Reader // underlying stream reader - rc *rardecode.ReadCloser // supports multi-volume archives (files only) -} - -// Unarchive unpacks the .rar file at source to destination. -// Destination will be treated as a folder name. It supports -// multi-volume archives. 
-func (r *Rar) Unarchive(source, destination string) error { - if !fileExists(destination) && r.MkdirAll { - err := mkdir(destination) - if err != nil { - return fmt.Errorf("preparing destination: %v", err) - } - } - - // if the files in the archive do not all share a common - // root, then make sure we extract to a single subfolder - // rather than potentially littering the destination... - if r.ImplicitTopLevelFolder { - var err error - destination, err = r.addTopLevelFolder(source, destination) - if err != nil { - return fmt.Errorf("scanning source archive: %v", err) - } - } - - err := r.OpenFile(source) - if err != nil { - return fmt.Errorf("opening rar archive for reading: %v", err) - } - defer r.Close() - - for { - err := r.unrarNext(destination) - if err == io.EOF { - break - } - if err != nil { - if r.ContinueOnError { - log.Printf("[ERROR] Reading file in rar archive: %v", err) - continue - } - return fmt.Errorf("reading file in rar archive: %v", err) - } - } - - return nil -} - -// addTopLevelFolder scans the files contained inside -// the tarball named sourceArchive and returns a modified -// destination if all the files do not share the same -// top-level folder. 
-func (r *Rar) addTopLevelFolder(sourceArchive, destination string) (string, error) { - file, err := os.Open(sourceArchive) - if err != nil { - return "", fmt.Errorf("opening source archive: %v", err) - } - defer file.Close() - - rc, err := rardecode.NewReader(file, r.Password) - if err != nil { - return "", fmt.Errorf("creating archive reader: %v", err) - } - - var files []string - for { - hdr, err := rc.Next() - if err == io.EOF { - break - } - if err != nil { - return "", fmt.Errorf("scanning tarball's file listing: %v", err) - } - files = append(files, hdr.Name) - } - - if multipleTopLevels(files) { - destination = filepath.Join(destination, folderNameFromFileName(sourceArchive)) - } - - return destination, nil -} - -func (r *Rar) unrarNext(to string) error { - f, err := r.Read() - if err != nil { - return err // don't wrap error; calling loop must break on io.EOF - } - header, ok := f.Header.(*rardecode.FileHeader) - if !ok { - return fmt.Errorf("expected header to be *rardecode.FileHeader but was %T", f.Header) - } - return r.unrarFile(f, filepath.Join(to, header.Name)) -} - -func (r *Rar) unrarFile(f File, to string) error { - // do not overwrite existing files, if configured - if !f.IsDir() && !r.OverwriteExisting && fileExists(to) { - return fmt.Errorf("file already exists: %s", to) - } - - hdr, ok := f.Header.(*rardecode.FileHeader) - if !ok { - return fmt.Errorf("expected header to be *rardecode.FileHeader but was %T", f.Header) - } - - // if files come before their containing folders, then we must - // create their folders before writing the file - err := mkdir(filepath.Dir(to)) - if err != nil { - return fmt.Errorf("making parent directories: %v", err) - } - - return writeNewFile(to, r.rr, hdr.Mode()) -} - -// OpenFile opens filename for reading. This method supports -// multi-volume archives, whereas Open does not (but Open -// supports any stream, not just files). 
-func (r *Rar) OpenFile(filename string) error { - if r.rr != nil { - return fmt.Errorf("rar archive is already open for reading") - } - var err error - r.rc, err = rardecode.OpenReader(filename, r.Password) - if err != nil { - return err - } - r.rr = &r.rc.Reader - return nil -} - -// Open opens t for reading an archive from -// in. The size parameter is not used. -func (r *Rar) Open(in io.Reader, size int64) error { - if r.rr != nil { - return fmt.Errorf("rar archive is already open for reading") - } - var err error - r.rr, err = rardecode.NewReader(in, r.Password) - return err -} - -// Read reads the next file from t, which must have -// already been opened for reading. If there are no -// more files, the error is io.EOF. The File must -// be closed when finished reading from it. -func (r *Rar) Read() (File, error) { - if r.rr == nil { - return File{}, fmt.Errorf("rar archive is not open") - } - - hdr, err := r.rr.Next() - if err != nil { - return File{}, err // don't wrap error; preserve io.EOF - } - - file := File{ - FileInfo: rarFileInfo{hdr}, - Header: hdr, - ReadCloser: ReadFakeCloser{r.rr}, - } - - return file, nil -} - -// Close closes the rar archive(s) opened by Create and Open. -func (r *Rar) Close() error { - var err error - if r.rc != nil { - rc := r.rc - r.rc = nil - err = rc.Close() - } - if r.rr != nil { - r.rr = nil - } - return err -} - -// Walk calls walkFn for each visited item in archive. 
-func (r *Rar) Walk(archive string, walkFn WalkFunc) error { - file, err := os.Open(archive) - if err != nil { - return fmt.Errorf("opening archive file: %v", err) - } - defer file.Close() - - err = r.Open(file, 0) - if err != nil { - return fmt.Errorf("opening archive: %v", err) - } - defer r.Close() - - for { - f, err := r.Read() - if err == io.EOF { - break - } - if err != nil { - if r.ContinueOnError { - log.Printf("[ERROR] Opening next file: %v", err) - continue - } - return fmt.Errorf("opening next file: %v", err) - } - err = walkFn(f) - if err != nil { - if err == ErrStopWalk { - break - } - if r.ContinueOnError { - log.Printf("[ERROR] Walking %s: %v", f.Name(), err) - continue - } - return fmt.Errorf("walking %s: %v", f.Name(), err) - } - } - - return nil -} - -// Extract extracts a single file from the rar archive. -// If the target is a directory, the entire folder will -// be extracted into destination. -func (r *Rar) Extract(source, target, destination string) error { - // target refers to a path inside the archive, which should be clean also - target = path.Clean(target) - - // if the target ends up being a directory, then - // we will continue walking and extracting files - // until we are no longer within that directory - var targetDirPath string - - return r.Walk(source, func(f File) error { - th, ok := f.Header.(*rardecode.FileHeader) - if !ok { - return fmt.Errorf("expected header to be *rardecode.FileHeader but was %T", f.Header) - } - - // importantly, cleaning the path strips tailing slash, - // which must be appended to folders within the archive - name := path.Clean(th.Name) - if f.IsDir() && target == name { - targetDirPath = path.Dir(name) - } - - if within(target, th.Name) { - // either this is the exact file we want, or is - // in the directory we want to extract - - // build the filename we will extract to - end, err := filepath.Rel(targetDirPath, th.Name) - if err != nil { - return fmt.Errorf("relativizing paths: %v", err) - } - joined 
:= filepath.Join(destination, end) - - err = r.unrarFile(f, joined) - if err != nil { - return fmt.Errorf("extracting file %s: %v", th.Name, err) - } - - // if our target was not a directory, stop walk - if targetDirPath == "" { - return ErrStopWalk - } - } else if targetDirPath != "" { - // finished walking the entire directory - return ErrStopWalk - } - - return nil - }) -} - -// Match returns true if the format of file matches this -// type's format. It should not affect reader position. -func (*Rar) Match(file *os.File) (bool, error) { - currentPos, err := file.Seek(0, io.SeekCurrent) - if err != nil { - return false, err - } - _, err = file.Seek(0, 0) - if err != nil { - return false, err - } - defer file.Seek(currentPos, io.SeekStart) - - buf := make([]byte, 8) - if n, err := file.Read(buf); err != nil || n < 8 { - return false, nil - } - hasTarHeader := bytes.Equal(buf[:7], []byte("Rar!\x1a\x07\x00")) || // ver 1.5 - bytes.Equal(buf, []byte("Rar!\x1a\x07\x01\x00")) // ver 5.0 - return hasTarHeader, nil -} - -func (r *Rar) String() string { return "rar" } - -type rarFileInfo struct { - fh *rardecode.FileHeader -} - -func (rfi rarFileInfo) Name() string { return rfi.fh.Name } -func (rfi rarFileInfo) Size() int64 { return rfi.fh.UnPackedSize } -func (rfi rarFileInfo) Mode() os.FileMode { return rfi.fh.Mode() } -func (rfi rarFileInfo) ModTime() time.Time { return rfi.fh.ModificationTime } -func (rfi rarFileInfo) IsDir() bool { return rfi.fh.IsDir } -func (rfi rarFileInfo) Sys() interface{} { return nil } - -// Compile-time checks to ensure type implements desired interfaces. -var ( - _ = Reader(new(Rar)) - _ = Unarchiver(new(Rar)) - _ = Walker(new(Rar)) - _ = Extractor(new(Rar)) - _ = Matcher(new(Rar)) - _ = os.FileInfo(rarFileInfo{}) -) - -// DefaultRar is a convenient archiver ready to use. 
-var DefaultRar = &Rar{ - MkdirAll: true, -} diff --git a/archiver/tar.go b/archiver/tar.go deleted file mode 100644 index dd9cf0d2..00000000 --- a/archiver/tar.go +++ /dev/null @@ -1,603 +0,0 @@ -package archiver - -import ( - "archive/tar" - "bytes" - "fmt" - "io" - "log" - "os" - "path" - "path/filepath" - "strconv" - "strings" -) - -// Tar provides facilities for operating TAR archives. -// See http://www.gnu.org/software/tar/manual/html_node/Standard.html. -type Tar struct { - // Whether to overwrite existing files; if false, - // an error is returned if the file exists. - OverwriteExisting bool - - // Whether to make all the directories necessary - // to create a tar archive in the desired path. - MkdirAll bool - - // A single top-level folder can be implicitly - // created by the Archive or Unarchive methods - // if the files to be added to the archive - // or the files to be extracted from the archive - // do not all have a common root. This roughly - // mimics the behavior of archival tools integrated - // into OS file browsers which create a subfolder - // to avoid unexpectedly littering the destination - // folder with potentially many files, causing a - // problematic cleanup/organization situation. - // This feature is available for both creation - // and extraction of archives, but may be slightly - // inefficient with lots and lots of files, - // especially on extraction. - ImplicitTopLevelFolder bool - - // If true, errors encountered during reading - // or writing a single file will be logged and - // the operation will continue on remaining files. - ContinueOnError bool - - tw *tar.Writer - tr *tar.Reader - - readerWrapFn func(io.Reader) (io.Reader, error) - writerWrapFn func(io.Writer) (io.Writer, error) - cleanupWrapFn func() -} - -// Archive creates a tarball file at destination containing -// the files listed in sources. The destination must end with -// ".tar". 
File paths can be those of regular files or -// directories; directories will be recursively added. -func (t *Tar) Archive(sources []string, destination string) error { - if t.writerWrapFn == nil && !strings.HasSuffix(destination, ".tar") { - return fmt.Errorf("output filename must have .tar extension") - } - if !t.OverwriteExisting && fileExists(destination) { - return fmt.Errorf("file already exists: %s", destination) - } - - // make the folder to contain the resulting archive - // if it does not already exist - destDir := filepath.Dir(destination) - if t.MkdirAll && !fileExists(destDir) { - err := mkdir(destDir) - if err != nil { - return fmt.Errorf("making folder for destination: %v", err) - } - } - - out, err := os.Create(destination) - if err != nil { - return fmt.Errorf("creating %s: %v", destination, err) - } - defer out.Close() - - err = t.Create(out) - if err != nil { - return fmt.Errorf("creating tar: %v", err) - } - defer t.Close() - - var topLevelFolder string - if t.ImplicitTopLevelFolder && multipleTopLevels(sources) { - topLevelFolder = folderNameFromFileName(destination) - } - - for _, source := range sources { - err := t.writeWalk(source, topLevelFolder, destination) - if err != nil { - return fmt.Errorf("walking %s: %v", source, err) - } - } - - return nil -} - -// Unarchive unpacks the .tar file at source to destination. -// Destination will be treated as a folder name. -func (t *Tar) Unarchive(source, destination string) error { - if !fileExists(destination) && t.MkdirAll { - err := mkdir(destination) - if err != nil { - return fmt.Errorf("preparing destination: %v", err) - } - } - - // if the files in the archive do not all share a common - // root, then make sure we extract to a single subfolder - // rather than potentially littering the destination... 
- if t.ImplicitTopLevelFolder { - var err error - destination, err = t.addTopLevelFolder(source, destination) - if err != nil { - return fmt.Errorf("scanning source archive: %v", err) - } - } - - file, err := os.Open(source) - if err != nil { - return fmt.Errorf("opening source archive: %v", err) - } - defer file.Close() - - err = t.Open(file, 0) - if err != nil { - return fmt.Errorf("opening tar archive for reading: %v", err) - } - defer t.Close() - - for { - err := t.untarNext(destination) - if err == io.EOF { - break - } - if err != nil { - if t.ContinueOnError { - log.Printf("[ERROR] Reading file in tar archive: %v", err) - continue - } - return fmt.Errorf("reading file in tar archive: %v", err) - } - } - - return nil -} - -// addTopLevelFolder scans the files contained inside -// the tarball named sourceArchive and returns a modified -// destination if all the files do not share the same -// top-level folder. -func (t *Tar) addTopLevelFolder(sourceArchive, destination string) (string, error) { - file, err := os.Open(sourceArchive) - if err != nil { - return "", fmt.Errorf("opening source archive: %v", err) - } - defer file.Close() - - // if the reader is to be wrapped, ensure we do that now - // or we will not be able to read the archive successfully - reader := io.Reader(file) - if t.readerWrapFn != nil { - reader, err = t.readerWrapFn(reader) - if err != nil { - return "", fmt.Errorf("wrapping reader: %v", err) - } - } - if t.cleanupWrapFn != nil { - defer t.cleanupWrapFn() - } - - tr := tar.NewReader(reader) - - var files []string - for { - hdr, err := tr.Next() - if err == io.EOF { - break - } - if err != nil { - return "", fmt.Errorf("scanning tarball's file listing: %v", err) - } - files = append(files, hdr.Name) - } - - if multipleTopLevels(files) { - destination = filepath.Join(destination, folderNameFromFileName(sourceArchive)) - } - - return destination, nil -} - -func (t *Tar) untarNext(to string) error { - f, err := t.Read() - if err != nil { - 
return err // don't wrap error; calling loop must break on io.EOF - } - header, ok := f.Header.(*tar.Header) - if !ok { - return fmt.Errorf("expected header to be *tar.Header but was %T", f.Header) - } - return t.untarFile(f, filepath.Join(to, header.Name)) -} - -func (t *Tar) untarFile(f File, to string) error { - // do not overwrite existing files, if configured - if !f.IsDir() && !t.OverwriteExisting && fileExists(to) { - return fmt.Errorf("file already exists: %s", to) - } - - hdr, ok := f.Header.(*tar.Header) - if !ok { - return fmt.Errorf("expected header to be *tar.Header but was %T", f.Header) - } - - switch hdr.Typeflag { - case tar.TypeDir: - return mkdir(to) - case tar.TypeReg, tar.TypeRegA, tar.TypeChar, tar.TypeBlock, tar.TypeFifo: - return writeNewFile(to, f, f.Mode()) - case tar.TypeSymlink: - return writeNewSymbolicLink(to, hdr.Linkname) - case tar.TypeLink: - return writeNewHardLink(to, filepath.Join(to, hdr.Linkname)) - case tar.TypeXGlobalHeader: - return nil // ignore the pax global header from git-generated tarballs - default: - return fmt.Errorf("%s: unknown type flag: %c", hdr.Name, hdr.Typeflag) - } -} - -func (t *Tar) writeWalk(source, topLevelFolder, destination string) error { - sourceAbs, err := filepath.Abs(source) - if err != nil { - return fmt.Errorf("getting absolute path: %v", err) - } - sourceInfo, err := os.Stat(sourceAbs) - if err != nil { - return fmt.Errorf("%s: stat: %v", source, err) - } - destAbs, err := filepath.Abs(destination) - if err != nil { - return fmt.Errorf("%s: getting absolute path of destination %s: %v", source, destination, err) - } - - var baseDir string - if topLevelFolder != "" { - baseDir = topLevelFolder - } - if sourceInfo.IsDir() { - baseDir = path.Join(baseDir, sourceInfo.Name()) - } - - return filepath.Walk(source, func(fpath string, info os.FileInfo, err error) error { - handleErr := func(err error) error { - if t.ContinueOnError { - log.Printf("[ERROR] Walking %s: %v", fpath, err) - return nil - } - 
return err - } - if err != nil { - return handleErr(fmt.Errorf("traversing %s: %v", fpath, err)) - } - if info == nil { - return handleErr(fmt.Errorf("no file info")) - } - - // make sure we do not copy our output file into itself - fpathAbs, err := filepath.Abs(fpath) - if err != nil { - return handleErr(fmt.Errorf("%s: getting absolute path: %v", fpath, err)) - } - if within(fpathAbs, destAbs) { - return nil - } - - // build the name to be used in the archive - name, err := filepath.Rel(source, fpath) - if err != nil { - return handleErr(err) - } - nameInArchive := path.Join(baseDir, filepath.ToSlash(name)) - - file, err := os.Open(fpath) - if err != nil { - return handleErr(fmt.Errorf("%s: opening: %v", fpath, err)) - } - defer file.Close() - - err = t.Write(File{ - FileInfo: FileInfo{ - FileInfo: info, - CustomName: nameInArchive, - }, - ReadCloser: file, - }) - if err != nil { - return handleErr(fmt.Errorf("%s: writing: %s", fpath, err)) - } - - return nil - }) -} - -// Create opens t for writing a tar archive to out. -func (t *Tar) Create(out io.Writer) error { - if t.tw != nil { - return fmt.Errorf("tar archive is already created for writing") - } - - // wrapping writers allows us to output - // compressed tarballs, for example - if t.writerWrapFn != nil { - var err error - out, err = t.writerWrapFn(out) - if err != nil { - return fmt.Errorf("wrapping writer: %v", err) - } - } - - t.tw = tar.NewWriter(out) - return nil -} - -// Write writes f to t, which must have been opened for writing first. 
-func (t *Tar) Write(f File) error { - if t.tw == nil { - return fmt.Errorf("tar archive was not created for writing first") - } - if f.FileInfo == nil { - return fmt.Errorf("no file info") - } - if f.FileInfo.Name() == "" { - return fmt.Errorf("missing file name") - } - if f.ReadCloser == nil { - return fmt.Errorf("%s: no way to read file contents", f.Name()) - } - - hdr, err := tar.FileInfoHeader(f, f.Name()) - if err != nil { - return fmt.Errorf("%s: making header: %v", f.Name(), err) - } - - err = t.tw.WriteHeader(hdr) - if err != nil { - return fmt.Errorf("%s: writing header: %v", hdr.Name, err) - } - - if f.IsDir() { - return nil - } - - if hdr.Typeflag == tar.TypeReg { - _, err := io.Copy(t.tw, f) - if err != nil { - return fmt.Errorf("%s: copying contents: %v", f.Name(), err) - } - } - - return nil -} - -// Open opens t for reading an archive from -// in. The size parameter is not used. -func (t *Tar) Open(in io.Reader, size int64) error { - if t.tr != nil { - return fmt.Errorf("tar archive is already open for reading") - } - // wrapping readers allows us to open compressed tarballs - if t.readerWrapFn != nil { - var err error - in, err = t.readerWrapFn(in) - if err != nil { - return fmt.Errorf("wrapping file reader: %v", err) - } - } - t.tr = tar.NewReader(in) - return nil -} - -// Read reads the next file from t, which must have -// already been opened for reading. If there are no -// more files, the error is io.EOF. The File must -// be closed when finished reading from it. -func (t *Tar) Read() (File, error) { - if t.tr == nil { - return File{}, fmt.Errorf("tar archive is not open") - } - - hdr, err := t.tr.Next() - if err != nil { - return File{}, err // don't wrap error; preserve io.EOF - } - - file := File{ - FileInfo: hdr.FileInfo(), - Header: hdr, - ReadCloser: ReadFakeCloser{t.tr}, - } - - return file, nil -} - -// Close closes the tar archive(s) opened by Create and Open. 
-func (t *Tar) Close() error { - var err error - if t.tr != nil { - t.tr = nil - } - if t.tw != nil { - tw := t.tw - t.tw = nil - err = tw.Close() - } - // make sure cleanup of "Reader/Writer wrapper" - // (say that ten times fast) happens AFTER the - // underlying stream is closed - if t.cleanupWrapFn != nil { - t.cleanupWrapFn() - } - return err -} - -// Walk calls walkFn for each visited item in archive. -func (t *Tar) Walk(archive string, walkFn WalkFunc) error { - file, err := os.Open(archive) - if err != nil { - return fmt.Errorf("opening archive file: %v", err) - } - defer file.Close() - - err = t.Open(file, 0) - if err != nil { - return fmt.Errorf("opening archive: %v", err) - } - defer t.Close() - - for { - f, err := t.Read() - if err == io.EOF { - break - } - if err != nil { - if t.ContinueOnError { - log.Printf("[ERROR] Opening next file: %v", err) - continue - } - return fmt.Errorf("opening next file: %v", err) - } - err = walkFn(f) - if err != nil { - if err == ErrStopWalk { - break - } - if t.ContinueOnError { - log.Printf("[ERROR] Walking %s: %v", f.Name(), err) - continue - } - return fmt.Errorf("walking %s: %v", f.Name(), err) - } - } - - return nil -} - -// Extract extracts a single file from the tar archive. -// If the target is a directory, the entire folder will -// be extracted into destination. 
-func (t *Tar) Extract(source, target, destination string) error { - // target refers to a path inside the archive, which should be clean also - target = path.Clean(target) - - // if the target ends up being a directory, then - // we will continue walking and extracting files - // until we are no longer within that directory - var targetDirPath string - - return t.Walk(source, func(f File) error { - th, ok := f.Header.(*tar.Header) - if !ok { - return fmt.Errorf("expected header to be *tar.Header but was %T", f.Header) - } - - // importantly, cleaning the path strips tailing slash, - // which must be appended to folders within the archive - name := path.Clean(th.Name) - if f.IsDir() && target == name { - targetDirPath = path.Dir(name) - } - - if within(target, th.Name) { - // either this is the exact file we want, or is - // in the directory we want to extract - - // build the filename we will extract to - end, err := filepath.Rel(targetDirPath, th.Name) - if err != nil { - return fmt.Errorf("relativizing paths: %v", err) - } - joined := filepath.Join(destination, end) - - err = t.untarFile(f, joined) - if err != nil { - return fmt.Errorf("extracting file %s: %v", th.Name, err) - } - - // if our target was not a directory, stop walk - if targetDirPath == "" { - return ErrStopWalk - } - } else if targetDirPath != "" { - // finished walking the entire directory - return ErrStopWalk - } - - return nil - }) -} - -// Match returns true if the format of file matches this -// type's format. It should not affect reader position. 
-func (*Tar) Match(file *os.File) (bool, error) { - currentPos, err := file.Seek(0, io.SeekCurrent) - if err != nil { - return false, err - } - _, err = file.Seek(0, 0) - if err != nil { - return false, err - } - defer file.Seek(currentPos, io.SeekStart) - - buf := make([]byte, tarBlockSize) - if _, err = io.ReadFull(file, buf); err != nil { - return false, nil - } - return hasTarHeader(buf), nil -} - -// hasTarHeader checks passed bytes has a valid tar header or not. buf must -// contain at least 512 bytes and if not, it always returns false. -func hasTarHeader(buf []byte) bool { - if len(buf) < tarBlockSize { - return false - } - - b := buf[148:156] - b = bytes.Trim(b, " \x00") // clean up all spaces and null bytes - if len(b) == 0 { - return false // unknown format - } - hdrSum, err := strconv.ParseUint(string(b), 8, 64) - if err != nil { - return false - } - - // According to the go official archive/tar, Sun tar uses signed byte - // values so this calcs both signed and unsigned - var usum uint64 - var sum int64 - for i, c := range buf { - if 148 <= i && i < 156 { - c = ' ' // checksum field itself is counted as branks - } - usum += uint64(uint8(c)) - sum += int64(int8(c)) - } - - if hdrSum != usum && int64(hdrSum) != sum { - return false // invalid checksum - } - - return true -} - -func (t *Tar) String() string { return "tar" } - -const tarBlockSize = 512 - -// Compile-time checks to ensure type implements desired interfaces. -var ( - _ = Reader(new(Tar)) - _ = Writer(new(Tar)) - _ = Archiver(new(Tar)) - _ = Unarchiver(new(Tar)) - _ = Walker(new(Tar)) - _ = Extractor(new(Tar)) - _ = Matcher(new(Tar)) -) - -// DefaultTar is a convenient archiver ready to use. 
-var DefaultTar = &Tar{ - MkdirAll: true, -} diff --git a/archiver/tarbz2.go b/archiver/tarbz2.go deleted file mode 100644 index 2b44bf4b..00000000 --- a/archiver/tarbz2.go +++ /dev/null @@ -1,112 +0,0 @@ -package archiver - -import ( - "fmt" - "io" - "strings" - - "github.com/dsnet/compress/bzip2" -) - -// TarBz2 facilitates bzip2 compression -// (https://github.com/dsnet/compress/blob/master/doc/bzip2-format.pdf) -// of tarball archives. -type TarBz2 struct { - *Tar - - CompressionLevel int -} - -// Archive creates a compressed tar file at destination -// containing the files listed in sources. The destination -// must end with ".tar.bz2" or ".tbz2". File paths can be -// those of regular files or directories; directories will -// be recursively added. -func (tbz2 *TarBz2) Archive(sources []string, destination string) error { - if !strings.HasSuffix(destination, ".tar.bz2") && - !strings.HasSuffix(destination, ".tbz2") { - return fmt.Errorf("output filename must have .tar.bz2 or .tbz2 extension") - } - tbz2.wrapWriter() - return tbz2.Tar.Archive(sources, destination) -} - -// Unarchive unpacks the compressed tarball at -// source to destination. Destination will be -// treated as a folder name. -func (tbz2 *TarBz2) Unarchive(source, destination string) error { - tbz2.wrapReader() - return tbz2.Tar.Unarchive(source, destination) -} - -// Walk calls walkFn for each visited item in archive. -func (tbz2 *TarBz2) Walk(archive string, walkFn WalkFunc) error { - tbz2.wrapReader() - return tbz2.Tar.Walk(archive, walkFn) -} - -// Create opens tbz2 for writing a compressed -// tar archive to out. -func (tbz2 *TarBz2) Create(out io.Writer) error { - tbz2.wrapWriter() - return tbz2.Create(out) -} - -// Open opens t for reading a compressed archive from -// in. The size parameter is not used. -func (tbz2 *TarBz2) Open(in io.Reader, size int64) error { - tbz2.wrapReader() - return tbz2.Tar.Open(in, size) -} - -// Extract extracts a single file from the tar archive. 
-// If the target is a directory, the entire folder will -// be extracted into destination. -func (tbz2 *TarBz2) Extract(source, target, destination string) error { - tbz2.wrapReader() - return tbz2.Tar.Extract(source, target, destination) -} - -func (tbz2 *TarBz2) wrapWriter() { - var bz2w *bzip2.Writer - tbz2.Tar.writerWrapFn = func(w io.Writer) (io.Writer, error) { - var err error - bz2w, err = bzip2.NewWriter(w, &bzip2.WriterConfig{ - Level: tbz2.CompressionLevel, - }) - return bz2w, err - } - tbz2.Tar.cleanupWrapFn = func() { - bz2w.Close() - } -} - -func (tbz2 *TarBz2) wrapReader() { - var bz2r *bzip2.Reader - tbz2.Tar.readerWrapFn = func(r io.Reader) (io.Reader, error) { - var err error - bz2r, err = bzip2.NewReader(r, nil) - return bz2r, err - } - tbz2.Tar.cleanupWrapFn = func() { - bz2r.Close() - } -} - -func (tbz2 *TarBz2) String() string { return "tar.bz2" } - -// Compile-time checks to ensure type implements desired interfaces. -var ( - _ = Reader(new(TarBz2)) - _ = Writer(new(TarBz2)) - _ = Archiver(new(TarBz2)) - _ = Unarchiver(new(TarBz2)) - _ = Walker(new(TarBz2)) - _ = Extractor(new(TarBz2)) -) - -// DefaultTarBz2 is a convenient archiver ready to use. -var DefaultTarBz2 = &TarBz2{ - CompressionLevel: bzip2.DefaultCompression, - Tar: DefaultTar, -} diff --git a/archiver/targz.go b/archiver/targz.go deleted file mode 100644 index 513e71ed..00000000 --- a/archiver/targz.go +++ /dev/null @@ -1,110 +0,0 @@ -package archiver - -import ( - "compress/gzip" - "fmt" - "io" - "strings" -) - -// TarGz facilitates gzip compression -// (RFC 1952) of tarball archives. -type TarGz struct { - *Tar - - // The compression level to use, as described - // in the compress/gzip package. - CompressionLevel int -} - -// Archive creates a compressed tar file at destination -// containing the files listed in sources. The destination -// must end with ".tar.gz" or ".tgz". File paths can be -// those of regular files or directories; directories will -// be recursively added. 
-func (tgz *TarGz) Archive(sources []string, destination string) error { - if !strings.HasSuffix(destination, ".tar.gz") && - !strings.HasSuffix(destination, ".tgz") { - return fmt.Errorf("output filename must have .tar.gz or .tgz extension") - } - tgz.wrapWriter() - return tgz.Tar.Archive(sources, destination) -} - -// Unarchive unpacks the compressed tarball at -// source to destination. Destination will be -// treated as a folder name. -func (tgz *TarGz) Unarchive(source, destination string) error { - tgz.wrapReader() - return tgz.Tar.Unarchive(source, destination) -} - -// Walk calls walkFn for each visited item in archive. -func (tgz *TarGz) Walk(archive string, walkFn WalkFunc) error { - tgz.wrapReader() - return tgz.Tar.Walk(archive, walkFn) -} - -// Create opens txz for writing a compressed -// tar archive to out. -func (tgz *TarGz) Create(out io.Writer) error { - tgz.wrapWriter() - return tgz.Create(out) -} - -// Open opens t for reading a compressed archive from -// in. The size parameter is not used. -func (tgz *TarGz) Open(in io.Reader, size int64) error { - tgz.wrapReader() - return tgz.Tar.Open(in, size) -} - -// Extract extracts a single file from the tar archive. -// If the target is a directory, the entire folder will -// be extracted into destination. 
-func (tgz *TarGz) Extract(source, target, destination string) error { - tgz.wrapReader() - return tgz.Tar.Extract(source, target, destination) -} - -func (tgz *TarGz) wrapWriter() { - var gzw *gzip.Writer - tgz.Tar.writerWrapFn = func(w io.Writer) (io.Writer, error) { - var err error - gzw, err = gzip.NewWriterLevel(w, tgz.CompressionLevel) - return gzw, err - } - tgz.Tar.cleanupWrapFn = func() { - gzw.Close() - } -} - -func (tgz *TarGz) wrapReader() { - var gzr *gzip.Reader - tgz.Tar.readerWrapFn = func(r io.Reader) (io.Reader, error) { - var err error - gzr, err = gzip.NewReader(r) - return gzr, err - } - tgz.Tar.cleanupWrapFn = func() { - gzr.Close() - } -} - -func (tgz *TarGz) String() string { return "tar.gz" } - -// Compile-time checks to ensure type implements desired interfaces. -var ( - _ = Reader(new(TarGz)) - _ = Writer(new(TarGz)) - _ = Archiver(new(TarGz)) - _ = Unarchiver(new(TarGz)) - _ = Walker(new(TarGz)) - _ = Extractor(new(TarGz)) -) - -// DefaultTarGz is a convenient archiver ready to use. -var DefaultTarGz = &TarGz{ - CompressionLevel: gzip.DefaultCompression, - Tar: DefaultTar, -} diff --git a/archiver/tarlz4.go b/archiver/tarlz4.go deleted file mode 100644 index 10be5f26..00000000 --- a/archiver/tarlz4.go +++ /dev/null @@ -1,107 +0,0 @@ -package archiver - -import ( - "fmt" - "io" - "strings" - - "github.com/pierrec/lz4" -) - -// TarLz4 facilitates lz4 compression -// (https://github.com/lz4/lz4/tree/master/doc) -// of tarball archives. -type TarLz4 struct { - *Tar - - // The compression level to use when writing. - // Minimum 0 (fast compression), maximum 12 - // (most space savings). - CompressionLevel int -} - -// Archive creates a compressed tar file at destination -// containing the files listed in sources. The destination -// must end with ".tar.lz4" or ".tlz4". File paths can be -// those of regular files or directories; directories will -// be recursively added. 
-func (tlz4 *TarLz4) Archive(sources []string, destination string) error { - if !strings.HasSuffix(destination, ".tar.lz4") && - !strings.HasSuffix(destination, ".tlz4") { - return fmt.Errorf("output filename must have .tar.lz4 or .tlz4 extension") - } - tlz4.wrapWriter() - return tlz4.Tar.Archive(sources, destination) -} - -// Unarchive unpacks the compressed tarball at -// source to destination. Destination will be -// treated as a folder name. -func (tlz4 *TarLz4) Unarchive(source, destination string) error { - tlz4.wrapReader() - return tlz4.Tar.Unarchive(source, destination) -} - -// Walk calls walkFn for each visited item in archive. -func (tlz4 *TarLz4) Walk(archive string, walkFn WalkFunc) error { - tlz4.wrapReader() - return tlz4.Tar.Walk(archive, walkFn) -} - -// Create opens tlz4 for writing a compressed -// tar archive to out. -func (tlz4 *TarLz4) Create(out io.Writer) error { - tlz4.wrapWriter() - return tlz4.Create(out) -} - -// Open opens t for reading a compressed archive from -// in. The size parameter is not used. -func (tlz4 *TarLz4) Open(in io.Reader, size int64) error { - tlz4.wrapReader() - return tlz4.Tar.Open(in, size) -} - -// Extract extracts a single file from the tar archive. -// If the target is a directory, the entire folder will -// be extracted into destination. 
-func (tlz4 *TarLz4) Extract(source, target, destination string) error { - tlz4.wrapReader() - return tlz4.Tar.Extract(source, target, destination) -} - -func (tlz4 *TarLz4) wrapWriter() { - var lz4w *lz4.Writer - tlz4.Tar.writerWrapFn = func(w io.Writer) (io.Writer, error) { - lz4w = lz4.NewWriter(w) - lz4w.Header.CompressionLevel = tlz4.CompressionLevel - return lz4w, nil - } - tlz4.Tar.cleanupWrapFn = func() { - lz4w.Close() - } -} - -func (tlz4 *TarLz4) wrapReader() { - tlz4.Tar.readerWrapFn = func(r io.Reader) (io.Reader, error) { - return lz4.NewReader(r), nil - } -} - -func (tlz4 *TarLz4) String() string { return "tar.lz4" } - -// Compile-time checks to ensure type implements desired interfaces. -var ( - _ = Reader(new(TarLz4)) - _ = Writer(new(TarLz4)) - _ = Archiver(new(TarLz4)) - _ = Unarchiver(new(TarLz4)) - _ = Walker(new(TarLz4)) - _ = Extractor(new(TarLz4)) -) - -// DefaultTarLz4 is a convenient archiver ready to use. -var DefaultTarLz4 = &TarLz4{ - CompressionLevel: 9, // https://github.com/lz4/lz4/blob/1b819bfd633ae285df2dfe1b0589e1ec064f2873/lib/lz4hc.h#L48 - Tar: DefaultTar, -} diff --git a/archiver/tarsz.go b/archiver/tarsz.go deleted file mode 100644 index 4533c3df..00000000 --- a/archiver/tarsz.go +++ /dev/null @@ -1,100 +0,0 @@ -package archiver - -import ( - "fmt" - "io" - "strings" - - "github.com/golang/snappy" -) - -// TarSz facilitates Snappy compression -// (https://github.com/google/snappy) -// of tarball archives. -type TarSz struct { - *Tar -} - -// Archive creates a compressed tar file at destination -// containing the files listed in sources. The destination -// must end with ".tar.sz" or ".tsz". File paths can be -// those of regular files or directories; directories will -// be recursively added. 
-func (tsz *TarSz) Archive(sources []string, destination string) error { - if !strings.HasSuffix(destination, ".tar.sz") && - !strings.HasSuffix(destination, ".tsz") { - return fmt.Errorf("output filename must have .tar.sz or .tsz extension") - } - tsz.wrapWriter() - return tsz.Tar.Archive(sources, destination) -} - -// Unarchive unpacks the compressed tarball at -// source to destination. Destination will be -// treated as a folder name. -func (tsz *TarSz) Unarchive(source, destination string) error { - tsz.wrapReader() - return tsz.Tar.Unarchive(source, destination) -} - -// Walk calls walkFn for each visited item in archive. -func (tsz *TarSz) Walk(archive string, walkFn WalkFunc) error { - tsz.wrapReader() - return tsz.Tar.Walk(archive, walkFn) -} - -// Create opens tsz for writing a compressed -// tar archive to out. -func (tsz *TarSz) Create(out io.Writer) error { - tsz.wrapWriter() - return tsz.Create(out) -} - -// Open opens t for reading a compressed archive from -// in. The size parameter is not used. -func (tsz *TarSz) Open(in io.Reader, size int64) error { - tsz.wrapReader() - return tsz.Tar.Open(in, size) -} - -// Extract extracts a single file from the tar archive. -// If the target is a directory, the entire folder will -// be extracted into destination. -func (tsz *TarSz) Extract(source, target, destination string) error { - tsz.wrapReader() - return tsz.Tar.Extract(source, target, destination) -} - -func (tsz *TarSz) wrapWriter() { - var sw *snappy.Writer - tsz.Tar.writerWrapFn = func(w io.Writer) (io.Writer, error) { - sw = snappy.NewWriter(w) - return sw, nil - } - tsz.Tar.cleanupWrapFn = func() { - sw.Close() - } -} - -func (tsz *TarSz) wrapReader() { - tsz.Tar.readerWrapFn = func(r io.Reader) (io.Reader, error) { - return snappy.NewReader(r), nil - } -} - -func (tsz *TarSz) String() string { return "tar.sz" } - -// Compile-time checks to ensure type implements desired interfaces. 
-var ( - _ = Reader(new(TarSz)) - _ = Writer(new(TarSz)) - _ = Archiver(new(TarSz)) - _ = Unarchiver(new(TarSz)) - _ = Walker(new(TarSz)) - _ = Extractor(new(TarSz)) -) - -// DefaultTarSz is a convenient archiver ready to use. -var DefaultTarSz = &TarSz{ - Tar: DefaultTar, -} diff --git a/archiver/tarxz.go b/archiver/tarxz.go deleted file mode 100644 index c1d27ea9..00000000 --- a/archiver/tarxz.go +++ /dev/null @@ -1,105 +0,0 @@ -package archiver - -import ( - "fmt" - "io" - "strings" - - "github.com/ulikunitz/xz" - fastxz "github.com/xi2/xz" -) - -// TarXz facilitates xz compression -// (https://tukaani.org/xz/format.html) -// of tarball archives. -type TarXz struct { - *Tar -} - -// Archive creates a compressed tar file at destination -// containing the files listed in sources. The destination -// must end with ".tar.gz" or ".txz". File paths can be -// those of regular files or directories; directories will -// be recursively added. -func (txz *TarXz) Archive(sources []string, destination string) error { - if !strings.HasSuffix(destination, ".tar.xz") && - !strings.HasSuffix(destination, ".txz") { - return fmt.Errorf("output filename must have .tar.xz or .txz extension") - } - txz.wrapWriter() - return txz.Tar.Archive(sources, destination) -} - -// Unarchive unpacks the compressed tarball at -// source to destination. Destination will be -// treated as a folder name. -func (txz *TarXz) Unarchive(source, destination string) error { - txz.wrapReader() - return txz.Tar.Unarchive(source, destination) -} - -// Walk calls walkFn for each visited item in archive. -func (txz *TarXz) Walk(archive string, walkFn WalkFunc) error { - txz.wrapReader() - return txz.Tar.Walk(archive, walkFn) -} - -// Create opens txz for writing a compressed -// tar archive to out. -func (txz *TarXz) Create(out io.Writer) error { - txz.wrapWriter() - return txz.Create(out) -} - -// Open opens t for reading a compressed archive from -// in. The size parameter is not used. 
-func (txz *TarXz) Open(in io.Reader, size int64) error { - txz.wrapReader() - return txz.Tar.Open(in, size) -} - -// Extract extracts a single file from the tar archive. -// If the target is a directory, the entire folder will -// be extracted into destination. -func (txz *TarXz) Extract(source, target, destination string) error { - txz.wrapReader() - return txz.Tar.Extract(source, target, destination) -} - -func (txz *TarXz) wrapWriter() { - var xzw *xz.Writer - txz.Tar.writerWrapFn = func(w io.Writer) (io.Writer, error) { - var err error - xzw, err = xz.NewWriter(w) - return xzw, err - } - txz.Tar.cleanupWrapFn = func() { - xzw.Close() - } -} - -func (txz *TarXz) wrapReader() { - var xzr *fastxz.Reader - txz.Tar.readerWrapFn = func(r io.Reader) (io.Reader, error) { - var err error - xzr, err = fastxz.NewReader(r, 0) - return xzr, err - } -} - -func (txz *TarXz) String() string { return "tar.xz" } - -// Compile-time checks to ensure type implements desired interfaces. -var ( - _ = Reader(new(TarXz)) - _ = Writer(new(TarXz)) - _ = Archiver(new(TarXz)) - _ = Unarchiver(new(TarXz)) - _ = Walker(new(TarXz)) - _ = Extractor(new(TarXz)) -) - -// DefaultTarXz is a convenient archiver ready to use. 
-var DefaultTarXz = &TarXz{ - Tar: DefaultTar, -} diff --git a/archiver/testdata/already-compressed.jpg b/archiver/testdata/already-compressed.jpg deleted file mode 100644 index 3d599f8065e9b339f6be888e9f7ce941825b4c98..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 8944 zcmb7pS5y;F(``ZxB^Y{@UP6`L!4L>ldIE%|^p13uCcT7ErGpBgh2D{>ROv|Xpi}`t z1pyUAFW-0n|KUE~IWKe8UUSwtYoE1dX3xLnf1dylT}>TL01yZO0B;xI-x@$20HL6y zq9lha|G{owL_kRNZxujEc6MiZ?!`@|B=n3NE9vd6n@cTP!7WB~+UxgJG0% zM4{}hjfR0kX+G28!I9t*Q#R({C@&-K0F$4m`Kycifz=l^O%+qUI33E>PkBtWb)E~0 z79f`$yqq~wV`9*dSIeC21Cq0M2HC58(cxO5RMe&J7EL#Xe2{jb-2|tdD@4C>{wtR@ z=xOOu#Z!S3c!3j+*%yM?3m4QhO%2OfPI{nPf#oj#yt}-EE+ui!PcW&n@*D9kPn{!{ zTQa2;j?x)~FbRhl`z#5V>?E<~+O#X@RvyK=jaSVtTSp8jwFgVM$kw*|HWzu6h81~O zNirD4=d6`CY>cTTO$LmqZ6H|xTIat_yEXCyuAVNIi-mOG_Cvhd6WlWL2aX>CDI z_{%dnqar;bDykek&wGCa=&FI?5>rzhn>ck6aG_k)5VIhXfyq?aHy=|eB6ImB$MJ)S zhL&jPMpfLleJ6Lh#W9>P zMwS;BnQPZv{it~0@o2!>$lZGIoXjAhvce;~EI3Ejk4LfgB{FN)m6vVQwOMD-It3kv+6<`xJcfPf;!uAh46s#QfZd9$njOVE4z~D zagUkG*zsU?>s+r+D{uQ4(@ZzNk)SxvEMTQ;BKXDQ51l9ieVSmaSBsD3dq>?J6{*dt zH@+ye?)<=2tNSZeesRcni3C(rUv(MzUjM^k~+j$vZvE34DSv{qoY!&;}&7F6g+wkx*Sx_1+? 
zsl4%6KgCCWHT9W|mENI7_f5T1&6VYkg>A{T*QPals8IJ#5l@E@yUx#gu!T_R=JJxQ z{PgTj_7e9z2SHc)_pLiGk2}sSCb>S$_w3O%Rz++kp)bz^b-R(z8>fz^ zX8K-K?b#>vnEC$KLXPZMqXbO5wP zWhR~X%ll;FqR7iol_*R6(#xujHUHV)UAe1y*-90Xz7;jLnVPPP#qQ|YvWEAy`D^kP z1{3qmxF0X7^@jY_Oa%jN@hJW>+vlu2k3TQ2zY2hwq7}KPynN|}L|M20Opb#cE6!4_pG#l9NPAQ#m|jUf}-1)kMjE~8;qM{@PFGTk?$@D{6A z{?)9;cKr3T?!rPfsEHHJ{-GqrPU7z0-F3F{E{~M=!B{vYHc;@6^|eyE=epc(gXML#f!qT+34n5fnLryYs7_wmQI%^m^NRt%HM?XEV z!@;?=r?y5@-IWDuQsmZdBR!-ZFqzc4M+eWB%&s!;eya|OOVj1(ep@hO5LV1&@Buk2C%$_ z7c6q)HkQ6@dnu_ z;QD>o)g7sa)I=mfFR?4B>_niFZ_6PT-qoq|^Gnfye}KO8YvFUd0_-DkW^ClQx&}f~ z&o@Zd$BjFWV4NIB+;`sLHH>9AEICaX_kF4uR?<@f(_VVrw~gZX^IGjv^$pp%D^-PC z{0`^E(NM1JVB0&-BWQloKfthQ{X?~VLW=&))tAje$EPhs!wrEYyg?)P)u(SBFI~$W z_Q337e#HSl{mRXk6N|Pw!9VvZF~yHBf}@l%nOG+haK@wWH=sbH(I2fFVPymO=Iz)@ zFFVI~@6}ZKx1{|2TSy*H=RP2vzGCjtl#D-GK3JhFbcmCej?9WjTW1F6`Q$sC>VIU7 zH(Y!d!LZ|3DB7M=aVN|D;jaO{H$Hakf5qo9#2|IF^WD3|%PWlOo1L99b?s;{Yw8GO z$5Not?Vf2x#YK3<_p8TR{$Jh2Pk$INFfijBAmCPINB-K`R^bU<$vbyJquFWdDN4=y^}J9tZpaTJ(9Jp4tulxZ!^&> zE-1KI=O>gSS~h(@yVxUBX>xTZAf$`X?dYeQNP{Hn%Avjca9uyx>uU#X*7OfQsF%v>er9Rp~~-Se38J^jMclB zPFj7Zch$u-C8ck4`*IHUtC-F?eeh$MWKC6e z1{QxgKWoutil+KsRg51%1b+>*T0R9WlVDKmDAWD5;SFO;FaM;h7YV<9j~wd%1AMI= zC@-FW%OMi&8!Dtn+4+%tjaxyHBxwHaJ6{DXhg&a+y?sw|qTv_R3+IKNMhC@Qj^g1x zdQ--XZ6ExHy&UBb@1NmH5l}NP0XeuMzy0--(@Jcf?9TQX)AFBi4ps%W;U**UvbK!n z6$Zh9j)a{W-t2>|2r&Gq8S=ZY=O{;e$+2|aJ zzY0YtjnhFxlz$5S1El&q4r~8KzrbGM>jiY#Rra4h-)ODduGsJ^(IDYcgv^6mDHeSl zF9inVjMIJjdyblmSp6Hh+?FpjG43*+vtuOhRV)vZL|6)2u|j|s!=}$#J)S#{{dCF- zt)bM5v<_=&)Yy7jK00<+Tqyiu_4C{Y!co34H|F;4dZu4_P$J5r>qlcV$hEWyUwS9u zA3JmennRnwyM_EkS=CCST$)(ksnV%ZJqWdvUWJ*3^_1bB$s@<~m~s8ww`&VwfxeZF3LZA;OBkK|njGv_VQOc68Ae?UcIgSkdutY)$<1 ztWut(slazWd!VSG!4>F3vDzN~^f;i>K+QN@jb_QiDNuz3TSCj1=rKM~wYWR6^VRgk zuT>-b0j9bq%D!vNw?+-qY4%;juHlJHBQ`>@{pCoqCn`H(z`rdS%lw#6{r=dRuIRlC zTGo5XR*haHZ6OYA@E5s8s$pHjKha+rglsYiu+eF#*I{docgOZvY9G{6+%wCQ-FtG= 
z39P^HQcLo{xCgkWM9CSiH8jG6Nz+pemWrqg)Rf;hBWk}=?|mp2ZJlZdU9OxaNsObg0`Xm8LJ!3gC_sgD@g3nm_uG6dIK3+V}0-sK)5zHxW~*P!5vmwY3qARF1Z#*Zms{_AYTL;r zPw+Ip`{o|c6R^Q5gPhqlrZl}_q^wN7TCn&FvJ#}CCcI_!!AScjk+p}dEQQ8<$J}3+ zG|^Qp-QyzPYa2cONE*gZfbQ+mv`kLOtyY}~4+e?+14wUv5w~dmswi$F?qN;TJkn?} zY(l20I^@x8S($(oRjv|YKG^RHnsl}Be%iWbSIvM*!I^7E0)@3jrvZG)#dbu^WLCe& zCw0x%&ksjjRsyfyt{5_%2=2g#22inoU3c*#d~5gC*la4(I%F=jO3T2H8qIncCh{lW zPz{3fO}*P{9WFiHY|J(oV_Y2nJd38HKSGr+=n9*7z(~617jQ{E*FvS-{{W+R8vX&y zRLc=1L5{Hs))~k=X2v~9hZXmQ7>q}GX=EB?u)(jvd{^$NpwXl$TDWapzHkqd8#>7- zwGqE2ybPjdul7U=28M+0fjnIbgYL>WIGANa>!uOG#y^OGR<3OY9;1gW+a{476M?$a zXDH%lZmy==v$NN*2VY@G%P(gu;%(;arE%{_;f(7dFtK0?y2b_xssut(|0fo~X+_QG zR4XNWp5{k)`DKmrq%G4Mr?MLaOjsFtTl$>vyM7+<(-ZFpE(GY{MTW13qZUHSHsfCR zGK|(TgX>Ub;(0pd+caO`^@0U^5UKPT?Qcr*e3{jZ1T$!ZT|pA4$c*Kc!o1U06k2JOf z9v#&9{brft!8$UnFeh_xSM2154m}r};=EhyNc9pp_D6RS@;k8<4Vo zMqE`ER(jkGZPx|jt035mw4?1hV+A(s4D!K-!MEjvQ+!l z+@PjNu}cFJy?+=$ZdvxKH@NBqfwFG1ugpD1j+=-&2&m47oZY+p(#5^+@+2B2xJKSJ zWNdxPysX8&vIX?BI6+g;-qyf!5CD*nfQXQogydG#`ELz;EBtUt5Yng_L%8jTI3;}} zX~j*_i~D$BaQns?zb#@qsi^-ciAnAQOuss`Lq3H>p9OA76Z+xz+XGbo0qR3fUmVDp zXcPLC)l(%6zkT(ynJ6x)pNNKP%YWXRUa4M1WyF6-Up;)!IWYX_Axs)s`tdC8$rcqI zG<%8ihVG|?a{Q0sdY>_>r#bnp6$TYo8tt2;){`UmPy-VZa1Os3hS!iNY$ zva2T&&wu|l%6dqB<-@a-5E8FIGabPCx~V2Ze^Kq|m5mlPkW%T$knsKsVt=&u1YjvZ zEkh21|8)5hylC=~4^-m`S8ydQ#Qh{A)F0!`e+4)$oGfE=`9Wel`70&*TH}?A{3yJ8 z*nsUli`+c!lWC{W-v@Lo;Q4nn1%PJLw*LVQe`N@I{z7a*(uDWkgyN2I4%@pBdx36mSegCzY)cI?4O;sB2hZC~QAg zJ$uAk540udCWhHrPx5@Uu}#X0{E|Zop-QnXotiQQv@Iv+r{K5jvAt>zSNshh$$l67 zB%hxF>Jl0B*}J$(brRPCr}IzJkKmlE&Zf)}%R$1;N=52~O)Y6N1=OP*tD3!-EGo3b zr|7F#kYV%cb~o2kcmwj0mauUg4WpFy&#l_MW$&`w*xVRxD!DU#dF*IPpRWtCE~LzM z3HD7q!8cK(Go#)Cx%yFqOlhoB%rz%HtA`p)^Vy>*P0d%M`$Ia=FJ5kYmJOpB<9_uxgD+tF1iqUEyIgj{FVITWm3vd`IQ=?tju__nW_5GMupGSTlf*mE$LBUoRyNH?jQ%RMxp(De9FGm3Ibtj5i|bx<7KlYyzXR{9EvOGf^AR9^LKSfN zMezFe$W6`|+gKy)N6whY0v~El`LWz5X!RlTS|8FdP&pK;93WoOnWKveAmj@DDH+FRo3AsVq9fiAf9i z2GVl|>Qm-TbN(@V?Gzvh0&vfQSo=8_5$W&y`^OfLeik=orU%@RDfgz 
z^rlow*Ch67HO2*{Who}ZZ-3|0_LJAGIK}{7&B~q!f9JP@M0gf*BRDl#S7xTei>ltl z_J71?89>T(D?rc^(AcZl3HH9I6TLI}D`ZCgVu<(jkE`|G2nWtU;p`ih#K99ZYs&)c zgv$R7x0z|`s;%K6@Ley3RQ`ML=AqB|4B69jQ*qiP#|Dl(Bkp$yayO_H-TW_uBmH8f zQ%+3ri3R7+PVzukG#7yGcFCZ5xsyUagrGX@9q-#DX? zDxsiU!4^L;@h)4>^*XHU^~O(h5jb8I$IU5aG7*7_5Myn*Mj~cf)*R_ZJCIkyO?sB0 za_8!dWZyrpoX|ai%N?K);WT`8XY=eyN~F#FJL)$=BMSgF?W58J6*_u^u~7UB)iK5C zD><|-WyE4Axac3il|`m2OZP(PboY%3jPSRX_zzRa%%cc809;HCWUjH_rA|6(mKGt$ zU#9#*%B0*q|GWN?94j}Wzg_1}AUIkFaFjUCnt&ucZaZv>a;WOA(x~brPpAU1)0e>FjnFMJa>IjF=N@4jBHyI zs|r}rWCH3H((k#4kI*`jGHq#aE;#vC`A!s~=;54C5u~7rB6^`yXkA08Yb(mQt+G}f zCsm}+5UPEYWW5>YxepKSfW)X zigaEe_M76P=TKrr8_pD2&IAv1`EW}h(qzYSz^2WDj50A4Z01lCd5~}6V3H4$1S_{8 zF5HCb(Yc3yk6XReI`Lv;Y6mcSKU^sbQfMS+W_Pzmh^1ITeKHN$va=?g#{HTi2~Cjt`tunb)|9q%2YBo> zj=BrZKJz=P3g1z11Q~H#pgiF`E@hhuTE_c`N_atDGmorOMDajU zks65(q;KX{!Xk5{BGfL*+W;RAA{Y)-q|b_F%ps-iEPTgVR!;s$56Dj-ZaC=-*{X5P zXVHERE6JPjV|XN)Tjpxy8_msE5;K8E**tXuNyk$6W~^`z;_k}eL`OyC3Q7`|A|{E@ z&iw%$$aa`UHwTjGGP{aYVO&+TuDBFHN07JDP*5*nJtO3tU?(#TtW<~e@fRDNttfn$ z(X+{AC6Y{AvW~7wGU*aSyB(EH%B(01Q1&t=0NFItSlcxssCL(JmSnE;EHNOqQ`#Bh z+52u4ARBq}m@1)I1d*7phJ8X*B`zD2Nk+4YXUHX53h?ltu8QlJ@pcz;-YcMXR0~OD z6?AB7w?2-1;+i`h1B2L8hkWGB6mK}PWNba50!B4LwxkT-agn`(a$kpZ?1am} zAyjnw$xtUIUc{43id8a_4B93;y|u=O1fM~-{CSq&Mf*zoO(8=C0#QiUSl8TFH5AmG7;vAJg1 zQ&%(lFuEMr!`t?O^L9*pYyTsk|EGNb03p-_oHXJRFf}ft|Mm$4K;V7Axf#~*mxqRO zm@Q&?A*k{hhsp!ofnkn&Ha0Xk4}?pKpH78L7^n3hjS=bxTDgdU^W_vi*ZnvQ6mR;T zd3e@dq=)T{R-|c5f8gG1*nMf2QFzY1>Hl?>ZZZ^ zHw(*BAnyhBic=Vp5cddTW8F-KK@N0f)59h5nMPeuUoC1XIzC=(>Gnp1BXDjXdW5?9_s8JfaVE?5^AxkF?TH8^1Zw z$gmi4%v#uHs~Qqd4fkOEphsdQ)Ew`OuM1?zrW?sOeI5`W^F(fFK`I6-@nMGqMUfsgk{Fk|dc53ID?hg=7X?jw;0 z3xCNEu}Q*T>#Ah}$IBX>o+wu`pzSxrvWK$P&yqwMy&P7a)~M7p-e`z4dUo5gFIv3C zL590@N{!iT_0KkSxL-2kJYPetHC#1 zUQNT?e{nQ)M2R%(v2Eil!iR@+L@{C{^2uPjq_@>FENy2pHe}>PkD!OpSu;i+8a6G$ z?@h*Y3n4gNSMQ(i34RSW{Dw9);cyB^$|{a8sgr4TXQ3<5>A$MPROD=01q>tA;}Z^} zOs_!mQzZP7XWPrgS``Vk8ZW7LU(l4SfWO}(zeyZ4lBP*44Uut^vHQG}O8g2tpQx5# 
z%xrP~L)M++Se1C37na-2Sn)mahL2h+8j&xWuX~?`&GnOxig}{pT6f%X6g#2$cFRZPaeb~sE7v!S~T5KqmHYVKm1$yKTerK ATmS$7 diff --git a/archiver/testdata/proverbs/extra/proverb3.txt b/archiver/testdata/proverbs/extra/proverb3.txt deleted file mode 100644 index 4a4768d4..00000000 --- a/archiver/testdata/proverbs/extra/proverb3.txt +++ /dev/null @@ -1,2 +0,0 @@ -"interface{} says nothing." - - Rob Pike \ No newline at end of file diff --git a/archiver/testdata/proverbs/proverb1.txt b/archiver/testdata/proverbs/proverb1.txt deleted file mode 100644 index 88da02ef..00000000 --- a/archiver/testdata/proverbs/proverb1.txt +++ /dev/null @@ -1,2 +0,0 @@ -"Channels orchestrate; mutexes serialize." - - Rob Pike \ No newline at end of file diff --git a/archiver/testdata/proverbs/proverb2.txt b/archiver/testdata/proverbs/proverb2.txt deleted file mode 100644 index 8e075027..00000000 --- a/archiver/testdata/proverbs/proverb2.txt +++ /dev/null @@ -1,2 +0,0 @@ -"A little copying is better than a little dependency." - - Rob Pike \ No newline at end of file diff --git a/archiver/testdata/quote1.txt b/archiver/testdata/quote1.txt deleted file mode 100644 index 1c34480d..00000000 --- a/archiver/testdata/quote1.txt +++ /dev/null @@ -1,2 +0,0 @@ -"Go has generics; they're called interfaces." - - Matt Holt \ No newline at end of file diff --git a/archiver/zip.go b/archiver/zip.go deleted file mode 100644 index 9828c630..00000000 --- a/archiver/zip.go +++ /dev/null @@ -1,573 +0,0 @@ -package archiver - -import ( - "archive/zip" - "bytes" - "compress/flate" - "fmt" - "io" - "log" - "os" - "path" - "path/filepath" - "strings" -) - -// Zip provides facilities for operating ZIP archives. -// See https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT. -type Zip struct { - // The compression level to use, as described - // in the compress/flate package. - CompressionLevel int - - // Whether to overwrite existing files; if false, - // an error is returned if the file exists. 
- OverwriteExisting bool - - // Whether to make all the directories necessary - // to create a zip archive in the desired path. - MkdirAll bool - - // If enabled, selective compression will only - // compress files which are not already in a - // compressed format; this is decided based - // simply on file extension. - SelectiveCompression bool - - // A single top-level folder can be implicitly - // created by the Archive or Unarchive methods - // if the files to be added to the archive - // or the files to be extracted from the archive - // do not all have a common root. This roughly - // mimics the behavior of archival tools integrated - // into OS file browsers which create a subfolder - // to avoid unexpectedly littering the destination - // folder with potentially many files, causing a - // problematic cleanup/organization situation. - // This feature is available for both creation - // and extraction of archives, but may be slightly - // inefficient with lots and lots of files, - // especially on extraction. - ImplicitTopLevelFolder bool - - // If true, errors encountered during reading - // or writing a single file will be logged and - // the operation will continue on remaining files. - ContinueOnError bool - - zw *zip.Writer - zr *zip.Reader - ridx int -} - -// Archive creates a .zip file at destination containing -// the files listed in sources. The destination must end -// with ".zip". File paths can be those of regular files -// or directories. Regular files are stored at the 'root' -// of the archive, and directories are recursively added. 
-func (z *Zip) Archive(sources []string, destination string) error { - if !strings.HasSuffix(destination, ".zip") { - return fmt.Errorf("output filename must have .zip extension") - } - if !z.OverwriteExisting && fileExists(destination) { - return fmt.Errorf("file already exists: %s", destination) - } - - // make the folder to contain the resulting archive - // if it does not already exist - destDir := filepath.Dir(destination) - if z.MkdirAll && !fileExists(destDir) { - err := mkdir(destDir) - if err != nil { - return fmt.Errorf("making folder for destination: %v", err) - } - } - - out, err := os.Create(destination) - if err != nil { - return fmt.Errorf("creating %s: %v", destination, err) - } - defer out.Close() - - err = z.Create(out) - if err != nil { - return fmt.Errorf("creating zip: %v", err) - } - defer z.Close() - - var topLevelFolder string - if z.ImplicitTopLevelFolder && multipleTopLevels(sources) { - topLevelFolder = folderNameFromFileName(destination) - } - - for _, source := range sources { - err := z.writeWalk(source, topLevelFolder, destination) - if err != nil { - return fmt.Errorf("walking %s: %v", source, err) - } - } - - return nil -} - -// Unarchive unpacks the .zip file at source to destination. -// Destination will be treated as a folder name. 
-func (z *Zip) Unarchive(source, destination string) error { - if !fileExists(destination) && z.MkdirAll { - err := mkdir(destination) - if err != nil { - return fmt.Errorf("preparing destination: %v", err) - } - } - - file, err := os.Open(source) - if err != nil { - return fmt.Errorf("opening source file: %v", err) - } - defer file.Close() - - fileInfo, err := file.Stat() - if err != nil { - return fmt.Errorf("statting source file: %v", err) - } - - err = z.Open(file, fileInfo.Size()) - if err != nil { - return fmt.Errorf("opening zip archive for reading: %v", err) - } - defer z.Close() - - // if the files in the archive do not all share a common - // root, then make sure we extract to a single subfolder - // rather than potentially littering the destination... - if z.ImplicitTopLevelFolder { - files := make([]string, len(z.zr.File)) - for i := range z.zr.File { - files[i] = z.zr.File[i].Name - } - if multipleTopLevels(files) { - destination = filepath.Join(destination, folderNameFromFileName(source)) - } - } - - for { - err := z.extractNext(destination) - if err == io.EOF { - break - } - if err != nil { - if z.ContinueOnError { - log.Printf("[ERROR] Reading file in zip archive: %v", err) - continue - } - return fmt.Errorf("reading file in zip archive: %v", err) - } - } - - return nil -} - -func (z *Zip) extractNext(to string) error { - f, err := z.Read() - if err != nil { - return err // don't wrap error; calling loop must break on io.EOF - } - defer f.Close() - header, ok := f.Header.(zip.FileHeader) - if !ok { - return fmt.Errorf("expected header to be zip.FileHeader but was %T", f.Header) - } - return z.extractFile(f, filepath.Join(to, header.Name)) -} - -func (z *Zip) extractFile(f File, to string) error { - // if a directory, no content; simply make the directory and return - if f.IsDir() { - return mkdir(to) - } - - // do not overwrite existing files, if configured - if !z.OverwriteExisting && fileExists(to) { - return fmt.Errorf("file already exists: %s", 
to) - } - - return writeNewFile(to, f, f.Mode()) -} - -func (z *Zip) writeWalk(source, topLevelFolder, destination string) error { - sourceAbs, err := filepath.Abs(source) - if err != nil { - return fmt.Errorf("getting absolute path: %v", err) - } - sourceInfo, err := os.Stat(sourceAbs) - if err != nil { - return fmt.Errorf("%s: stat: %v", source, err) - } - destAbs, err := filepath.Abs(destination) - if err != nil { - return fmt.Errorf("%s: getting absolute path of destination %s: %v", source, destination, err) - } - - var baseDir string - if topLevelFolder != "" { - baseDir = topLevelFolder - } - if sourceInfo.IsDir() { - baseDir = path.Join(baseDir, sourceInfo.Name()) - } - - return filepath.Walk(source, func(fpath string, info os.FileInfo, err error) error { - handleErr := func(err error) error { - if z.ContinueOnError { - log.Printf("[ERROR] Walking %s: %v", fpath, err) - return nil - } - return err - } - if err != nil { - return handleErr(fmt.Errorf("traversing %s: %v", fpath, err)) - } - if info == nil { - return handleErr(fmt.Errorf("%s: no file info", fpath)) - } - - // make sure we do not copy the output file into the output - // file; that results in an infinite loop and disk exhaustion! 
- fpathAbs, err := filepath.Abs(fpath) - if err != nil { - return handleErr(fmt.Errorf("%s: getting absolute path: %v", fpath, err)) - } - if within(fpathAbs, destAbs) { - return nil - } - - // build the name to be used within the archive - name, err := filepath.Rel(source, fpath) - if err != nil { - return handleErr(err) - } - nameInArchive := path.Join(baseDir, filepath.ToSlash(name)) - - file, err := os.Open(fpath) - if err != nil { - return handleErr(fmt.Errorf("%s: opening: %v", fpath, err)) - } - defer file.Close() - - err = z.Write(File{ - FileInfo: FileInfo{ - FileInfo: info, - CustomName: nameInArchive, - }, - ReadCloser: file, - }) - if err != nil { - return handleErr(fmt.Errorf("%s: writing: %s", fpath, err)) - } - - return nil - }) -} - -// Create opens z for writing a ZIP archive to out. -func (z *Zip) Create(out io.Writer) error { - if z.zw != nil { - return fmt.Errorf("zip archive is already created for writing") - } - z.zw = zip.NewWriter(out) - if z.CompressionLevel != flate.DefaultCompression { - z.zw.RegisterCompressor(zip.Deflate, func(out io.Writer) (io.WriteCloser, error) { - return flate.NewWriter(out, z.CompressionLevel) - }) - } - return nil -} - -// Write writes f to z, which must have been opened for writing first. -func (z *Zip) Write(f File) error { - if z.zw == nil { - return fmt.Errorf("zip archive was not created for writing first") - } - if f.FileInfo == nil { - return fmt.Errorf("no file info") - } - if f.FileInfo.Name() == "" { - return fmt.Errorf("missing file name") - } - if f.ReadCloser == nil { - return fmt.Errorf("%s: no way to read file contents", f.Name()) - } - - header, err := zip.FileInfoHeader(f) - if err != nil { - return fmt.Errorf("%s: getting header: %v", f.Name(), err) - } - - if f.IsDir() { - header.Name += "/" // required - strangely no mention of this in zip spec? but is in godoc... 
- header.Method = zip.Store - } else { - ext := strings.ToLower(path.Ext(header.Name)) - if _, ok := compressedFormats[ext]; ok && z.SelectiveCompression { - header.Method = zip.Store - } else { - header.Method = zip.Deflate - } - } - - writer, err := z.zw.CreateHeader(header) - if err != nil { - return fmt.Errorf("%s: making header: %v", f.Name(), err) - } - - if f.IsDir() { - return nil - } - - if header.Mode().IsRegular() { - _, err := io.Copy(writer, f) - if err != nil { - return fmt.Errorf("%s: copying contents: %v", f.Name(), err) - } - } - - return nil -} - -// Open opens z for reading an archive from in, -// which is expected to have the given size and -// which must be an io.ReaderAt. -func (z *Zip) Open(in io.Reader, size int64) error { - inRdrAt, ok := in.(io.ReaderAt) - if !ok { - return fmt.Errorf("reader must be io.ReaderAt") - } - if z.zr != nil { - return fmt.Errorf("zip archive is already open for reading") - } - var err error - z.zr, err = zip.NewReader(inRdrAt, size) - if err != nil { - return fmt.Errorf("creating reader: %v", err) - } - z.ridx = 0 - return nil -} - -// Read reads the next file from z, which must have -// already been opened for reading. If there are no -// more files, the error is io.EOF. The File must -// be closed when finished reading from it. -func (z *Zip) Read() (File, error) { - if z.zr == nil { - return File{}, fmt.Errorf("zip archive is not open") - } - if z.ridx >= len(z.zr.File) { - return File{}, io.EOF - } - - // access the file and increment counter so that - // if there is an error processing this file, the - // caller can still iterate to the next file - zf := z.zr.File[z.ridx] - z.ridx++ - - file := File{ - FileInfo: zf.FileInfo(), - Header: zf.FileHeader, - } - - rc, err := zf.Open() - if err != nil { - return file, fmt.Errorf("%s: open compressed file: %v", zf.Name, err) - } - file.ReadCloser = rc - - return file, nil -} - -// Close closes the zip archive(s) opened by Create and Open. 
-func (z *Zip) Close() error { - if z.zr != nil { - z.zr = nil - } - if z.zw != nil { - zw := z.zw - z.zw = nil - return zw.Close() - } - return nil -} - -// Walk calls walkFn for each visited item in archive. -func (z *Zip) Walk(archive string, walkFn WalkFunc) error { - zr, err := zip.OpenReader(archive) - if err != nil { - return fmt.Errorf("opening zip reader: %v", err) - } - defer zr.Close() - - for _, zf := range zr.File { - zfrc, err := zf.Open() - if err != nil { - zfrc.Close() - if z.ContinueOnError { - log.Printf("[ERROR] Opening %s: %v", zf.Name, err) - continue - } - return fmt.Errorf("opening %s: %v", zf.Name, err) - } - - err = walkFn(File{ - FileInfo: zf.FileInfo(), - Header: zf.FileHeader, - ReadCloser: zfrc, - }) - zfrc.Close() - if err != nil { - if err == ErrStopWalk { - break - } - if z.ContinueOnError { - log.Printf("[ERROR] Walking %s: %v", zf.Name, err) - continue - } - return fmt.Errorf("walking %s: %v", zf.Name, err) - } - } - - return nil -} - -// Extract extracts a single file from the zip archive. -// If the target is a directory, the entire folder will -// be extracted into destination. 
-func (z *Zip) Extract(source, target, destination string) error { - // target refers to a path inside the archive, which should be clean also - target = path.Clean(target) - - // if the target ends up being a directory, then - // we will continue walking and extracting files - // until we are no longer within that directory - var targetDirPath string - - return z.Walk(source, func(f File) error { - zfh, ok := f.Header.(zip.FileHeader) - if !ok { - return fmt.Errorf("expected header to be zip.FileHeader but was %T", f.Header) - } - - // importantly, cleaning the path strips tailing slash, - // which must be appended to folders within the archive - name := path.Clean(zfh.Name) - if f.IsDir() && target == name { - targetDirPath = path.Dir(name) - } - - if within(target, zfh.Name) { - // either this is the exact file we want, or is - // in the directory we want to extract - - // build the filename we will extract to - end, err := filepath.Rel(targetDirPath, zfh.Name) - if err != nil { - return fmt.Errorf("relativizing paths: %v", err) - } - joined := filepath.Join(destination, end) - - err = z.extractFile(f, joined) - if err != nil { - return fmt.Errorf("extracting file %s: %v", zfh.Name, err) - } - - // if our target was not a directory, stop walk - if targetDirPath == "" { - return ErrStopWalk - } - } else if targetDirPath != "" { - // finished walking the entire directory - return ErrStopWalk - } - - return nil - }) -} - -// Match returns true if the format of file matches this -// type's format. It should not affect reader position. 
-func (*Zip) Match(file *os.File) (bool, error) { - currentPos, err := file.Seek(0, io.SeekCurrent) - if err != nil { - return false, err - } - _, err = file.Seek(0, 0) - if err != nil { - return false, err - } - defer file.Seek(currentPos, io.SeekStart) - - buf := make([]byte, 4) - if n, err := file.Read(buf); err != nil || n < 4 { - return false, nil - } - return bytes.Equal(buf, []byte("PK\x03\x04")), nil -} - -func (z *Zip) String() string { return "zip" } - -// Compile-time checks to ensure type implements desired interfaces. -var ( - _ = Reader(new(Zip)) - _ = Writer(new(Zip)) - _ = Archiver(new(Zip)) - _ = Unarchiver(new(Zip)) - _ = Walker(new(Zip)) - _ = Extractor(new(Zip)) - _ = Matcher(new(Zip)) -) - -// compressedFormats is a (non-exhaustive) set of lowercased -// file extensions for formats that are typically already -// compressed. Compressing files that are already compressed -// is inefficient, so use this set of extension to avoid that. -var compressedFormats = map[string]struct{}{ - ".7z": {}, - ".avi": {}, - ".br": {}, - ".bz2": {}, - ".cab": {}, - ".docx": {}, - ".gif": {}, - ".gz": {}, - ".jar": {}, - ".jpeg": {}, - ".jpg": {}, - ".lz": {}, - ".lz4": {}, - ".lzma": {}, - ".m4v": {}, - ".mov": {}, - ".mp3": {}, - ".mp4": {}, - ".mpeg": {}, - ".mpg": {}, - ".png": {}, - ".pptx": {}, - ".rar": {}, - ".sz": {}, - ".tbz2": {}, - ".tgz": {}, - ".tsz": {}, - ".txz": {}, - ".xlsx": {}, - ".xz": {}, - ".zip": {}, - ".zipx": {}, -} - -// DefaultZip is a convenient archiver ready to use. 
-var DefaultZip = &Zip{ - CompressionLevel: flate.DefaultCompression, - MkdirAll: true, - SelectiveCompression: true, -} diff --git a/archiver_test.go b/archiver_test.go index 305030f7..dddb0bf9 100644 --- a/archiver_test.go +++ b/archiver_test.go @@ -2,124 +2,213 @@ package archiver import ( "bytes" + "fmt" + "io" "io/ioutil" "os" "path/filepath" "testing" ) -func TestArchiver(t *testing.T) { - for name, ar := range SupportedFormats { - name, ar := name, ar - t.Run(name, func(t *testing.T) { - t.Parallel() - // skip RAR for now - if _, ok := ar.(rarFormat); ok { - t.Skip("not supported") - } - testWriteRead(t, name, ar) - testMakeOpen(t, name, ar) - testMakeOpenWithDestinationEndingInSlash(t, name, ar) - }) +func TestWithin(t *testing.T) { + for i, tc := range []struct { + path1, path2 string + expect bool + }{ + { + path1: "/foo", + path2: "/foo/bar", + expect: true, + }, + { + path1: "/foo", + path2: "/foobar/asdf", + expect: false, + }, + { + path1: "/foobar/", + path2: "/foobar/asdf", + expect: true, + }, + { + path1: "/foobar/asdf", + path2: "/foobar", + expect: false, + }, + { + path1: "/foobar/asdf", + path2: "/foobar/", + expect: false, + }, + { + path1: "/", + path2: "/asdf", + expect: true, + }, + { + path1: "/asdf", + path2: "/asdf", + expect: true, + }, + { + path1: "/", + path2: "/", + expect: true, + }, + { + path1: "/foo/bar/daa", + path2: "/foo", + expect: false, + }, + { + path1: "/foo/", + path2: "/foo/bar/daa", + expect: true, + }, + } { + actual := within(tc.path1, tc.path2) + if actual != tc.expect { + t.Errorf("Test %d: [%s %s] Expected %t but got %t", i, tc.path1, tc.path2, tc.expect, actual) + } } } -// testWriteRead performs a symmetric test by using ar.Write to generate an archive -// from the test corpus, then using ar.Read to extract the archive and comparing -// the contents to ensure they are equal. 
-func testWriteRead(t *testing.T, name string, ar Archiver) { - buf := new(bytes.Buffer) - tmp, err := ioutil.TempDir("", "archiver") - if err != nil { - t.Fatalf("[%s] %v", name, err) - } - defer os.RemoveAll(tmp) - - // Test creating archive - err = ar.Write(buf, []string{"testdata"}) - if err != nil { - t.Fatalf("[%s] writing archive: didn't expect an error, but got: %v", name, err) +func TestMultipleTopLevels(t *testing.T) { + for i, tc := range []struct { + set []string + expect bool + }{ + { + set: []string{}, + expect: false, + }, + { + set: []string{"/foo"}, + expect: false, + }, + { + set: []string{"/foo", "/foo/bar"}, + expect: false, + }, + { + set: []string{"/foo", "/bar"}, + expect: true, + }, + { + set: []string{"/foo", "/foobar"}, + expect: true, + }, + { + set: []string{"foo", "foo/bar"}, + expect: false, + }, + { + set: []string{"foo", "/foo/bar"}, + expect: false, + }, + { + set: []string{"../foo", "foo/bar"}, + expect: true, + }, + { + set: []string{`C:\foo\bar`, `C:\foo\bar\zee`}, + expect: false, + }, + { + set: []string{`C:\`, `C:\foo\bar`}, + expect: false, + }, + { + set: []string{`D:\foo`, `E:\foo`}, + expect: true, + }, + { + set: []string{`D:\foo`, `D:\foo\bar`, `C:\foo`}, + expect: true, + }, + { + set: []string{"/foo", "/", "/bar"}, + expect: true, + }, + } { + actual := multipleTopLevels(tc.set) + if actual != tc.expect { + t.Errorf("Test %d: %v: Expected %t but got %t", i, tc.set, tc.expect, actual) + } } +} - // Test extracting archive - err = ar.Read(buf, tmp) - if err != nil { - t.Fatalf("[%s] reading archive: didn't expect an error, but got: %v", name, err) +func TestArchiveUnarchive(t *testing.T) { + for _, af := range archiveFormats { + au, ok := af.(archiverUnarchiver) + if !ok { + t.Errorf("%s (%T): not an Archiver and Unarchiver", af, af) + continue + } + testArchiveUnarchive(t, au) } - - // Check that what was extracted is what was compressed - symmetricTest(t, name, tmp) } -// testMakeOpen performs a symmetric test by using 
ar.Make to make an archive -// from the test corpus, then using ar.Open to open the archive and comparing -// the contents to ensure they are equal. -func testMakeOpen(t *testing.T, name string, ar Archiver) { - tmp, err := ioutil.TempDir("", "archiver") +func testArchiveUnarchive(t *testing.T, au archiverUnarchiver) { + auStr := fmt.Sprintf("%s", au) + + tmp, err := ioutil.TempDir("", "archiver_test") if err != nil { - t.Fatalf("[%s] %v", name, err) + t.Fatalf("[%s] %v", auStr, err) } defer os.RemoveAll(tmp) // Test creating archive - outfile := filepath.Join(tmp, "test-"+name) - err = ar.Make(outfile, []string{"testdata"}) + outfile := filepath.Join(tmp, "archiver_test."+auStr) + err = au.Archive([]string{"testdata"}, outfile) if err != nil { - t.Fatalf("[%s] making archive: didn't expect an error, but got: %v", name, err) + t.Fatalf("[%s] making archive: didn't expect an error, but got: %v", auStr, err) } - if !ar.Match(outfile) { - t.Fatalf("[%s] identifying format should be 'true', but got 'false'", name) - } + // Test format matching (TODO: Make this its own test, out of band with the archive/unarchive tests) + //testMatching(t, au, outfile) // TODO: Disabled until we can finish implementing this for compressed tar formats // Test extracting archive - dest := filepath.Join(tmp, "extraction_test") + dest := filepath.Join(tmp, "extraction_test_"+auStr) os.Mkdir(dest, 0755) - err = ar.Open(outfile, dest) + err = au.Unarchive(outfile, dest) if err != nil { - t.Fatalf("[%s] extracting archive [%s -> %s]: didn't expect an error, but got: %v", name, outfile, dest, err) + t.Fatalf("[%s] extracting archive [%s -> %s]: didn't expect an error, but got: %v", auStr, outfile, dest, err) } // Check that what was extracted is what was compressed - symmetricTest(t, name, dest) + symmetricTest(t, auStr, dest) } -// testMakeOpenWithDestinationEndingInSlash is similar to testMakeOpen except that -// it tests the case where destination path has a terminating forward slash 
especially -// on Windows os. -func testMakeOpenWithDestinationEndingInSlash(t *testing.T, name string, ar Archiver) { - tmp, err := ioutil.TempDir("", "archiver") - if err != nil { - t.Fatalf("[%s] %v", name, err) +// testMatching tests that au can match the format of archiveFile. +func testMatching(t *testing.T, au archiverUnarchiver, archiveFile string) { + m, ok := au.(Matcher) + if !ok { + t.Logf("[NOTICE] %T (%s) is not a Matcher", au, au) + return } - defer os.RemoveAll(tmp) - // Test creating archive - outfile := filepath.Join(tmp, "test-"+name) - err = ar.Make(outfile, []string{"testdata"}) + file, err := os.Open(archiveFile) if err != nil { - t.Fatalf("[%s] making archive: didn't expect an error, but got: %v", name, err) + t.Fatalf("[%s] opening file for matching: %v", au, err) } + defer file.Close() - if !ar.Match(outfile) { - t.Fatalf("[%s] identifying format should be 'true', but got 'false'", name) - } + tmpBuf := make([]byte, 2048) + io.ReadFull(file, tmpBuf) - // Test extracting archive with destination that has a slash at the end - dest := filepath.Join(tmp, "extraction_test") - os.Mkdir(dest, 0755) - err = ar.Open(outfile, dest+"/") + matched, err := m.Match(file) if err != nil { - t.Fatalf("[%s] extracting archive [%s -> %s]: didn't expect an error, but got: %v", name, outfile, dest, err) + t.Fatalf("%s (%T): testing matching: got error, expected none: %v", m, m, err) + } + if !matched { + t.Fatalf("%s (%T): format should have matched, but didn't", m, m) } - - // Check that what was extracted is what was compressed - symmetricTest(t, name, dest) } // symmetricTest compares the contents of a destination directory to the contents // of the test corpus and tests that they are equal. 
-func symmetricTest(t *testing.T, name, dest string) { +func symmetricTest(t *testing.T, formatName, dest string) { var expectedFileCount int filepath.Walk("testdata", func(fpath string, info os.FileInfo, err error) error { expectedFileCount++ @@ -137,14 +226,14 @@ func symmetricTest(t *testing.T, name, dest string) { origPath, err := filepath.Rel(dest, fpath) if err != nil { - t.Fatalf("[%s] %s: Error inducing original file path: %v", name, fpath, err) + t.Fatalf("[%s] %s: Error inducing original file path: %v", formatName, fpath, err) } if info.IsDir() { // stat dir instead of read file _, err = os.Stat(origPath) if err != nil { - t.Fatalf("[%s] %s: Couldn't stat original directory (%s): %v", name, + t.Fatalf("[%s] %s: Couldn't stat original directory (%s): %v", formatName, fpath, origPath, err) } return nil @@ -152,96 +241,50 @@ func symmetricTest(t *testing.T, name, dest string) { expectedFileInfo, err := os.Stat(origPath) if err != nil { - t.Fatalf("[%s] %s: Error obtaining original file info: %v", name, fpath, err) + t.Fatalf("[%s] %s: Error obtaining original file info: %v", formatName, fpath, err) } expected, err := ioutil.ReadFile(origPath) if err != nil { - t.Fatalf("[%s] %s: Couldn't open original file (%s) from disk: %v", name, + t.Fatalf("[%s] %s: Couldn't open original file (%s) from disk: %v", formatName, fpath, origPath, err) } actualFileInfo, err := os.Stat(fpath) if err != nil { - t.Fatalf("[%s] %s: Error obtaining actual file info: %v", name, fpath, err) + t.Fatalf("[%s] %s: Error obtaining actual file info: %v", formatName, fpath, err) } actual, err := ioutil.ReadFile(fpath) if err != nil { - t.Fatalf("[%s] %s: Couldn't open new file from disk: %v", name, fpath, err) + t.Fatalf("[%s] %s: Couldn't open new file from disk: %v", formatName, fpath, err) } if actualFileInfo.Mode() != expectedFileInfo.Mode() { - t.Fatalf("[%s] %s: File mode differed between on disk and compressed", name, + t.Fatalf("[%s] %s: File mode differed between on disk and 
compressed", formatName, expectedFileInfo.Mode().String()+" : "+actualFileInfo.Mode().String()) } if !bytes.Equal(expected, actual) { - t.Fatalf("[%s] %s: File contents differed between on disk and compressed", name, origPath) + t.Fatalf("[%s] %s: File contents differed between on disk and compressed", formatName, origPath) } return nil }) if got, want := actualFileCount, expectedFileCount; got != want { - t.Fatalf("[%s] Expected %d resulting files, got %d", name, want, got) + t.Fatalf("[%s] Expected %d resulting files, got %d", formatName, want, got) } } -func BenchmarkMake(b *testing.B) { - tmp, err := ioutil.TempDir("", "archiver") - if err != nil { - b.Fatal(err) - } - defer os.RemoveAll(tmp) - - for name, ar := range SupportedFormats { - name, ar := name, ar - b.Run(name, func(b *testing.B) { - // skip RAR for now - if _, ok := ar.(rarFormat); ok { - b.Skip("not supported") - } - outfile := filepath.Join(tmp, "benchMake-"+name) - for i := 0; i < b.N; i++ { - err = ar.Make(outfile, []string{"testdata"}) - if err != nil { - b.Fatalf("making archive: didn't expect an error, but got: %v", err) - } - } - }) - } +var archiveFormats = []interface{}{ + DefaultZip, + DefaultTar, + DefaultTarBz2, + DefaultTarGz, + DefaultTarLz4, + DefaultTarSz, + DefaultTarXz, } -func BenchmarkOpen(b *testing.B) { - tmp, err := ioutil.TempDir("", "archiver") - if err != nil { - b.Fatal(err) - } - defer os.RemoveAll(tmp) - - for name, ar := range SupportedFormats { - name, ar := name, ar - b.Run(name, func(b *testing.B) { - // skip RAR for now - if _, ok := ar.(rarFormat); ok { - b.Skip("not supported") - } - // prepare a archive - outfile := filepath.Join(tmp, "benchMake-"+name) - err = ar.Make(outfile, []string{"testdata"}) - if err != nil { - b.Fatalf("open archive: didn't expect an error, but got: %v", err) - } - // prepare extraction destination - dest := filepath.Join(tmp, "extraction_test") - os.Mkdir(dest, 0755) - - // let's go - b.ResetTimer() - for i := 0; i < b.N; i++ { - err 
= ar.Open(outfile, dest) - if err != nil { - b.Fatalf("open archive: didn't expect an error, but got: %v", err) - } - } - }) - } +type archiverUnarchiver interface { + Archiver + Unarchiver } diff --git a/archiver/bz2.go b/bz2.go similarity index 100% rename from archiver/bz2.go rename to bz2.go diff --git a/archiver/cmd/arc/main.go b/cmd/arc/main.go similarity index 100% rename from archiver/cmd/arc/main.go rename to cmd/arc/main.go diff --git a/cmd/archiver/main.go b/cmd/archiver/main.go deleted file mode 100644 index e0a73083..00000000 --- a/cmd/archiver/main.go +++ /dev/null @@ -1,94 +0,0 @@ -package main - -import ( - "fmt" - "os" - - "github.com/mholt/archiver" -) - -func main() { - if len(os.Args) == 2 && os.Args[1] == "-h" { - fmt.Println(usage) - os.Exit(0) - } - if len(os.Args) < 3 { - fatal(usage) - } - - cmd, filename := os.Args[1], os.Args[2] - - ff := archiver.MatchingFormat(filename) - if ff == nil { - fatalf("%s: Unsupported file extension", filename) - } - - var err error - switch cmd { - case "make": - if len(os.Args) < 4 { - fatal(usage) - } - err = ff.Make(filename, os.Args[3:]) - case "open": - dest, osErr := os.Getwd() - if osErr != nil { - fatal(err) - } - if len(os.Args) == 4 { - dest = os.Args[3] - } else if len(os.Args) > 4 { - fatal(usage) - } - err = ff.Open(filename, dest) - default: - fatal(usage) - } - if err != nil { - fatal(err) - } -} - -func fatal(v ...interface{}) { - fmt.Fprintln(os.Stderr, v...) - os.Exit(1) -} - -func fatalf(s string, v ...interface{}) { - fmt.Fprintf(os.Stderr, s+"\n", v...) - os.Exit(1) -} - -const usage = `Usage: archiver {make|open} [files...] - make - Create a new archive file. List the files/folders - to include in the archive; at least one required. - open - Extract an archive file. Give only the archive to - open and the destination folder to extract into. - - Specifying archive format: - The format of the archive is determined by its - file extension. 
Supported extensions: - .zip - .tar - .tar.gz - .tgz - .tar.bz2 - .tbz2 - .tar.xz - .txz - .tar.lz4 - .tlz4 - .tar.sz - .tsz - .rar (open only) - - Existing files: - When creating an archive file that already exists, - archiver will overwrite the existing file. When - extracting files, archiver will NOT overwrite files - that already exist in the destination path; this - is treated as an error and extraction will abort. - - Use "archiver -h" to display this help message` diff --git a/archiver/filecompressor.go b/filecompressor.go similarity index 100% rename from archiver/filecompressor.go rename to filecompressor.go diff --git a/archiver/gz.go b/gz.go similarity index 100% rename from archiver/gz.go rename to gz.go diff --git a/archiver/lz4.go b/lz4.go similarity index 100% rename from archiver/lz4.go rename to lz4.go diff --git a/rar.go b/rar.go index 3ff61da6..ba55fecf 100644 --- a/rar.go +++ b/rar.go @@ -4,117 +4,372 @@ import ( "bytes" "fmt" "io" + "log" "os" + "path" "path/filepath" - "strings" + "time" "github.com/nwaples/rardecode" ) -// Rar is for RAR archive format -var Rar rarFormat +// Rar provides facilities for reading RAR archives. +// See https://www.rarlab.com/technote.htm. +type Rar struct { + // Whether to overwrite existing files; if false, + // an error is returned if the file exists. + OverwriteExisting bool -func init() { - RegisterFormat("Rar", Rar) + // Whether to make all the directories necessary + // to create a rar archive in the desired path. + MkdirAll bool + + // A single top-level folder can be implicitly + // created by the Unarchive method if the files + // to be extracted from the archive do not all + // have a common root. This roughly mimics the + // behavior of archival tools integrated into OS + // file browsers which create a subfolder to + // avoid unexpectedly littering the destination + // folder with potentially many files, causing a + // problematic cleanup/organization situation. 
+ // This feature is available for both creation + // and extraction of archives, but may be slightly + // inefficient with lots and lots of files, + // especially on extraction. + ImplicitTopLevelFolder bool + + // If true, errors encountered during reading + // or writing a single file will be logged and + // the operation will continue on remaining files. + ContinueOnError bool + + // The password to open archives (optional). + Password string + + rr *rardecode.Reader // underlying stream reader + rc *rardecode.ReadCloser // supports multi-volume archives (files only) } -type rarFormat struct{} +// Unarchive unpacks the .rar file at source to destination. +// Destination will be treated as a folder name. It supports +// multi-volume archives. +func (r *Rar) Unarchive(source, destination string) error { + if !fileExists(destination) && r.MkdirAll { + err := mkdir(destination) + if err != nil { + return fmt.Errorf("preparing destination: %v", err) + } + } + + // if the files in the archive do not all share a common + // root, then make sure we extract to a single subfolder + // rather than potentially littering the destination... 
+ if r.ImplicitTopLevelFolder { + var err error + destination, err = r.addTopLevelFolder(source, destination) + if err != nil { + return fmt.Errorf("scanning source archive: %v", err) + } + } + + err := r.OpenFile(source) + if err != nil { + return fmt.Errorf("opening rar archive for reading: %v", err) + } + defer r.Close() -func (rarFormat) Match(filename string) bool { - return strings.HasSuffix(strings.ToLower(filename), ".rar") || isRar(filename) + for { + err := r.unrarNext(destination) + if err == io.EOF { + break + } + if err != nil { + if r.ContinueOnError { + log.Printf("[ERROR] Reading file in rar archive: %v", err) + continue + } + return fmt.Errorf("reading file in rar archive: %v", err) + } + } + + return nil } -// isRar checks the file has the RAR 1.5 or 5.0 format signature by reading its -// beginning bytes and matching it -func isRar(rarPath string) bool { - f, err := os.Open(rarPath) +// addTopLevelFolder scans the files contained inside +// the tarball named sourceArchive and returns a modified +// destination if all the files do not share the same +// top-level folder. 
+func (r *Rar) addTopLevelFolder(sourceArchive, destination string) (string, error) { + file, err := os.Open(sourceArchive) if err != nil { - return false + return "", fmt.Errorf("opening source archive: %v", err) } - defer f.Close() + defer file.Close() - buf := make([]byte, 8) - if n, err := f.Read(buf); err != nil || n < 8 { - return false + rc, err := rardecode.NewReader(file, r.Password) + if err != nil { + return "", fmt.Errorf("creating archive reader: %v", err) } - return bytes.Equal(buf[:7], []byte("Rar!\x1a\x07\x00")) || // ver 1.5 - bytes.Equal(buf, []byte("Rar!\x1a\x07\x01\x00")) // ver 5.0 + var files []string + for { + hdr, err := rc.Next() + if err == io.EOF { + break + } + if err != nil { + return "", fmt.Errorf("scanning tarball's file listing: %v", err) + } + files = append(files, hdr.Name) + } + + if multipleTopLevels(files) { + destination = filepath.Join(destination, folderNameFromFileName(sourceArchive)) + } + + return destination, nil } -// Write outputs a .rar archive, but this is not implemented because -// RAR is a proprietary format. It is here only for symmetry with -// the other archive formats in this package. -func (rarFormat) Write(output io.Writer, filePaths []string) error { - return fmt.Errorf("write: RAR not implemented (proprietary format)") +func (r *Rar) unrarNext(to string) error { + f, err := r.Read() + if err != nil { + return err // don't wrap error; calling loop must break on io.EOF + } + header, ok := f.Header.(*rardecode.FileHeader) + if !ok { + return fmt.Errorf("expected header to be *rardecode.FileHeader but was %T", f.Header) + } + return r.unrarFile(f, filepath.Join(to, header.Name)) } -// Make makes a .rar archive, but this is not implemented because -// RAR is a proprietary format. It is here only for symmetry with -// the other archive formats in this package. 
-func (rarFormat) Make(rarPath string, filePaths []string) error { - return fmt.Errorf("make %s: RAR not implemented (proprietary format)", rarPath) +func (r *Rar) unrarFile(f File, to string) error { + // do not overwrite existing files, if configured + if !f.IsDir() && !r.OverwriteExisting && fileExists(to) { + return fmt.Errorf("file already exists: %s", to) + } + + hdr, ok := f.Header.(*rardecode.FileHeader) + if !ok { + return fmt.Errorf("expected header to be *rardecode.FileHeader but was %T", f.Header) + } + + // if files come before their containing folders, then we must + // create their folders before writing the file + err := mkdir(filepath.Dir(to)) + if err != nil { + return fmt.Errorf("making parent directories: %v", err) + } + + return writeNewFile(to, r.rr, hdr.Mode()) } -// Read extracts the RAR file read from input and puts the contents -// into destination. -func (rarFormat) Read(input io.Reader, destination string) error { - rr, err := rardecode.NewReader(input, "") +// OpenFile opens filename for reading. This method supports +// multi-volume archives, whereas Open does not (but Open +// supports any stream, not just files). +func (r *Rar) OpenFile(filename string) error { + if r.rr != nil { + return fmt.Errorf("rar archive is already open for reading") + } + var err error + r.rc, err = rardecode.OpenReader(filename, r.Password) if err != nil { - return fmt.Errorf("read: failed to create reader: %v", err) + return err } + r.rr = &r.rc.Reader + return nil +} - return extract(rr, destination) +// Open opens t for reading an archive from +// in. The size parameter is not used. +func (r *Rar) Open(in io.Reader, size int64) error { + if r.rr != nil { + return fmt.Errorf("rar archive is already open for reading") + } + var err error + r.rr, err = rardecode.NewReader(in, r.Password) + return err } -// Open extracts the RAR file at source and puts the contents -// into destination. 
// Read reads the next file from r, which must have
// already been opened for reading. If there are no
// more files, the error is io.EOF. The File must
// be closed when finished reading from it.
func (r *Rar) Read() (File, error) {
	if r.rr == nil {
		return File{}, fmt.Errorf("rar archive is not open")
	}

	hdr, err := r.rr.Next()
	if err != nil {
		return File{}, err // don't wrap error; preserve io.EOF
	}

	// wrap the header in rarFileInfo so the File satisfies
	// os.FileInfo; the decoder's stream is shared, so the
	// ReadCloser is a fake closer over the single reader
	file := File{
		FileInfo:   rarFileInfo{hdr},
		Header:     hdr,
		ReadCloser: ReadFakeCloser{r.rr},
	}

	return file, nil
}

// Close closes the rar archive(s) opened by OpenFile and Open.
func (r *Rar) Close() error {
	var err error
	// r.rc is only set by OpenFile (multi-volume); it owns the
	// underlying file handles and must actually be closed
	if r.rc != nil {
		rc := r.rc
		r.rc = nil
		err = rc.Close()
	}
	// r.rr wraps a caller-owned stream; just drop the reference
	if r.rr != nil {
		r.rr = nil
	}
	return err
}

// Walk calls walkFn for each visited item in archive.
// Walk calls walkFn for each visited item in archive.
// Walking stops early on ErrStopWalk, or on the first
// error unless ContinueOnError is set.
func (r *Rar) Walk(archive string, walkFn WalkFunc) error {
	file, err := os.Open(archive)
	if err != nil {
		return fmt.Errorf("opening archive file: %v", err)
	}
	defer file.Close()

	err = r.Open(file, 0)
	if err != nil {
		return fmt.Errorf("opening archive: %v", err)
	}
	defer r.Close()

	for {
		f, err := r.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			// read errors can be skipped if configured
			if r.ContinueOnError {
				log.Printf("[ERROR] Opening next file: %v", err)
				continue
			}
			return fmt.Errorf("opening next file: %v", err)
		}
		err = walkFn(f)
		if err != nil {
			// ErrStopWalk is the caller's signal to stop cleanly
			if err == ErrStopWalk {
				break
			}
			if r.ContinueOnError {
				log.Printf("[ERROR] Walking %s: %v", f.Name(), err)
				continue
			}
			return fmt.Errorf("walking %s: %v", f.Name(), err)
		}
	}

	return nil
}

// Extract extracts a single file from the rar archive.
// If the target is a directory, the entire folder will
// be extracted into destination.
func (r *Rar) Extract(source, target, destination string) error {
	// target refers to a path inside the archive, which should be clean also
	target = path.Clean(target)

	// if the target ends up being a directory, then
	// we will continue walking and extracting files
	// until we are no longer within that directory
	var targetDirPath string

	return r.Walk(source, func(f File) error {
		th, ok := f.Header.(*rardecode.FileHeader)
		if !ok {
			return fmt.Errorf("expected header to be *rardecode.FileHeader but was %T", f.Header)
		}

		// importantly, cleaning the path strips trailing slash,
		// which must be appended to folders within the archive
		name := path.Clean(th.Name)
		if f.IsDir() && target == name {
			// remember the parent so later entries can be
			// relativized against it
			targetDirPath = path.Dir(name)
		}

		if within(target, th.Name) {
			// either this is the exact file we want, or is
			// in the directory we want to extract

			// build the filename we will extract to
			end, err := filepath.Rel(targetDirPath, th.Name)
			if err != nil {
				return fmt.Errorf("relativizing paths: %v", err)
			}
			joined := filepath.Join(destination, end)

			err = r.unrarFile(f, joined)
			if err != nil {
				return fmt.Errorf("extracting file %s: %v", th.Name, err)
			}

			// if our target was not a directory, stop walk
			if targetDirPath == "" {
				return ErrStopWalk
			}
		} else if targetDirPath != "" {
			// finished walking the entire directory
			return ErrStopWalk
		}

		return nil
	})
}

// Match returns true if the format of file matches this
// type's format. It should not affect reader position.
+func (*Rar) Match(file *os.File) (bool, error) { + currentPos, err := file.Seek(0, io.SeekCurrent) + if err != nil { + return false, err + } + _, err = file.Seek(0, 0) + if err != nil { + return false, err } + defer file.Seek(currentPos, io.SeekStart) - return nil + buf := make([]byte, 8) + if n, err := file.Read(buf); err != nil || n < 8 { + return false, nil + } + hasTarHeader := bytes.Equal(buf[:7], []byte("Rar!\x1a\x07\x00")) || // ver 1.5 + bytes.Equal(buf, []byte("Rar!\x1a\x07\x01\x00")) // ver 5.0 + return hasTarHeader, nil +} + +func (r *Rar) String() string { return "rar" } + +type rarFileInfo struct { + fh *rardecode.FileHeader +} + +func (rfi rarFileInfo) Name() string { return rfi.fh.Name } +func (rfi rarFileInfo) Size() int64 { return rfi.fh.UnPackedSize } +func (rfi rarFileInfo) Mode() os.FileMode { return rfi.fh.Mode() } +func (rfi rarFileInfo) ModTime() time.Time { return rfi.fh.ModificationTime } +func (rfi rarFileInfo) IsDir() bool { return rfi.fh.IsDir } +func (rfi rarFileInfo) Sys() interface{} { return nil } + +// Compile-time checks to ensure type implements desired interfaces. +var ( + _ = Reader(new(Rar)) + _ = Unarchiver(new(Rar)) + _ = Walker(new(Rar)) + _ = Extractor(new(Rar)) + _ = Matcher(new(Rar)) + _ = os.FileInfo(rarFileInfo{}) +) + +// DefaultRar is a convenient archiver ready to use. +var DefaultRar = &Rar{ + MkdirAll: true, } diff --git a/archiver/sz.go b/sz.go similarity index 100% rename from archiver/sz.go rename to sz.go diff --git a/tar.go b/tar.go index ee0c4436..dd9cf0d2 100644 --- a/tar.go +++ b/tar.go @@ -5,240 +5,599 @@ import ( "bytes" "fmt" "io" + "log" "os" + "path" "path/filepath" "strconv" "strings" ) -// Tar is for Tar format -var Tar tarFormat - -func init() { - RegisterFormat("Tar", Tar) +// Tar provides facilities for operating TAR archives. +// See http://www.gnu.org/software/tar/manual/html_node/Standard.html. 
type Tar struct {
	// Whether to overwrite existing files; if false,
	// an error is returned if the file exists.
	OverwriteExisting bool

	// Whether to make all the directories necessary
	// to create a tar archive in the desired path.
	MkdirAll bool

	// A single top-level folder can be implicitly
	// created by the Archive or Unarchive methods
	// if the files to be added to the archive
	// or the files to be extracted from the archive
	// do not all have a common root. This roughly
	// mimics the behavior of archival tools integrated
	// into OS file browsers which create a subfolder
	// to avoid unexpectedly littering the destination
	// folder with potentially many files, causing a
	// problematic cleanup/organization situation.
	// This feature is available for both creation
	// and extraction of archives, but may be slightly
	// inefficient with lots and lots of files,
	// especially on extraction.
	ImplicitTopLevelFolder bool

	// If true, errors encountered during reading
	// or writing a single file will be logged and
	// the operation will continue on remaining files.
	ContinueOnError bool

	// tw is non-nil while the archive is open for writing
	// (set by Create, cleared by Close).
	tw *tar.Writer
	// tr is non-nil while the archive is open for reading
	// (set by Open, cleared by Close).
	tr *tar.Reader

	// Hooks used by the compressed variants (TarGz, TarBz2, ...)
	// to wrap the raw stream with a de/compressor; cleanupWrapFn
	// runs in Close after the underlying stream is closed.
	readerWrapFn  func(io.Reader) (io.Reader, error)
	writerWrapFn  func(io.Writer) (io.Writer, error)
	cleanupWrapFn func()
}

// Archive creates a tarball file at destination containing
// the files listed in sources. The destination must end with
// ".tar". File paths can be those of regular files or
// directories; directories will be recursively added.
func (t *Tar) Archive(sources []string, destination string) error {
	// compressed variants set writerWrapFn and use their own
	// extension checks, so only require ".tar" for plain tar
	if t.writerWrapFn == nil && !strings.HasSuffix(destination, ".tar") {
		return fmt.Errorf("output filename must have .tar extension")
	}
	if !t.OverwriteExisting && fileExists(destination) {
		return fmt.Errorf("file already exists: %s", destination)
	}

	// make the folder to contain the resulting archive
	// if it does not already exist
	destDir := filepath.Dir(destination)
	if t.MkdirAll && !fileExists(destDir) {
		err := mkdir(destDir)
		if err != nil {
			return fmt.Errorf("making folder for destination: %v", err)
		}
	}

	out, err := os.Create(destination)
	if err != nil {
		return fmt.Errorf("creating %s: %v", destination, err)
	}
	defer out.Close()

	err = t.Create(out)
	if err != nil {
		return fmt.Errorf("creating tar: %v", err)
	}
	defer t.Close()

	// decide up front whether everything gets nested under an
	// implicit top-level folder named after the archive
	var topLevelFolder string
	if t.ImplicitTopLevelFolder && multipleTopLevels(sources) {
		topLevelFolder = folderNameFromFileName(destination)
	}

	for _, source := range sources {
		err := t.writeWalk(source, topLevelFolder, destination)
		if err != nil {
			return fmt.Errorf("walking %s: %v", source, err)
		}
	}

	return nil
}

// Unarchive unpacks the .tar file at source to destination.
// Destination will be treated as a folder name.
func (t *Tar) Unarchive(source, destination string) error {
	if !fileExists(destination) && t.MkdirAll {
		err := mkdir(destination)
		if err != nil {
			return fmt.Errorf("preparing destination: %v", err)
		}
	}

	// if the files in the archive do not all share a common
	// root, then make sure we extract to a single subfolder
	// rather than potentially littering the destination...
	if t.ImplicitTopLevelFolder {
		var err error
		destination, err = t.addTopLevelFolder(source, destination)
		if err != nil {
			return fmt.Errorf("scanning source archive: %v", err)
		}
	}

	file, err := os.Open(source)
	if err != nil {
		return fmt.Errorf("opening source archive: %v", err)
	}
	defer file.Close()

	err = t.Open(file, 0)
	if err != nil {
		return fmt.Errorf("opening tar archive for reading: %v", err)
	}
	defer t.Close()

	// extract entries one at a time until the archive is exhausted
	for {
		err := t.untarNext(destination)
		if err == io.EOF {
			break
		}
		if err != nil {
			if t.ContinueOnError {
				log.Printf("[ERROR] Reading file in tar archive: %v", err)
				continue
			}
			return fmt.Errorf("reading file in tar archive: %v", err)
		}
	}

	return nil
}
// addTopLevelFolder scans the files contained inside
// the tarball named sourceArchive and returns a modified
// destination if all the files do not share the same
// top-level folder.
func (t *Tar) addTopLevelFolder(sourceArchive, destination string) (string, error) {
	file, err := os.Open(sourceArchive)
	if err != nil {
		return "", fmt.Errorf("opening source archive: %v", err)
	}
	defer file.Close()

	// if the reader is to be wrapped, ensure we do that now
	// or we will not be able to read the archive successfully
	reader := io.Reader(file)
	if t.readerWrapFn != nil {
		reader, err = t.readerWrapFn(reader)
		if err != nil {
			return "", fmt.Errorf("wrapping reader: %v", err)
		}
	}
	if t.cleanupWrapFn != nil {
		defer t.cleanupWrapFn()
	}

	tr := tar.NewReader(reader)

	// list every entry name so we can detect a shared root
	var files []string
	for {
		hdr, err := tr.Next()
		if err == io.EOF {
			break
		}
		if err != nil {
			return "", fmt.Errorf("scanning tarball's file listing: %v", err)
		}
		files = append(files, hdr.Name)
	}

	if multipleTopLevels(files) {
		destination = filepath.Join(destination, folderNameFromFileName(sourceArchive))
	}

	return destination, nil
}
// untarNext reads the next entry from the already-open
// archive and extracts it under the folder `to`.
func (t *Tar) untarNext(to string) error {
	f, err := t.Read()
	if err != nil {
		return err // don't wrap error; calling loop must break on io.EOF
	}
	header, ok := f.Header.(*tar.Header)
	if !ok {
		return fmt.Errorf("expected header to be *tar.Header but was %T", f.Header)
	}
	// NOTE(review): header.Name is joined without sanitization; a
	// "../" entry could escape `to` (the old code called
	// sanitizeExtractPath here) — confirm this is handled elsewhere.
	return t.untarFile(f, filepath.Join(to, header.Name))
}

// untarFile extracts the single entry f to the path `to` on disk,
// dispatching on the tar type flag.
func (t *Tar) untarFile(f File, to string) error {
	// do not overwrite existing files, if configured
	if !f.IsDir() && !t.OverwriteExisting && fileExists(to) {
		return fmt.Errorf("file already exists: %s", to)
	}

	hdr, ok := f.Header.(*tar.Header)
	if !ok {
		return fmt.Errorf("expected header to be *tar.Header but was %T", f.Header)
	}

	switch hdr.Typeflag {
	case tar.TypeDir:
		return mkdir(to)
	case tar.TypeReg, tar.TypeRegA, tar.TypeChar, tar.TypeBlock, tar.TypeFifo:
		return writeNewFile(to, f, f.Mode())
	case tar.TypeSymlink:
		return writeNewSymbolicLink(to, hdr.Linkname)
	case tar.TypeLink:
		// NOTE(review): the previous implementation joined Linkname
		// with the extraction root, not with `to` (the entry's own
		// path) — joining with `to` looks like a regression; confirm.
		return writeNewHardLink(to, filepath.Join(to, hdr.Linkname))
	case tar.TypeXGlobalHeader:
		return nil // ignore the pax global header from git-generated tarballs
	default:
		return fmt.Errorf("%s: unknown type flag: %c", hdr.Name, hdr.Typeflag)
	}
}
// writeWalk recursively adds source (a file or directory) to the
// open archive, nesting entries under topLevelFolder if non-empty
// and skipping anything inside destination (the output archive).
func (t *Tar) writeWalk(source, topLevelFolder, destination string) error {
	sourceAbs, err := filepath.Abs(source)
	if err != nil {
		return fmt.Errorf("getting absolute path: %v", err)
	}
	sourceInfo, err := os.Stat(sourceAbs)
	if err != nil {
		return fmt.Errorf("%s: stat: %v", source, err)
	}
	destAbs, err := filepath.Abs(destination)
	if err != nil {
		return fmt.Errorf("%s: getting absolute path of destination %s: %v", source, destination, err)
	}

	// compute the folder that entry names will be nested under
	var baseDir string
	if topLevelFolder != "" {
		baseDir = topLevelFolder
	}
	if sourceInfo.IsDir() {
		baseDir = path.Join(baseDir, sourceInfo.Name())
	}

	return filepath.Walk(source, func(fpath string, info os.FileInfo, err error) error {
		// handleErr either logs-and-continues or aborts the walk,
		// depending on ContinueOnError
		handleErr := func(err error) error {
			if t.ContinueOnError {
				log.Printf("[ERROR] Walking %s: %v", fpath, err)
				return nil
			}
			return err
		}
		if err != nil {
			return handleErr(fmt.Errorf("traversing %s: %v", fpath, err))
		}
		if info == nil {
			return handleErr(fmt.Errorf("no file info"))
		}

		// make sure we do not copy our output file into itself
		fpathAbs, err := filepath.Abs(fpath)
		if err != nil {
			return handleErr(fmt.Errorf("%s: getting absolute path: %v", fpath, err))
		}
		if within(fpathAbs, destAbs) {
			return nil
		}

		// build the name to be used in the archive
		name, err := filepath.Rel(source, fpath)
		if err != nil {
			return handleErr(err)
		}
		nameInArchive := path.Join(baseDir, filepath.ToSlash(name))

		file, err := os.Open(fpath)
		if err != nil {
			return handleErr(fmt.Errorf("%s: opening: %v", fpath, err))
		}
		defer file.Close()

		err = t.Write(File{
			FileInfo: FileInfo{
				FileInfo:   info,
				CustomName: nameInArchive,
			},
			ReadCloser: file,
		})
		if err != nil {
			return handleErr(fmt.Errorf("%s: writing: %s", fpath, err))
		}

		return nil
	})
}

// Create opens t for writing a tar archive to out.
func (t *Tar) Create(out io.Writer) error {
	if t.tw != nil {
		return fmt.Errorf("tar archive is already created for writing")
	}

	// wrapping writers allows us to output
	// compressed tarballs, for example
	if t.writerWrapFn != nil {
		var err error
		out, err = t.writerWrapFn(out)
		if err != nil {
			return fmt.Errorf("wrapping writer: %v", err)
		}
	}

	t.tw = tar.NewWriter(out)
	return nil
}

// Write writes f to t, which must have been opened for writing first.
func (t *Tar) Write(f File) error {
	if t.tw == nil {
		return fmt.Errorf("tar archive was not created for writing first")
	}
	if f.FileInfo == nil {
		return fmt.Errorf("no file info")
	}
	if f.FileInfo.Name() == "" {
		return fmt.Errorf("missing file name")
	}
	if f.ReadCloser == nil {
		return fmt.Errorf("%s: no way to read file contents", f.Name())
	}

	// f.Name() may be a CustomName set by the caller (writeWalk),
	// so it becomes the entry's name inside the archive
	hdr, err := tar.FileInfoHeader(f, f.Name())
	if err != nil {
		return fmt.Errorf("%s: making header: %v", f.Name(), err)
	}

	err = t.tw.WriteHeader(hdr)
	if err != nil {
		return fmt.Errorf("%s: writing header: %v", hdr.Name, err)
	}

	if f.IsDir() {
		return nil // directories have no contents
	}

	// only regular files carry a payload
	if hdr.Typeflag == tar.TypeReg {
		_, err := io.Copy(t.tw, f)
		if err != nil {
			return fmt.Errorf("%s: copying contents: %v", f.Name(), err)
		}
	}

	return nil
}

// Open opens t for reading an archive from
// in. The size parameter is not used.
func (t *Tar) Open(in io.Reader, size int64) error {
	if t.tr != nil {
		return fmt.Errorf("tar archive is already open for reading")
	}
	// wrapping readers allows us to open compressed tarballs
	if t.readerWrapFn != nil {
		var err error
		in, err = t.readerWrapFn(in)
		if err != nil {
			return fmt.Errorf("wrapping file reader: %v", err)
		}
	}
	t.tr = tar.NewReader(in)
	return nil
}

// Read reads the next file from t, which must have
// already been opened for reading. If there are no
// more files, the error is io.EOF. The File must
// be closed when finished reading from it.
func (t *Tar) Read() (File, error) {
	if t.tr == nil {
		return File{}, fmt.Errorf("tar archive is not open")
	}

	hdr, err := t.tr.Next()
	if err != nil {
		return File{}, err // don't wrap error; preserve io.EOF
	}

	// the tar reader is a single shared stream, so the File's
	// ReadCloser is a fake closer over it
	file := File{
		FileInfo:   hdr.FileInfo(),
		Header:     hdr,
		ReadCloser: ReadFakeCloser{t.tr},
	}

	return file, nil
}

// Close closes the tar archive(s) opened by Create and Open.
func (t *Tar) Close() error {
	var err error
	// the reader wraps a caller-owned stream; just drop it
	if t.tr != nil {
		t.tr = nil
	}
	// the writer must be closed so the tar footer is flushed
	if t.tw != nil {
		tw := t.tw
		t.tw = nil
		err = tw.Close()
	}
	// make sure cleanup of "Reader/Writer wrapper"
	// (say that ten times fast) happens AFTER the
	// underlying stream is closed
	if t.cleanupWrapFn != nil {
		t.cleanupWrapFn()
	}
	return err
}
+func (t *Tar) Walk(archive string, walkFn WalkFunc) error { + file, err := os.Open(archive) + if err != nil { + return fmt.Errorf("opening archive file: %v", err) + } + defer file.Close() + + err = t.Open(file, 0) + if err != nil { + return fmt.Errorf("opening archive: %v", err) + } + defer t.Close() + for { - header, err := tr.Next() + f, err := t.Read() if err == io.EOF { break - } else if err != nil { - return err } - - if err := untarFile(tr, header, destination); err != nil { - return err + if err != nil { + if t.ContinueOnError { + log.Printf("[ERROR] Opening next file: %v", err) + continue + } + return fmt.Errorf("opening next file: %v", err) + } + err = walkFn(f) + if err != nil { + if err == ErrStopWalk { + break + } + if t.ContinueOnError { + log.Printf("[ERROR] Walking %s: %v", f.Name(), err) + continue + } + return fmt.Errorf("walking %s: %v", f.Name(), err) } } + return nil } -// untarFile untars a single file from tr with header header into destination. -func untarFile(tr *tar.Reader, header *tar.Header, destination string) error { - err := sanitizeExtractPath(header.Name, destination) +// Extract extracts a single file from the tar archive. +// If the target is a directory, the entire folder will +// be extracted into destination. 
func (t *Tar) Extract(source, target, destination string) error {
	// target refers to a path inside the archive, which should be clean also
	target = path.Clean(target)

	// if the target ends up being a directory, then
	// we will continue walking and extracting files
	// until we are no longer within that directory
	var targetDirPath string

	return t.Walk(source, func(f File) error {
		th, ok := f.Header.(*tar.Header)
		if !ok {
			return fmt.Errorf("expected header to be *tar.Header but was %T", f.Header)
		}

		// importantly, cleaning the path strips trailing slash,
		// which must be appended to folders within the archive
		name := path.Clean(th.Name)
		if f.IsDir() && target == name {
			// remember the parent so later entries can be
			// relativized against it
			targetDirPath = path.Dir(name)
		}

		if within(target, th.Name) {
			// either this is the exact file we want, or is
			// in the directory we want to extract

			// build the filename we will extract to
			end, err := filepath.Rel(targetDirPath, th.Name)
			if err != nil {
				return fmt.Errorf("relativizing paths: %v", err)
			}
			joined := filepath.Join(destination, end)

			err = t.untarFile(f, joined)
			if err != nil {
				return fmt.Errorf("extracting file %s: %v", th.Name, err)
			}

			// if our target was not a directory, stop walk
			if targetDirPath == "" {
				return ErrStopWalk
			}
		} else if targetDirPath != "" {
			// finished walking the entire directory
			return ErrStopWalk
		}

		return nil
	})
}

// Match returns true if the format of file matches this
// type's format. It should not affect reader position.
+func (*Tar) Match(file *os.File) (bool, error) { + currentPos, err := file.Seek(0, io.SeekCurrent) if err != nil { - return err + return false, err } + _, err = file.Seek(0, 0) + if err != nil { + return false, err + } + defer file.Seek(currentPos, io.SeekStart) - destpath := filepath.Join(destination, header.Name) + buf := make([]byte, tarBlockSize) + if _, err = io.ReadFull(file, buf); err != nil { + return false, nil + } + return hasTarHeader(buf), nil +} - switch header.Typeflag { - case tar.TypeDir: - return mkdir(destpath) - case tar.TypeReg, tar.TypeRegA, tar.TypeChar, tar.TypeBlock, tar.TypeFifo: - return writeNewFile(destpath, tr, header.FileInfo().Mode()) - case tar.TypeSymlink: - return writeNewSymbolicLink(destpath, header.Linkname) - case tar.TypeLink: - return writeNewHardLink(destpath, filepath.Join(destination, header.Linkname)) - case tar.TypeXGlobalHeader: - // ignore the pax global header from git generated tarballs - return nil - default: - return fmt.Errorf("%s: unknown type flag: %c", header.Name, header.Typeflag) +// hasTarHeader checks passed bytes has a valid tar header or not. buf must +// contain at least 512 bytes and if not, it always returns false. 
func hasTarHeader(buf []byte) bool {
	if len(buf) < tarBlockSize {
		return false
	}

	// bytes 148-155 hold the header checksum, octal-encoded
	b := buf[148:156]
	b = bytes.Trim(b, " \x00") // clean up all spaces and null bytes
	if len(b) == 0 {
		return false // unknown format
	}
	hdrSum, err := strconv.ParseUint(string(b), 8, 64)
	if err != nil {
		return false
	}

	// According to the go official archive/tar, Sun tar uses signed byte
	// values so this calcs both signed and unsigned
	var usum uint64
	var sum int64
	for i, c := range buf {
		if 148 <= i && i < 156 {
			c = ' ' // checksum field itself is counted as blanks (spaces)
		}
		usum += uint64(uint8(c))
		sum += int64(int8(c))
	}

	if hdrSum != usum && int64(hdrSum) != sum {
		return false // invalid checksum
	}

	return true
}

func (t *Tar) String() string { return "tar" }

// tarBlockSize is the fixed size of a tar header block.
const tarBlockSize = 512

// Compile-time checks to ensure type implements desired interfaces.
var (
	_ = Reader(new(Tar))
	_ = Writer(new(Tar))
	_ = Archiver(new(Tar))
	_ = Unarchiver(new(Tar))
	_ = Walker(new(Tar))
	_ = Extractor(new(Tar))
	_ = Matcher(new(Tar))
)

// DefaultTar is a convenient archiver ready to use.
var DefaultTar = &Tar{
	MkdirAll: true,
}

// TarBz2 facilitates bzip2 compression
// (https://github.com/dsnet/compress/blob/master/doc/bzip2-format.pdf)
// of tarball archives.
type TarBz2 struct {
	*Tar

	// The compression level to use when writing,
	// as understood by the bzip2 package.
	CompressionLevel int
}

// Archive creates a compressed tar file at destination
// containing the files listed in sources. The destination
// must end with ".tar.bz2" or ".tbz2". File paths can be
// those of regular files or directories; directories will
// be recursively added.
func (tbz2 *TarBz2) Archive(sources []string, destination string) error {
	if !strings.HasSuffix(destination, ".tar.bz2") &&
		!strings.HasSuffix(destination, ".tbz2") {
		return fmt.Errorf("output filename must have .tar.bz2 or .tbz2 extension")
	}
	// install the bzip2 writer wrapper before delegating to Tar
	tbz2.wrapWriter()
	return tbz2.Tar.Archive(sources, destination)
}

// Unarchive unpacks the compressed tarball at
// source to destination. Destination will be
// treated as a folder name.
func (tbz2 *TarBz2) Unarchive(source, destination string) error {
	tbz2.wrapReader()
	return tbz2.Tar.Unarchive(source, destination)
}

// Walk calls walkFn for each visited item in archive.
func (tbz2 *TarBz2) Walk(archive string, walkFn WalkFunc) error {
	tbz2.wrapReader()
	return tbz2.Tar.Walk(archive, walkFn)
}
File paths -// can be those of regular files or directories. Regular -// files are stored at the 'root' of the archive, and -// directories are recursively added. -func (tarBz2Format) Write(output io.Writer, filePaths []string) error { - return writeTarBz2(filePaths, output, "") +// Create opens tbz2 for writing a compressed +// tar archive to out. +func (tbz2 *TarBz2) Create(out io.Writer) error { + tbz2.wrapWriter() + return tbz2.Create(out) } -// Make creates a .tar.bz2 file at tarbz2Path containing -// the contents of files listed in filePaths. File paths -// can be those of regular files or directories. Regular -// files are stored at the 'root' of the archive, and -// directories are recursively added. -func (tarBz2Format) Make(tarbz2Path string, filePaths []string) error { - out, err := os.Create(tarbz2Path) - if err != nil { - return fmt.Errorf("error creating %s: %v", tarbz2Path, err) - } - defer out.Close() +// Open opens t for reading a compressed archive from +// in. The size parameter is not used. +func (tbz2 *TarBz2) Open(in io.Reader, size int64) error { + tbz2.wrapReader() + return tbz2.Tar.Open(in, size) +} - return writeTarBz2(filePaths, out, tarbz2Path) +// Extract extracts a single file from the tar archive. +// If the target is a directory, the entire folder will +// be extracted into destination. 
+func (tbz2 *TarBz2) Extract(source, target, destination string) error { + tbz2.wrapReader() + return tbz2.Tar.Extract(source, target, destination) } -func writeTarBz2(filePaths []string, output io.Writer, dest string) error { - bz2w, err := bzip2.NewWriter(output, nil) - if err != nil { - return fmt.Errorf("error compressing bzip2: %v", err) +func (tbz2 *TarBz2) wrapWriter() { + var bz2w *bzip2.Writer + tbz2.Tar.writerWrapFn = func(w io.Writer) (io.Writer, error) { + var err error + bz2w, err = bzip2.NewWriter(w, &bzip2.WriterConfig{ + Level: tbz2.CompressionLevel, + }) + return bz2w, err + } + tbz2.Tar.cleanupWrapFn = func() { + bz2w.Close() } - defer bz2w.Close() - - return writeTar(filePaths, bz2w, dest) } -// Read untars a .tar.bz2 file read from a Reader and decompresses -// the contents into destination. -func (tarBz2Format) Read(input io.Reader, destination string) error { - bz2r, err := bzip2.NewReader(input, nil) - if err != nil { - return fmt.Errorf("error decompressing bzip2: %v", err) +func (tbz2 *TarBz2) wrapReader() { + var bz2r *bzip2.Reader + tbz2.Tar.readerWrapFn = func(r io.Reader) (io.Reader, error) { + var err error + bz2r, err = bzip2.NewReader(r, nil) + return bz2r, err + } + tbz2.Tar.cleanupWrapFn = func() { + bz2r.Close() } - defer bz2r.Close() - - return Tar.Read(bz2r, destination) } -// Open untars source and decompresses the contents into destination. -func (tarBz2Format) Open(source, destination string) error { - f, err := os.Open(source) - if err != nil { - return fmt.Errorf("%s: failed to open archive: %v", source, err) - } - defer f.Close() +func (tbz2 *TarBz2) String() string { return "tar.bz2" } + +// Compile-time checks to ensure type implements desired interfaces. +var ( + _ = Reader(new(TarBz2)) + _ = Writer(new(TarBz2)) + _ = Archiver(new(TarBz2)) + _ = Unarchiver(new(TarBz2)) + _ = Walker(new(TarBz2)) + _ = Extractor(new(TarBz2)) +) - return TarBz2.Read(f, destination) +// DefaultTarBz2 is a convenient archiver ready to use. 
+var DefaultTarBz2 = &TarBz2{ + CompressionLevel: bzip2.DefaultCompression, + Tar: DefaultTar, } diff --git a/targz.go b/targz.go index 6751d49d..513e71ed 100644 --- a/targz.go +++ b/targz.go @@ -4,95 +4,107 @@ import ( "compress/gzip" "fmt" "io" - "os" "strings" ) -// TarGz is for TarGz format -var TarGz tarGzFormat +// TarGz facilitates gzip compression +// (RFC 1952) of tarball archives. +type TarGz struct { + *Tar -func init() { - RegisterFormat("TarGz", TarGz) + // The compression level to use, as described + // in the compress/gzip package. + CompressionLevel int } -type tarGzFormat struct{} - -func (tarGzFormat) Match(filename string) bool { - return strings.HasSuffix(strings.ToLower(filename), ".tar.gz") || - strings.HasSuffix(strings.ToLower(filename), ".tgz") || - isTarGz(filename) -} - -// isTarGz checks the file has the gzip compressed Tar format header by reading -// its beginning block. -func isTarGz(targzPath string) bool { - f, err := os.Open(targzPath) - if err != nil { - return false - } - defer f.Close() - - gzr, err := gzip.NewReader(f) - if err != nil { - return false +// Archive creates a compressed tar file at destination +// containing the files listed in sources. The destination +// must end with ".tar.gz" or ".tgz". File paths can be +// those of regular files or directories; directories will +// be recursively added. +func (tgz *TarGz) Archive(sources []string, destination string) error { + if !strings.HasSuffix(destination, ".tar.gz") && + !strings.HasSuffix(destination, ".tgz") { + return fmt.Errorf("output filename must have .tar.gz or .tgz extension") } - defer gzr.Close() - - buf := make([]byte, tarBlockSize) - n, err := gzr.Read(buf) - if err != nil || n < tarBlockSize { - return false - } - - return hasTarHeader(buf) + tgz.wrapWriter() + return tgz.Tar.Archive(sources, destination) } -// Write outputs a .tar.gz file to a Writer containing -// the contents of files listed in filePaths. 
It works
-// the same way Tar does, but with gzip compression.
-func (tarGzFormat) Write(output io.Writer, filePaths []string) error {
-	return writeTarGz(filePaths, output, "")
+// Unarchive unpacks the compressed tarball at
+// source to destination. Destination will be
+// treated as a folder name.
+func (tgz *TarGz) Unarchive(source, destination string) error {
+	tgz.wrapReader()
+	return tgz.Tar.Unarchive(source, destination)
}
 
-// Make creates a .tar.gz file at targzPath containing
-// the contents of files listed in filePaths. It works
-// the same way Tar does, but with gzip compression.
-func (tarGzFormat) Make(targzPath string, filePaths []string) error {
-	out, err := os.Create(targzPath)
-	if err != nil {
-		return fmt.Errorf("error creating %s: %v", targzPath, err)
-	}
-	defer out.Close()
+// Walk calls walkFn for each visited item in archive.
+func (tgz *TarGz) Walk(archive string, walkFn WalkFunc) error {
+	tgz.wrapReader()
+	return tgz.Tar.Walk(archive, walkFn)
+}
 
-	return writeTarGz(filePaths, out, targzPath)
+// Create opens tgz for writing a compressed
+// tar archive to out.
+func (tgz *TarGz) Create(out io.Writer) error {
+	tgz.wrapWriter()
+	return tgz.Tar.Create(out)
}
 
-func writeTarGz(filePaths []string, output io.Writer, dest string) error {
-	gzw := gzip.NewWriter(output)
-	defer gzw.Close()
+// Open opens tgz for reading a compressed archive from
+// in. The size parameter is not used.
+func (tgz *TarGz) Open(in io.Reader, size int64) error {
+	tgz.wrapReader()
+	return tgz.Tar.Open(in, size)
+}
 
-	return writeTar(filePaths, gzw, dest)
+// Extract extracts a single file from the tar archive.
+// If the target is a directory, the entire folder will
+// be extracted into destination.
+func (tgz *TarGz) Extract(source, target, destination string) error {
+	tgz.wrapReader()
+	return tgz.Tar.Extract(source, target, destination)
}
 
-// Read untars a .tar.gz file read from a Reader and decompresses
-// the contents into destination.
-func (tarGzFormat) Read(input io.Reader, destination string) error { - gzr, err := gzip.NewReader(input) - if err != nil { - return fmt.Errorf("error decompressing: %v", err) +func (tgz *TarGz) wrapWriter() { + var gzw *gzip.Writer + tgz.Tar.writerWrapFn = func(w io.Writer) (io.Writer, error) { + var err error + gzw, err = gzip.NewWriterLevel(w, tgz.CompressionLevel) + return gzw, err + } + tgz.Tar.cleanupWrapFn = func() { + gzw.Close() } - defer gzr.Close() - - return Tar.Read(gzr, destination) } -// Open untars source and decompresses the contents into destination. -func (tarGzFormat) Open(source, destination string) error { - f, err := os.Open(source) - if err != nil { - return fmt.Errorf("%s: failed to open archive: %v", source, err) +func (tgz *TarGz) wrapReader() { + var gzr *gzip.Reader + tgz.Tar.readerWrapFn = func(r io.Reader) (io.Reader, error) { + var err error + gzr, err = gzip.NewReader(r) + return gzr, err + } + tgz.Tar.cleanupWrapFn = func() { + gzr.Close() } - defer f.Close() +} + +func (tgz *TarGz) String() string { return "tar.gz" } + +// Compile-time checks to ensure type implements desired interfaces. +var ( + _ = Reader(new(TarGz)) + _ = Writer(new(TarGz)) + _ = Archiver(new(TarGz)) + _ = Unarchiver(new(TarGz)) + _ = Walker(new(TarGz)) + _ = Extractor(new(TarGz)) +) - return TarGz.Read(f, destination) +// DefaultTarGz is a convenient archiver ready to use. +var DefaultTarGz = &TarGz{ + CompressionLevel: gzip.DefaultCompression, + Tar: DefaultTar, } diff --git a/tarlz4.go b/tarlz4.go index 1ddc881f..10be5f26 100644 --- a/tarlz4.go +++ b/tarlz4.go @@ -3,90 +3,105 @@ package archiver import ( "fmt" "io" - "os" "strings" "github.com/pierrec/lz4" ) -// TarLz4 is for TarLz4 format -var TarLz4 tarLz4Format +// TarLz4 facilitates lz4 compression +// (https://github.com/lz4/lz4/tree/master/doc) +// of tarball archives. +type TarLz4 struct { + *Tar -func init() { - RegisterFormat("TarLz4", TarLz4) + // The compression level to use when writing. 
+	// Minimum 0 (fast compression), maximum 12
+	// (most space savings).
+	CompressionLevel int
}
 
-type tarLz4Format struct{}
+// Archive creates a compressed tar file at destination
+// containing the files listed in sources. The destination
+// must end with ".tar.lz4" or ".tlz4". File paths can be
+// those of regular files or directories; directories will
+// be recursively added.
+func (tlz4 *TarLz4) Archive(sources []string, destination string) error {
+	if !strings.HasSuffix(destination, ".tar.lz4") &&
+		!strings.HasSuffix(destination, ".tlz4") {
+		return fmt.Errorf("output filename must have .tar.lz4 or .tlz4 extension")
+	}
+	tlz4.wrapWriter()
+	return tlz4.Tar.Archive(sources, destination)
+}
 
-func (tarLz4Format) Match(filename string) bool {
-	return strings.HasSuffix(strings.ToLower(filename), ".tar.lz4") || strings.HasSuffix(strings.ToLower(filename), ".tlz4") || isTarLz4(filename)
+// Unarchive unpacks the compressed tarball at
+// source to destination. Destination will be
+// treated as a folder name.
+func (tlz4 *TarLz4) Unarchive(source, destination string) error {
+	tlz4.wrapReader()
+	return tlz4.Tar.Unarchive(source, destination)
}
 
-// isTarLz4 checks the file has the lz4 compressed Tar format header by
-// reading its beginning block.
-func isTarLz4(tarlz4Path string) bool {
-	f, err := os.Open(tarlz4Path)
-	if err != nil {
-		return false
-	}
-	defer f.Close()
+// Walk calls walkFn for each visited item in archive.
+func (tlz4 *TarLz4) Walk(archive string, walkFn WalkFunc) error {
+	tlz4.wrapReader()
+	return tlz4.Tar.Walk(archive, walkFn)
+}
 
-	lz4r := lz4.NewReader(f)
-	buf := make([]byte, tarBlockSize)
-	n, err := lz4r.Read(buf)
-	if err != nil || n < tarBlockSize {
-		return false
-	}
+// Create opens tlz4 for writing a compressed
+// tar archive to out.
+func (tlz4 *TarLz4) Create(out io.Writer) error {
+	tlz4.wrapWriter()
+	return tlz4.Tar.Create(out)
+}
 
-	return hasTarHeader(buf)
+// Open opens tlz4 for reading a compressed archive from
+// in.
The size parameter is not used. +func (tlz4 *TarLz4) Open(in io.Reader, size int64) error { + tlz4.wrapReader() + return tlz4.Tar.Open(in, size) } -// Write outputs a .tar.lz4 file to a Writer containing -// the contents of files listed in filePaths. File paths -// can be those of regular files or directories. Regular -// files are stored at the 'root' of the archive, and -// directories are recursively added. -func (tarLz4Format) Write(output io.Writer, filePaths []string) error { - return writeTarLz4(filePaths, output, "") +// Extract extracts a single file from the tar archive. +// If the target is a directory, the entire folder will +// be extracted into destination. +func (tlz4 *TarLz4) Extract(source, target, destination string) error { + tlz4.wrapReader() + return tlz4.Tar.Extract(source, target, destination) } -// Make creates a .tar.lz4 file at tarlz4Path containing -// the contents of files listed in filePaths. File paths -// can be those of regular files or directories. Regular -// files are stored at the 'root' of the archive, and -// directories are recursively added. 
-func (tarLz4Format) Make(tarlz4Path string, filePaths []string) error { - out, err := os.Create(tarlz4Path) - if err != nil { - return fmt.Errorf("error creating %s: %v", tarlz4Path, err) +func (tlz4 *TarLz4) wrapWriter() { + var lz4w *lz4.Writer + tlz4.Tar.writerWrapFn = func(w io.Writer) (io.Writer, error) { + lz4w = lz4.NewWriter(w) + lz4w.Header.CompressionLevel = tlz4.CompressionLevel + return lz4w, nil + } + tlz4.Tar.cleanupWrapFn = func() { + lz4w.Close() } - defer out.Close() - - return writeTarLz4(filePaths, out, tarlz4Path) } -func writeTarLz4(filePaths []string, output io.Writer, dest string) error { - lz4w := lz4.NewWriter(output) - defer lz4w.Close() - - return writeTar(filePaths, lz4w, dest) +func (tlz4 *TarLz4) wrapReader() { + tlz4.Tar.readerWrapFn = func(r io.Reader) (io.Reader, error) { + return lz4.NewReader(r), nil + } } -// Read untars a .tar.xz file read from a Reader and decompresses -// the contents into destination. -func (tarLz4Format) Read(input io.Reader, destination string) error { - lz4r := lz4.NewReader(input) +func (tlz4 *TarLz4) String() string { return "tar.lz4" } - return Tar.Read(lz4r, destination) -} - -// Open untars source and decompresses the contents into destination. -func (tarLz4Format) Open(source, destination string) error { - f, err := os.Open(source) - if err != nil { - return fmt.Errorf("%s: failed to open archive: %v", source, err) - } - defer f.Close() +// Compile-time checks to ensure type implements desired interfaces. +var ( + _ = Reader(new(TarLz4)) + _ = Writer(new(TarLz4)) + _ = Archiver(new(TarLz4)) + _ = Unarchiver(new(TarLz4)) + _ = Walker(new(TarLz4)) + _ = Extractor(new(TarLz4)) +) - return TarLz4.Read(f, destination) +// DefaultTarLz4 is a convenient archiver ready to use. 
+var DefaultTarLz4 = &TarLz4{ + CompressionLevel: 9, // https://github.com/lz4/lz4/blob/1b819bfd633ae285df2dfe1b0589e1ec064f2873/lib/lz4hc.h#L48 + Tar: DefaultTar, } diff --git a/tarsz.go b/tarsz.go index 2e290190..4533c3df 100644 --- a/tarsz.go +++ b/tarsz.go @@ -3,90 +3,98 @@ package archiver import ( "fmt" "io" - "os" "strings" "github.com/golang/snappy" ) -// TarSz is for TarSz format -var TarSz tarSzFormat - -func init() { - RegisterFormat("TarSz", TarSz) +// TarSz facilitates Snappy compression +// (https://github.com/google/snappy) +// of tarball archives. +type TarSz struct { + *Tar } -type tarSzFormat struct{} +// Archive creates a compressed tar file at destination +// containing the files listed in sources. The destination +// must end with ".tar.sz" or ".tsz". File paths can be +// those of regular files or directories; directories will +// be recursively added. +func (tsz *TarSz) Archive(sources []string, destination string) error { + if !strings.HasSuffix(destination, ".tar.sz") && + !strings.HasSuffix(destination, ".tsz") { + return fmt.Errorf("output filename must have .tar.sz or .tsz extension") + } + tsz.wrapWriter() + return tsz.Tar.Archive(sources, destination) +} -func (tarSzFormat) Match(filename string) bool { - return strings.HasSuffix(strings.ToLower(filename), ".tar.sz") || strings.HasSuffix(strings.ToLower(filename), ".tsz") || isTarSz(filename) +// Unarchive unpacks the compressed tarball at +// source to destination. Destination will be +// treated as a folder name. +func (tsz *TarSz) Unarchive(source, destination string) error { + tsz.wrapReader() + return tsz.Tar.Unarchive(source, destination) } -// isTarSz checks the file has the sz compressed Tar format header by -// reading its beginning block. -func isTarSz(tarszPath string) bool { - f, err := os.Open(tarszPath) - if err != nil { - return false - } - defer f.Close() +// Walk calls walkFn for each visited item in archive. 
+func (tsz *TarSz) Walk(archive string, walkFn WalkFunc) error {
+	tsz.wrapReader()
+	return tsz.Tar.Walk(archive, walkFn)
+}
 
-	szr := snappy.NewReader(f)
-	buf := make([]byte, tarBlockSize)
-	n, err := szr.Read(buf)
-	if err != nil || n < tarBlockSize {
-		return false
-	}
+// Create opens tsz for writing a compressed
+// tar archive to out.
+func (tsz *TarSz) Create(out io.Writer) error {
+	tsz.wrapWriter()
+	return tsz.Tar.Create(out)
+}
 
-	return hasTarHeader(buf)
+// Open opens tsz for reading a compressed archive from
+// in. The size parameter is not used.
+func (tsz *TarSz) Open(in io.Reader, size int64) error {
+	tsz.wrapReader()
+	return tsz.Tar.Open(in, size)
}
 
-// Write outputs a .tar.sz file to a Writer containing
-// the contents of files listed in filePaths. File paths
-// can be those of regular files or directories. Regular
-// files are stored at the 'root' of the archive, and
-// directories are recursively added.
-func (tarSzFormat) Write(output io.Writer, filePaths []string) error {
-	return writeTarSz(filePaths, output, "")
+// Extract extracts a single file from the tar archive.
+// If the target is a directory, the entire folder will
+// be extracted into destination.
+func (tsz *TarSz) Extract(source, target, destination string) error {
+	tsz.wrapReader()
+	return tsz.Tar.Extract(source, target, destination)
}
 
-// Make creates a .tar.sz file at tarszPath containing
-// the contents of files listed in filePaths. File paths
-// can be those of regular files or directories. Regular
-// files are stored at the 'root' of the archive, and
-// directories are recursively added.
-func (tarSzFormat) Make(tarszPath string, filePaths []string) error { - out, err := os.Create(tarszPath) - if err != nil { - return fmt.Errorf("error creating %s: %v", tarszPath, err) +func (tsz *TarSz) wrapWriter() { + var sw *snappy.Writer + tsz.Tar.writerWrapFn = func(w io.Writer) (io.Writer, error) { + sw = snappy.NewWriter(w) + return sw, nil + } + tsz.Tar.cleanupWrapFn = func() { + sw.Close() } - defer out.Close() - - return writeTarSz(filePaths, out, tarszPath) } -func writeTarSz(filePaths []string, output io.Writer, dest string) error { - szw := snappy.NewBufferedWriter(output) - defer szw.Close() - - return writeTar(filePaths, szw, dest) +func (tsz *TarSz) wrapReader() { + tsz.Tar.readerWrapFn = func(r io.Reader) (io.Reader, error) { + return snappy.NewReader(r), nil + } } -// Read untars a .tar.sz file read from a Reader and decompresses -// the contents into destination. -func (tarSzFormat) Read(input io.Reader, destination string) error { - szr := snappy.NewReader(input) +func (tsz *TarSz) String() string { return "tar.sz" } - return Tar.Read(szr, destination) -} - -// Open untars source and decompresses the contents into destination. -func (tarSzFormat) Open(source, destination string) error { - f, err := os.Open(source) - if err != nil { - return fmt.Errorf("%s: failed to open archive: %v", source, err) - } - defer f.Close() +// Compile-time checks to ensure type implements desired interfaces. +var ( + _ = Reader(new(TarSz)) + _ = Writer(new(TarSz)) + _ = Archiver(new(TarSz)) + _ = Unarchiver(new(TarSz)) + _ = Walker(new(TarSz)) + _ = Extractor(new(TarSz)) +) - return TarSz.Read(f, destination) +// DefaultTarSz is a convenient archiver ready to use. 
+var DefaultTarSz = &TarSz{
+	Tar: DefaultTar,
}
diff --git a/tarxz.go b/tarxz.go
index e222fb4a..c1d27ea9 100644
--- a/tarxz.go
+++ b/tarxz.go
@@ -3,103 +3,103 @@ package archiver
 import (
 	"fmt"
 	"io"
-	"os"
 	"strings"
 
 	"github.com/ulikunitz/xz"
+	fastxz "github.com/xi2/xz"
 )
 
-// TarXZ is for TarXZ format
-var TarXZ xzFormat
-
-func init() {
-	RegisterFormat("TarXZ", TarXZ)
+// TarXz facilitates xz compression
+// (https://tukaani.org/xz/format.html)
+// of tarball archives.
+type TarXz struct {
+	*Tar
}
 
-type xzFormat struct{}
-
-// Match returns whether filename matches this format.
-func (xzFormat) Match(filename string) bool {
-	return strings.HasSuffix(strings.ToLower(filename), ".tar.xz") ||
-		strings.HasSuffix(strings.ToLower(filename), ".txz") ||
-		isTarXz(filename)
-}
-
-// isTarXz checks the file has the xz compressed Tar format header by reading
-// its beginning block.
-func isTarXz(tarxzPath string) bool {
-	f, err := os.Open(tarxzPath)
-	if err != nil {
-		return false
-	}
-	defer f.Close()
-
-	xzr, err := xz.NewReader(f)
-	if err != nil {
-		return false
+// Archive creates a compressed tar file at destination
+// containing the files listed in sources. The destination
+// must end with ".tar.xz" or ".txz". File paths can be
+// those of regular files or directories; directories will
+// be recursively added.
+func (txz *TarXz) Archive(sources []string, destination string) error {
+	if !strings.HasSuffix(destination, ".tar.xz") &&
+		!strings.HasSuffix(destination, ".txz") {
+		return fmt.Errorf("output filename must have .tar.xz or .txz extension")
	}
+	txz.wrapWriter()
+	return txz.Tar.Archive(sources, destination)
+}
 
-	buf := make([]byte, tarBlockSize)
-	n, err := xzr.Read(buf)
-	if err != nil || n < tarBlockSize {
-		return false
-	}
+// Unarchive unpacks the compressed tarball at
+// source to destination. Destination will be
+// treated as a folder name.
+func (txz *TarXz) Unarchive(source, destination string) error {
+	txz.wrapReader()
+	return txz.Tar.Unarchive(source, destination)
+}
 
-	return hasTarHeader(buf)
+// Walk calls walkFn for each visited item in archive.
+func (txz *TarXz) Walk(archive string, walkFn WalkFunc) error {
+	txz.wrapReader()
+	return txz.Tar.Walk(archive, walkFn)
}
 
-// Write outputs a .tar.xz file to a Writer containing
-// the contents of files listed in filePaths. File paths
-// can be those of regular files or directories. Regular
-// files are stored at the 'root' of the archive, and
-// directories are recursively added.
-func (xzFormat) Write(output io.Writer, filePaths []string) error {
-	return writeTarXZ(filePaths, output, "")
+// Create opens txz for writing a compressed
+// tar archive to out.
+func (txz *TarXz) Create(out io.Writer) error {
+	txz.wrapWriter()
+	return txz.Tar.Create(out)
}
 
-// Make creates a .tar.xz file at xzPath containing
-// the contents of files listed in filePaths. File
-// paths can be those of regular files or directories.
-// Regular files are stored at the 'root' of the
-// archive, and directories are recursively added.
-func (xzFormat) Make(xzPath string, filePaths []string) error {
-	out, err := os.Create(xzPath)
-	if err != nil {
-		return fmt.Errorf("error creating %s: %v", xzPath, err)
-	}
-	defer out.Close()
+// Open opens txz for reading a compressed archive from
+// in. The size parameter is not used.
+func (txz *TarXz) Open(in io.Reader, size int64) error {
+	txz.wrapReader()
+	return txz.Tar.Open(in, size)
+}
 
-	return writeTarXZ(filePaths, out, xzPath)
+// Extract extracts a single file from the tar archive.
+// If the target is a directory, the entire folder will
+// be extracted into destination.
+func (txz *TarXz) Extract(source, target, destination string) error { + txz.wrapReader() + return txz.Tar.Extract(source, target, destination) } -func writeTarXZ(filePaths []string, output io.Writer, dest string) error { - xzw, err := xz.NewWriter(output) - if err != nil { - return fmt.Errorf("error compressing xz: %v", err) +func (txz *TarXz) wrapWriter() { + var xzw *xz.Writer + txz.Tar.writerWrapFn = func(w io.Writer) (io.Writer, error) { + var err error + xzw, err = xz.NewWriter(w) + return xzw, err + } + txz.Tar.cleanupWrapFn = func() { + xzw.Close() } - defer xzw.Close() - - return writeTar(filePaths, xzw, dest) } -// Read untars a .tar.xz file read from a Reader and decompresses -// the contents into destination. -func (xzFormat) Read(input io.Reader, destination string) error { - xzr, err := xz.NewReader(input) - if err != nil { - return fmt.Errorf("error decompressing xz: %v", err) +func (txz *TarXz) wrapReader() { + var xzr *fastxz.Reader + txz.Tar.readerWrapFn = func(r io.Reader) (io.Reader, error) { + var err error + xzr, err = fastxz.NewReader(r, 0) + return xzr, err } - - return Tar.Read(xzr, destination) } -// Open untars source and decompresses the contents into destination. -func (xzFormat) Open(source, destination string) error { - f, err := os.Open(source) - if err != nil { - return fmt.Errorf("%s: failed to open archive: %v", source, err) - } - defer f.Close() +func (txz *TarXz) String() string { return "tar.xz" } + +// Compile-time checks to ensure type implements desired interfaces. +var ( + _ = Reader(new(TarXz)) + _ = Writer(new(TarXz)) + _ = Archiver(new(TarXz)) + _ = Unarchiver(new(TarXz)) + _ = Walker(new(TarXz)) + _ = Extractor(new(TarXz)) +) - return TarXZ.Read(f, destination) +// DefaultTarXz is a convenient archiver ready to use. 
+var DefaultTarXz = &TarXz{ + Tar: DefaultTar, } diff --git a/archiver/xz.go b/xz.go similarity index 100% rename from archiver/xz.go rename to xz.go diff --git a/zip.go b/zip.go index 9d20bc1b..9828c630 100644 --- a/zip.go +++ b/zip.go @@ -1,238 +1,573 @@ -// Package archiver makes it super easy to create and open .zip, -// .tar.gz, and .tar.bz2 files. package archiver import ( "archive/zip" "bytes" + "compress/flate" "fmt" "io" - "io/ioutil" + "log" "os" "path" "path/filepath" "strings" ) -// Zip is for Zip format -var Zip zipFormat - -func init() { - RegisterFormat("Zip", Zip) +// Zip provides facilities for operating ZIP archives. +// See https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT. +type Zip struct { + // The compression level to use, as described + // in the compress/flate package. + CompressionLevel int + + // Whether to overwrite existing files; if false, + // an error is returned if the file exists. + OverwriteExisting bool + + // Whether to make all the directories necessary + // to create a zip archive in the desired path. + MkdirAll bool + + // If enabled, selective compression will only + // compress files which are not already in a + // compressed format; this is decided based + // simply on file extension. + SelectiveCompression bool + + // A single top-level folder can be implicitly + // created by the Archive or Unarchive methods + // if the files to be added to the archive + // or the files to be extracted from the archive + // do not all have a common root. This roughly + // mimics the behavior of archival tools integrated + // into OS file browsers which create a subfolder + // to avoid unexpectedly littering the destination + // folder with potentially many files, causing a + // problematic cleanup/organization situation. + // This feature is available for both creation + // and extraction of archives, but may be slightly + // inefficient with lots and lots of files, + // especially on extraction. 
+ ImplicitTopLevelFolder bool + + // If true, errors encountered during reading + // or writing a single file will be logged and + // the operation will continue on remaining files. + ContinueOnError bool + + zw *zip.Writer + zr *zip.Reader + ridx int } -type zipFormat struct{} +// Archive creates a .zip file at destination containing +// the files listed in sources. The destination must end +// with ".zip". File paths can be those of regular files +// or directories. Regular files are stored at the 'root' +// of the archive, and directories are recursively added. +func (z *Zip) Archive(sources []string, destination string) error { + if !strings.HasSuffix(destination, ".zip") { + return fmt.Errorf("output filename must have .zip extension") + } + if !z.OverwriteExisting && fileExists(destination) { + return fmt.Errorf("file already exists: %s", destination) + } + + // make the folder to contain the resulting archive + // if it does not already exist + destDir := filepath.Dir(destination) + if z.MkdirAll && !fileExists(destDir) { + err := mkdir(destDir) + if err != nil { + return fmt.Errorf("making folder for destination: %v", err) + } + } + + out, err := os.Create(destination) + if err != nil { + return fmt.Errorf("creating %s: %v", destination, err) + } + defer out.Close() + + err = z.Create(out) + if err != nil { + return fmt.Errorf("creating zip: %v", err) + } + defer z.Close() + + var topLevelFolder string + if z.ImplicitTopLevelFolder && multipleTopLevels(sources) { + topLevelFolder = folderNameFromFileName(destination) + } + + for _, source := range sources { + err := z.writeWalk(source, topLevelFolder, destination) + if err != nil { + return fmt.Errorf("walking %s: %v", source, err) + } + } -func (zipFormat) Match(filename string) bool { - return strings.HasSuffix(strings.ToLower(filename), ".zip") || isZip(filename) + return nil } -// isZip checks the file has the Zip format signature by reading its beginning -// bytes and matching it against "PK\x03\x04" 
-func isZip(zipPath string) bool { - f, err := os.Open(zipPath) +// Unarchive unpacks the .zip file at source to destination. +// Destination will be treated as a folder name. +func (z *Zip) Unarchive(source, destination string) error { + if !fileExists(destination) && z.MkdirAll { + err := mkdir(destination) + if err != nil { + return fmt.Errorf("preparing destination: %v", err) + } + } + + file, err := os.Open(source) if err != nil { - return false + return fmt.Errorf("opening source file: %v", err) } - defer f.Close() + defer file.Close() - buf := make([]byte, 4) - if n, err := f.Read(buf); err != nil || n < 4 { - return false - } - - return bytes.Equal(buf, []byte("PK\x03\x04")) -} - -// Write outputs a .zip file to the given writer with -// the contents of files listed in filePaths. File paths -// can be those of regular files or directories. Regular -// files are stored at the 'root' of the archive, and -// directories are recursively added. -// -// Files with an extension for formats that are already -// compressed will be stored only, not compressed. -func (zipFormat) Write(output io.Writer, filePaths []string) error { - w := zip.NewWriter(output) - for _, fpath := range filePaths { - if err := zipFile(w, fpath); err != nil { - w.Close() - return err + fileInfo, err := file.Stat() + if err != nil { + return fmt.Errorf("statting source file: %v", err) + } + + err = z.Open(file, fileInfo.Size()) + if err != nil { + return fmt.Errorf("opening zip archive for reading: %v", err) + } + defer z.Close() + + // if the files in the archive do not all share a common + // root, then make sure we extract to a single subfolder + // rather than potentially littering the destination... 
+ if z.ImplicitTopLevelFolder { + files := make([]string, len(z.zr.File)) + for i := range z.zr.File { + files[i] = z.zr.File[i].Name + } + if multipleTopLevels(files) { + destination = filepath.Join(destination, folderNameFromFileName(source)) } } - return w.Close() + for { + err := z.extractNext(destination) + if err == io.EOF { + break + } + if err != nil { + if z.ContinueOnError { + log.Printf("[ERROR] Reading file in zip archive: %v", err) + continue + } + return fmt.Errorf("reading file in zip archive: %v", err) + } + } + + return nil } -// Make creates a .zip file in the location zipPath containing -// the contents of files listed in filePaths. File paths -// can be those of regular files or directories. Regular -// files are stored at the 'root' of the archive, and -// directories are recursively added. -// -// Files with an extension for formats that are already -// compressed will be stored only, not compressed. -func (zipFormat) Make(zipPath string, filePaths []string) error { - out, err := os.Create(zipPath) +func (z *Zip) extractNext(to string) error { + f, err := z.Read() if err != nil { - return fmt.Errorf("error creating %s: %v", zipPath, err) + return err // don't wrap error; calling loop must break on io.EOF + } + defer f.Close() + header, ok := f.Header.(zip.FileHeader) + if !ok { + return fmt.Errorf("expected header to be zip.FileHeader but was %T", f.Header) + } + return z.extractFile(f, filepath.Join(to, header.Name)) +} + +func (z *Zip) extractFile(f File, to string) error { + // if a directory, no content; simply make the directory and return + if f.IsDir() { + return mkdir(to) + } + + // do not overwrite existing files, if configured + if !z.OverwriteExisting && fileExists(to) { + return fmt.Errorf("file already exists: %s", to) } - defer out.Close() - return Zip.Write(out, filePaths) + return writeNewFile(to, f, f.Mode()) } -func zipFile(w *zip.Writer, source string) error { - sourceInfo, err := os.Stat(source) +func (z *Zip) 
writeWalk(source, topLevelFolder, destination string) error { + sourceAbs, err := filepath.Abs(source) + if err != nil { + return fmt.Errorf("getting absolute path: %v", err) + } + sourceInfo, err := os.Stat(sourceAbs) if err != nil { return fmt.Errorf("%s: stat: %v", source, err) } + destAbs, err := filepath.Abs(destination) + if err != nil { + return fmt.Errorf("%s: getting absolute path of destination %s: %v", source, destination, err) + } var baseDir string + if topLevelFolder != "" { + baseDir = topLevelFolder + } if sourceInfo.IsDir() { - baseDir = filepath.Base(source) + baseDir = path.Join(baseDir, sourceInfo.Name()) } return filepath.Walk(source, func(fpath string, info os.FileInfo, err error) error { - if err != nil { - return fmt.Errorf("walking to %s: %v", fpath, err) + handleErr := func(err error) error { + if z.ContinueOnError { + log.Printf("[ERROR] Walking %s: %v", fpath, err) + return nil + } + return err } - - header, err := zip.FileInfoHeader(info) if err != nil { - return fmt.Errorf("%s: getting header: %v", fpath, err) + return handleErr(fmt.Errorf("traversing %s: %v", fpath, err)) } - - if baseDir != "" { - name, err := filepath.Rel(source, fpath) - if err != nil { - return err - } - header.Name = path.Join(baseDir, filepath.ToSlash(name)) - } - - if info.IsDir() { - header.Name += "/" - header.Method = zip.Store - } else { - ext := strings.ToLower(path.Ext(header.Name)) - if _, ok := compressedFormats[ext]; ok { - header.Method = zip.Store - } else { - header.Method = zip.Deflate - } + if info == nil { + return handleErr(fmt.Errorf("%s: no file info", fpath)) } - writer, err := w.CreateHeader(header) + // make sure we do not copy the output file into the output + // file; that results in an infinite loop and disk exhaustion! 
+ fpathAbs, err := filepath.Abs(fpath) if err != nil { - return fmt.Errorf("%s: making header: %v", fpath, err) + return handleErr(fmt.Errorf("%s: getting absolute path: %v", fpath, err)) } - - if info.IsDir() { + if within(fpathAbs, destAbs) { return nil } - if header.Mode().IsRegular() { - file, err := os.Open(fpath) - if err != nil { - return fmt.Errorf("%s: opening: %v", fpath, err) - } - defer file.Close() + // build the name to be used within the archive + name, err := filepath.Rel(source, fpath) + if err != nil { + return handleErr(err) + } + nameInArchive := path.Join(baseDir, filepath.ToSlash(name)) - _, err = io.CopyN(writer, file, info.Size()) - if err != nil && err != io.EOF { - return fmt.Errorf("%s: copying contents: %v", fpath, err) - } + file, err := os.Open(fpath) + if err != nil { + return handleErr(fmt.Errorf("%s: opening: %v", fpath, err)) + } + defer file.Close() + + err = z.Write(File{ + FileInfo: FileInfo{ + FileInfo: info, + CustomName: nameInArchive, + }, + ReadCloser: file, + }) + if err != nil { + return handleErr(fmt.Errorf("%s: writing: %s", fpath, err)) } return nil }) } -// Read unzips the .zip file read from the input Reader into destination. -func (zipFormat) Read(input io.Reader, destination string) error { - buf, err := ioutil.ReadAll(input) - if err != nil { - return err +// Create opens z for writing a ZIP archive to out. +func (z *Zip) Create(out io.Writer) error { + if z.zw != nil { + return fmt.Errorf("zip archive is already created for writing") + } + z.zw = zip.NewWriter(out) + if z.CompressionLevel != flate.DefaultCompression { + z.zw.RegisterCompressor(zip.Deflate, func(out io.Writer) (io.WriteCloser, error) { + return flate.NewWriter(out, z.CompressionLevel) + }) + } + return nil +} + +// Write writes f to z, which must have been opened for writing first. 
+func (z *Zip) Write(f File) error { + if z.zw == nil { + return fmt.Errorf("zip archive was not created for writing first") + } + if f.FileInfo == nil { + return fmt.Errorf("no file info") + } + if f.FileInfo.Name() == "" { + return fmt.Errorf("missing file name") + } + if f.ReadCloser == nil { + return fmt.Errorf("%s: no way to read file contents", f.Name()) } - rdr := bytes.NewReader(buf) - r, err := zip.NewReader(rdr, rdr.Size()) + header, err := zip.FileInfoHeader(f) if err != nil { - return err + return fmt.Errorf("%s: getting header: %v", f.Name(), err) } - return unzipAll(r, destination) -} + if f.IsDir() { + header.Name += "/" // required - strangely no mention of this in zip spec? but is in godoc... + header.Method = zip.Store + } else { + ext := strings.ToLower(path.Ext(header.Name)) + if _, ok := compressedFormats[ext]; ok && z.SelectiveCompression { + header.Method = zip.Store + } else { + header.Method = zip.Deflate + } + } -// Open unzips the .zip file at source into destination. -func (zipFormat) Open(source, destination string) error { - r, err := zip.OpenReader(source) + writer, err := z.zw.CreateHeader(header) if err != nil { - return err + return fmt.Errorf("%s: making header: %v", f.Name(), err) } - defer r.Close() - return unzipAll(&r.Reader, destination) -} + if f.IsDir() { + return nil + } -func unzipAll(r *zip.Reader, destination string) error { - for _, zf := range r.File { - if err := unzipFile(zf, destination); err != nil { - return err + if header.Mode().IsRegular() { + _, err := io.Copy(writer, f) + if err != nil { + return fmt.Errorf("%s: copying contents: %v", f.Name(), err) } } return nil } -func unzipFile(zf *zip.File, destination string) error { - err := sanitizeExtractPath(zf.Name, destination) +// Open opens z for reading an archive from in, +// which is expected to have the given size and +// which must be an io.ReaderAt. 
+func (z *Zip) Open(in io.Reader, size int64) error { + inRdrAt, ok := in.(io.ReaderAt) + if !ok { + return fmt.Errorf("reader must be io.ReaderAt") + } + if z.zr != nil { + return fmt.Errorf("zip archive is already open for reading") + } + var err error + z.zr, err = zip.NewReader(inRdrAt, size) if err != nil { - return err + return fmt.Errorf("creating reader: %v", err) } + z.ridx = 0 + return nil +} + +// Read reads the next file from z, which must have +// already been opened for reading. If there are no +// more files, the error is io.EOF. The File must +// be closed when finished reading from it. +func (z *Zip) Read() (File, error) { + if z.zr == nil { + return File{}, fmt.Errorf("zip archive is not open") + } + if z.ridx >= len(z.zr.File) { + return File{}, io.EOF + } + + // access the file and increment counter so that + // if there is an error processing this file, the + // caller can still iterate to the next file + zf := z.zr.File[z.ridx] + z.ridx++ - if strings.HasSuffix(zf.Name, "/") { - return mkdir(filepath.Join(destination, zf.Name)) + file := File{ + FileInfo: zf.FileInfo(), + Header: zf.FileHeader, } rc, err := zf.Open() if err != nil { - return fmt.Errorf("%s: open compressed file: %v", zf.Name, err) + return file, fmt.Errorf("%s: open compressed file: %v", zf.Name, err) } - defer rc.Close() + file.ReadCloser = rc - return writeNewFile(filepath.Join(destination, zf.Name), rc, zf.FileInfo().Mode()) + return file, nil +} + +// Close closes the zip archive(s) opened by Create and Open. +func (z *Zip) Close() error { + if z.zr != nil { + z.zr = nil + } + if z.zw != nil { + zw := z.zw + z.zw = nil + return zw.Close() + } + return nil } +// Walk calls walkFn for each visited item in archive. 
+func (z *Zip) Walk(archive string, walkFn WalkFunc) error { + zr, err := zip.OpenReader(archive) + if err != nil { + return fmt.Errorf("opening zip reader: %v", err) + } + defer zr.Close() + + for _, zf := range zr.File { + zfrc, err := zf.Open() + if err != nil { + zfrc.Close() + if z.ContinueOnError { + log.Printf("[ERROR] Opening %s: %v", zf.Name, err) + continue + } + return fmt.Errorf("opening %s: %v", zf.Name, err) + } + + err = walkFn(File{ + FileInfo: zf.FileInfo(), + Header: zf.FileHeader, + ReadCloser: zfrc, + }) + zfrc.Close() + if err != nil { + if err == ErrStopWalk { + break + } + if z.ContinueOnError { + log.Printf("[ERROR] Walking %s: %v", zf.Name, err) + continue + } + return fmt.Errorf("walking %s: %v", zf.Name, err) + } + } + + return nil +} + +// Extract extracts a single file from the zip archive. +// If the target is a directory, the entire folder will +// be extracted into destination. +func (z *Zip) Extract(source, target, destination string) error { + // target refers to a path inside the archive, which should be clean also + target = path.Clean(target) + + // if the target ends up being a directory, then + // we will continue walking and extracting files + // until we are no longer within that directory + var targetDirPath string + + return z.Walk(source, func(f File) error { + zfh, ok := f.Header.(zip.FileHeader) + if !ok { + return fmt.Errorf("expected header to be zip.FileHeader but was %T", f.Header) + } + + // importantly, cleaning the path strips tailing slash, + // which must be appended to folders within the archive + name := path.Clean(zfh.Name) + if f.IsDir() && target == name { + targetDirPath = path.Dir(name) + } + + if within(target, zfh.Name) { + // either this is the exact file we want, or is + // in the directory we want to extract + + // build the filename we will extract to + end, err := filepath.Rel(targetDirPath, zfh.Name) + if err != nil { + return fmt.Errorf("relativizing paths: %v", err) + } + joined := 
filepath.Join(destination, end) + + err = z.extractFile(f, joined) + if err != nil { + return fmt.Errorf("extracting file %s: %v", zfh.Name, err) + } + + // if our target was not a directory, stop walk + if targetDirPath == "" { + return ErrStopWalk + } + } else if targetDirPath != "" { + // finished walking the entire directory + return ErrStopWalk + } + + return nil + }) +} + +// Match returns true if the format of file matches this +// type's format. It should not affect reader position. +func (*Zip) Match(file *os.File) (bool, error) { + currentPos, err := file.Seek(0, io.SeekCurrent) + if err != nil { + return false, err + } + _, err = file.Seek(0, 0) + if err != nil { + return false, err + } + defer file.Seek(currentPos, io.SeekStart) + + buf := make([]byte, 4) + if n, err := file.Read(buf); err != nil || n < 4 { + return false, nil + } + return bytes.Equal(buf, []byte("PK\x03\x04")), nil +} + +func (z *Zip) String() string { return "zip" } + +// Compile-time checks to ensure type implements desired interfaces. +var ( + _ = Reader(new(Zip)) + _ = Writer(new(Zip)) + _ = Archiver(new(Zip)) + _ = Unarchiver(new(Zip)) + _ = Walker(new(Zip)) + _ = Extractor(new(Zip)) + _ = Matcher(new(Zip)) +) + // compressedFormats is a (non-exhaustive) set of lowercased // file extensions for formats that are typically already -// compressed. Compressing already-compressed files often -// results in a larger file, so when possible, we check this -// set to avoid that. +// compressed. Compressing files that are already compressed +// is inefficient, so use this set of extension to avoid that. 
var compressedFormats = map[string]struct{}{ ".7z": {}, ".avi": {}, + ".br": {}, ".bz2": {}, ".cab": {}, + ".docx": {}, ".gif": {}, ".gz": {}, ".jar": {}, ".jpeg": {}, ".jpg": {}, ".lz": {}, + ".lz4": {}, ".lzma": {}, + ".m4v": {}, ".mov": {}, ".mp3": {}, ".mp4": {}, ".mpeg": {}, ".mpg": {}, ".png": {}, + ".pptx": {}, ".rar": {}, + ".sz": {}, ".tbz2": {}, ".tgz": {}, + ".tsz": {}, ".txz": {}, + ".xlsx": {}, ".xz": {}, ".zip": {}, ".zipx": {}, } + +// DefaultZip is a convenient archiver ready to use. +var DefaultZip = &Zip{ + CompressionLevel: flate.DefaultCompression, + MkdirAll: true, + SelectiveCompression: true, +}