From 347a1af40e706e32287b2e9bc32f9ff91870ed31 Mon Sep 17 00:00:00 2001 From: Saverio Miroddi Date: Thu, 4 Apr 2019 12:09:32 +0200 Subject: [PATCH] Add support for normalizing the archived files metadata When archiving files from different machines, even if the source content is the same, different archive files may be generated due to a variety of factors, like compression libraries or file metadata differences (permissions, etc.). This creates problems in situations where it's expected that the exact same archive should always be generated from a given source content (typically, a Lambda function resource). An example context that causes differences is Git on different operating systems: filesystems may have different mount permission masks, so files that are identical as far as Git is concerned (it doesn't distinguish 0644 from 0664) still end up generating a different archive. The `normalize_files_metadata` option makes the archiver produce a "normalized" archive, which solves the problem. In the Zip case, this is obtained by setting: - the compression method to Store - the modification date to a fixed one - the permissions to 0644 Disabling compression is a tradeoff; however, in the context where this functionality is used (e.g. Lambda function resources), the difference is negligible. 
--- archive/archiver.go | 8 +-- archive/data_source_archive_file.go | 17 +++-- archive/zip_archiver.go | 105 +++++++++++++++++++++++----- archive/zip_archiver_test.go | 60 ++++++++++++++-- 4 files changed, 158 insertions(+), 32 deletions(-) diff --git a/archive/archiver.go b/archive/archiver.go index 7491f98b..7b4bdf6d 100644 --- a/archive/archiver.go +++ b/archive/archiver.go @@ -6,10 +6,10 @@ import ( ) type Archiver interface { - ArchiveContent(content []byte, infilename string) error - ArchiveFile(infilename string) error - ArchiveDir(indirname string, excludes []string) error - ArchiveMultiple(content map[string][]byte) error + ArchiveContent(content []byte, infilename string, normalizeFilesMetadata bool) error + ArchiveFile(infilename string, normalizeFilesMetadata bool) error + ArchiveDir(indirname string, excludes []string, normalizeFilesMetadata bool) error + ArchiveMultiple(content map[string][]byte, normalizeFilesMetadata bool) error } type ArchiverBuilder func(filepath string) Archiver diff --git a/archive/data_source_archive_file.go b/archive/data_source_archive_file.go index a53fbdce..0b96609c 100644 --- a/archive/data_source_archive_file.go +++ b/archive/data_source_archive_file.go @@ -26,6 +26,12 @@ func dataSourceFile() *schema.Resource { Required: true, ForceNew: true, }, + "normalize_files_metadata": { + Type: schema.TypeBool, + Optional: true, + ForceNew: true, + Default: false, + }, "source": { Type: schema.TypeSet, Optional: true, @@ -165,6 +171,7 @@ func expandStringList(configured []interface{}) []string { func archive(d *schema.ResourceData) error { archiveType := d.Get("type").(string) outputPath := d.Get("output_path").(string) + normalizeFilesMetadata := d.Get("normalize_files_metadata").(bool) archiver := getArchiver(archiveType, outputPath) if archiver == nil { @@ -175,21 +182,21 @@ func archive(d *schema.ResourceData) error { if excludes, ok := d.GetOk("excludes"); ok { excludeList := expandStringList(excludes.(*schema.Set).List()) - 
if err := archiver.ArchiveDir(dir.(string), excludeList); err != nil { + if err := archiver.ArchiveDir(dir.(string), excludeList, normalizeFilesMetadata); err != nil { return fmt.Errorf("error archiving directory: %s", err) } } else { - if err := archiver.ArchiveDir(dir.(string), []string{""}); err != nil { + if err := archiver.ArchiveDir(dir.(string), []string{""}, normalizeFilesMetadata); err != nil { return fmt.Errorf("error archiving directory: %s", err) } } } else if file, ok := d.GetOk("source_file"); ok { - if err := archiver.ArchiveFile(file.(string)); err != nil { + if err := archiver.ArchiveFile(file.(string), normalizeFilesMetadata); err != nil { return fmt.Errorf("error archiving file: %s", err) } } else if filename, ok := d.GetOk("source_content_filename"); ok { content := d.Get("source_content").(string) - if err := archiver.ArchiveContent([]byte(content), filename.(string)); err != nil { + if err := archiver.ArchiveContent([]byte(content), filename.(string), normalizeFilesMetadata); err != nil { return fmt.Errorf("error archiving content: %s", err) } } else if v, ok := d.GetOk("source"); ok { @@ -199,7 +206,7 @@ func archive(d *schema.ResourceData) error { src := v.(map[string]interface{}) content[src["filename"].(string)] = []byte(src["content"].(string)) } - if err := archiver.ArchiveMultiple(content); err != nil { + if err := archiver.ArchiveMultiple(content, normalizeFilesMetadata); err != nil { return fmt.Errorf("error archiving content: %s", err) } } else { diff --git a/archive/zip_archiver.go b/archive/zip_archiver.go index 1ed341f9..e0af7c32 100644 --- a/archive/zip_archiver.go +++ b/archive/zip_archiver.go @@ -3,6 +3,7 @@ package archive import ( "archive/zip" "fmt" + "io" "io/ioutil" "os" "path/filepath" @@ -10,6 +11,10 @@ import ( "time" ) +const ( + uint32max = (1 << 32) - 1 +) + type ZipArchiver struct { filepath string filewriter *os.File @@ -22,22 +27,35 @@ func NewZipArchiver(filepath string) Archiver { } } -func (a *ZipArchiver) 
ArchiveContent(content []byte, infilename string) error { +func (a *ZipArchiver) ArchiveContent(content []byte, infilename string, normalizeFilesMetadata bool) error { if err := a.open(); err != nil { return err } defer a.close() - f, err := a.writer.Create(filepath.ToSlash(infilename)) - if err != nil { - return err + var f io.Writer + var err error + + if normalizeFilesMetadata { + fh := prepareEmptyHeader(content, infilename) + normalizeCompressingFile(fh) + + f, err = a.writer.CreateHeader(fh) + if err != nil { + return fmt.Errorf("error creating file inside archive: %s", err) + } + } else { + f, err = a.writer.Create(filepath.ToSlash(infilename)) + if err != nil { + return err + } } _, err = f.Write(content) return err } -func (a *ZipArchiver) ArchiveFile(infilename string) error { +func (a *ZipArchiver) ArchiveFile(infilename string, normalizeFilesMetadata bool) error { fi, err := assertValidFile(infilename) if err != nil { return err @@ -58,9 +76,14 @@ func (a *ZipArchiver) ArchiveFile(infilename string) error { return fmt.Errorf("error creating file header: %s", err) } fh.Name = filepath.ToSlash(fi.Name()) - fh.Method = zip.Deflate - // fh.Modified alone isn't enough when using a zero value - fh.SetModTime(time.Time{}) + + if normalizeFilesMetadata { + normalizeCompressingFile(fh) + } else { + fh.Method = zip.Deflate + // fh.Modified alone isn't enough when using a zero value + fh.SetModTime(time.Time{}) + } f, err := a.writer.CreateHeader(fh) if err != nil { @@ -84,7 +107,38 @@ func checkMatch(fileName string, excludes []string) (value bool) { return false } -func (a *ZipArchiver) ArchiveDir(indirname string, excludes []string) error { +// The basic file header is very simple. The UncompressedSize logic is not a real-world use case +// in this context, but "640K ought to be enough for anybody". +// +// For reference, see golang/src/archive/zip/struct.go. 
+func prepareEmptyHeader(content []byte, infilename string) *zip.FileHeader { + fh := &zip.FileHeader{ + Name: filepath.ToSlash(infilename), + UncompressedSize64: uint64(len(content)), + } + + if fh.UncompressedSize64 > uint32max { + fh.UncompressedSize = uint32max + } else { + fh.UncompressedSize = uint32(fh.UncompressedSize64) + } + + return fh +} + +// Normalize the fields: +// +// - no compression, so the compressed stream is essentially a copy; +// - fixed date; +// - fixed file permissions. +// +func normalizeCompressingFile(fh *zip.FileHeader) { + fh.Method = zip.Store + fh.SetModTime(time.Date(1981, 4, 10, 0, 0, 0, 0, time.UTC)) + fh.SetMode(0644) +} + +func (a *ZipArchiver) ArchiveDir(indirname string, excludes []string, normalizeFilesMetadata bool) error { _, err := assertValidDir(indirname) if err != nil { return err @@ -128,9 +182,14 @@ func (a *ZipArchiver) ArchiveDir(indirname string, excludes []string) error { return fmt.Errorf("error creating file header: %s", err) } fh.Name = filepath.ToSlash(relname) - fh.Method = zip.Deflate - // fh.Modified alone isn't enough when using a zero value - fh.SetModTime(time.Time{}) + + if normalizeFilesMetadata { + normalizeCompressingFile(fh) + } else { + fh.Method = zip.Deflate + // fh.Modified alone isn't enough when using a zero value + fh.SetModTime(time.Time{}) + } f, err := a.writer.CreateHeader(fh) if err != nil { @@ -145,7 +204,7 @@ func (a *ZipArchiver) ArchiveDir(indirname string, excludes []string) error { }) } -func (a *ZipArchiver) ArchiveMultiple(content map[string][]byte) error { +func (a *ZipArchiver) ArchiveMultiple(content map[string][]byte, normalizeFilesMetadata bool) error { if err := a.open(); err != nil { return err } @@ -161,10 +220,24 @@ func (a *ZipArchiver) ArchiveMultiple(content map[string][]byte) error { sort.Strings(keys) for _, filename := range keys { - f, err := a.writer.Create(filepath.ToSlash(filename)) - if err != nil { - return err + var f io.Writer + var err error + + if 
normalizeFilesMetadata { + fh := prepareEmptyHeader(content[filename], filename) + normalizeCompressingFile(fh) + + f, err = a.writer.CreateHeader(fh) + if err != nil { + return fmt.Errorf("error creating file inside archive: %s", err) + } + } else { + f, err = a.writer.Create(filepath.ToSlash(filename)) + if err != nil { + return err + } } + _, err = f.Write(content[filename]) if err != nil { return err diff --git a/archive/zip_archiver_test.go b/archive/zip_archiver_test.go index 00a5dbf3..1877ce57 100644 --- a/archive/zip_archiver_test.go +++ b/archive/zip_archiver_test.go @@ -16,7 +16,7 @@ import ( func TestZipArchiver_Content(t *testing.T) { zipfilepath := "archive-content.zip" archiver := NewZipArchiver(zipfilepath) - if err := archiver.ArchiveContent([]byte("This is some content"), "content.txt"); err != nil { + if err := archiver.ArchiveContent([]byte("This is some content"), "content.txt", false); err != nil { t.Fatalf("unexpected error: %s", err) } @@ -25,10 +25,20 @@ func TestZipArchiver_Content(t *testing.T) { }) } +func TestZipArchiver_Content_WithNormalizedFilesMetadata(t *testing.T) { + zipfilepath := "archive-content.zip" + archiver := NewZipArchiver(zipfilepath) + if err := archiver.ArchiveContent([]byte("This is some content"), "content.txt", true); err != nil { + t.Fatalf("unexpected error: %s", err) + } + + ensureFileChecksum(t, zipfilepath, "952e89afb0435cd5e01e3e4cdf22c5b0") +} + func TestZipArchiver_File(t *testing.T) { zipfilepath := "archive-file.zip" archiver := NewZipArchiver(zipfilepath) - if err := archiver.ArchiveFile("./test-fixtures/test-file.txt"); err != nil { + if err := archiver.ArchiveFile("./test-fixtures/test-file.txt", false); err != nil { t.Fatalf("unexpected error: %s", err) } @@ -45,7 +55,7 @@ func TestZipArchiver_FileModified(t *testing.T) { var zip = func() { archiver := NewZipArchiver(zipFilePath) - if err := archiver.ArchiveFile(toZipPath); err != nil { + if err := archiver.ArchiveFile(toZipPath, false); err != nil { 
t.Fatalf("unexpected error: %s", err) } } @@ -75,10 +85,20 @@ func TestZipArchiver_FileModified(t *testing.T) { } } +func TestZipArchiver_File_WithNormalizedFilesMetadata(t *testing.T) { + zipfilepath := "archive-file.zip" + archiver := NewZipArchiver(zipfilepath) + if err := archiver.ArchiveFile("./test-fixtures/test-file.txt", true); err != nil { + t.Fatalf("unexpected error: %s", err) + } + + ensureFileChecksum(t, zipfilepath, "86f7cb871bc437b8174fca96bf7a464f") +} + func TestZipArchiver_Dir(t *testing.T) { zipfilepath := "archive-dir.zip" archiver := NewZipArchiver(zipfilepath) - if err := archiver.ArchiveDir("./test-fixtures/test-dir", []string{""}); err != nil { + if err := archiver.ArchiveDir("./test-fixtures/test-dir", []string{""}, false); err != nil { t.Fatalf("unexpected error: %s", err) } @@ -92,7 +112,7 @@ func TestZipArchiver_Dir(t *testing.T) { func TestZipArchiver_Dir_Exclude(t *testing.T) { zipfilepath := "archive-dir.zip" archiver := NewZipArchiver(zipfilepath) - if err := archiver.ArchiveDir("./test-fixtures/test-dir", []string{"file2.txt"}); err != nil { + if err := archiver.ArchiveDir("./test-fixtures/test-dir", []string{"file2.txt"}, false); err != nil { t.Fatalf("unexpected error: %s", err) } @@ -105,7 +125,7 @@ func TestZipArchiver_Dir_Exclude(t *testing.T) { func TestZipArchiver_Dir_Exclude_With_Directory(t *testing.T) { zipfilepath := "archive-dir.zip" archiver := NewZipArchiver(zipfilepath) - if err := archiver.ArchiveDir("./test-fixtures/", []string{"test-dir", "test-dir2/file2.txt"}); err != nil { + if err := archiver.ArchiveDir("./test-fixtures/", []string{"test-dir", "test-dir2/file2.txt"}, false); err != nil { t.Fatalf("unexpected error: %s", err) } @@ -116,6 +136,16 @@ func TestZipArchiver_Dir_Exclude_With_Directory(t *testing.T) { }) } +func TestZipArchiver_Dir_WithNormalizedFilesMetadata(t *testing.T) { + zipfilepath := "archive-dir.zip" + archiver := NewZipArchiver(zipfilepath) + if err := 
archiver.ArchiveDir("./test-fixtures/test-dir", []string{""}, true); err != nil { + t.Fatalf("unexpected error: %s", err) + } + + ensureFileChecksum(t, zipfilepath, "dfb9a8da8c73034f51a5c3c5d822e64b") +} + func TestZipArchiver_Multiple(t *testing.T) { zipfilepath := "archive-content.zip" content := map[string][]byte{ @@ -125,7 +155,7 @@ func TestZipArchiver_Multiple(t *testing.T) { } archiver := NewZipArchiver(zipfilepath) - if err := archiver.ArchiveMultiple(content); err != nil { + if err := archiver.ArchiveMultiple(content, false); err != nil { t.Fatalf("unexpected error: %s", err) } @@ -133,6 +163,22 @@ func TestZipArchiver_Multiple(t *testing.T) { } +func TestZipArchiver_Multiple_WithNormalizedFilesMetadata(t *testing.T) { + zipfilepath := "archive-content.zip" + content := map[string][]byte{ + "file1.txt": []byte("This is file 1"), + "file2.txt": []byte("This is file 2"), + "file3.txt": []byte("This is file 3"), + } + + archiver := NewZipArchiver(zipfilepath) + if err := archiver.ArchiveMultiple(content, true); err != nil { + t.Fatalf("unexpected error: %s", err) + } + + ensureFileChecksum(t, zipfilepath, "dfb9a8da8c73034f51a5c3c5d822e64b") +} + func ensureContents(t *testing.T, zipfilepath string, wants map[string][]byte) { r, err := zip.OpenReader(zipfilepath) if err != nil {