From 4f098c675e6e8fc70a514bf0fba1a9c3122712ec Mon Sep 17 00:00:00 2001 From: eejayes Date: Tue, 30 Apr 2024 16:40:00 +0200 Subject: [PATCH] Support duplicate paths in tar archives Duplicate path entries are made possible within tar archives as discussed in feature request #849. This includes an interaction with create parents, where the only logical scenario which would require inference of a parent directory is when one does not already exist. This is because allowance of duplicates is only useful when explicit paths are declared. RELNOTES: Duplicate path entries supported within tar archives --- docs/latest.md | 13 +++---- pkg/private/tar/build_tar.py | 10 ++++-- pkg/private/tar/tar.bzl | 4 +++ pkg/private/tar/tar_writer.py | 49 ++++++++++++++------------- tests/tar/BUILD | 8 +++++ tests/tar/pkg_tar_test.py | 9 +++++ tests/tar/tar_writer_test.py | 46 +++++++++++++++++++++++-- tests/testdata/duplicate_entries.tar | Bin 0 -> 10240 bytes 8 files changed, 105 insertions(+), 34 deletions(-) create mode 100644 tests/testdata/duplicate_entries.tar diff --git a/docs/latest.md b/docs/latest.md index c0c98221..e38e1137 100755 --- a/docs/latest.md +++ b/docs/latest.md @@ -283,11 +283,11 @@ Rules for making .tar files. ## pkg_tar
-pkg_tar(name, allow_duplicates_with_different_content, compressor, compressor_args, deps,
-             empty_dirs, empty_files, extension, files, include_runfiles, mode, modes, mtime, out,
-             owner, ownername, ownernames, owners, package_dir, package_dir_file, package_file_name,
-             package_variables, portable_mtime, private_stamp_detect, remap_paths, srcs, stamp,
-             strip_prefix, symlinks, symlinks)
+pkg_tar(name, allow_duplicates_with_different_content, allow_duplicates_from_deps, compressor, compressor_args,
+             create_parents, deps, empty_dirs, empty_files, extension, files, include_runfiles, mode,  
+             modes, mtime, out, owner, ownername, ownernames, owners, package_dir,
+             package_dir_file, package_file_name, package_variables, portable_mtime, private_stamp_detect, remap_paths, srcs, 
+             stamp, strip_prefix, symlinks)
 
@@ -298,9 +298,11 @@ pkg_tar(name, name | A unique name for this target. | Name | required | | +| allow_duplicates_from_deps | Supports existence of duplicate paths for archives in deps, and addition of duplicates file paths as archives (deps) or files (srcs). | Boolean | optional | False | | allow_duplicates_with_different_content | If true, will allow you to reference multiple pkg_* which conflict (writing different content or metadata to the same destination). Such behaviour is always incorrect, but we provide a flag to support it in case old builds were accidentally doing it. Never explicitly set this to true for new code. | Boolean | optional | True | | compressor | External tool which can compress the archive. | Label | optional | None | | compressor_args | Arg list for compressor. | String | optional | "" | +| create_parents | Implicitly create parent directories with default permissions for file paths where parent directories are not specified. | Boolean | optional | True | | deps | tar files which will be unpacked and repacked into the archive. | List of labels | optional | [] | | empty_dirs | - | List of strings | optional | [] | | empty_files | - | List of strings | optional | [] | @@ -326,7 +328,6 @@ pkg_tar(name, stamp | Enable file time stamping. Possible values:
  • stamp = 1: Use the time of the build as the modification time of each file in the archive.
  • stamp = 0: Use an "epoch" time for the modification time of each file. This gives good build result caching.
  • stamp = -1: Control the chosen modification time using the --[no]stamp flag.
    Since 0.5.0
    | Integer | optional | 0 | | strip_prefix | (note: Use strip_prefix = "." to strip path to the package but preserve relative paths of sub directories beneath the package.) | String | optional | "" | | symlinks | - | Dictionary: String -> String | optional | {} | -| create_parents | Implicitly create parent directories with default permissions for file paths where parent directories are not specified. | Boolean | optional | True | diff --git a/pkg/private/tar/build_tar.py b/pkg/private/tar/build_tar.py index 559c02e3..ce80a9fe 100644 --- a/pkg/private/tar/build_tar.py +++ b/pkg/private/tar/build_tar.py @@ -42,7 +42,7 @@ class TarFile(object): class DebError(Exception): pass - def __init__(self, output, directory, compression, compressor, create_parents, default_mtime): + def __init__(self, output, directory, compression, compressor, create_parents, allow_dups_from_deps, default_mtime): # Directory prefix on all output paths d = directory.strip('/') self.directory = (d + '/') if d else None @@ -51,6 +51,7 @@ def __init__(self, output, directory, compression, compressor, create_parents, d self.compressor = compressor self.default_mtime = default_mtime self.create_parents = create_parents + self.allow_dups_from_deps = allow_dups_from_deps def __enter__(self): self.tarfile = tar_writer.TarFileWriter( @@ -58,6 +59,7 @@ def __enter__(self): self.compression, self.compressor, self.create_parents, + self.allow_dups_from_deps, default_mtime=self.default_mtime) return self @@ -392,6 +394,9 @@ def main(): action='store_true', help='Automatically creates parent directories implied by a' ' prefix if they do not exist') + parser.add_argument('--allow_dups_from_deps', + action='store_true', + help='') options = parser.parse_args() # Parse modes arguments @@ -442,7 +447,8 @@ def main(): compression = options.compression, compressor = options.compressor, default_mtime=default_mtime, - create_parents=options.create_parents) as output: + create_parents=options.create_parents, + allow_dups_from_deps=options.allow_dups_from_deps) as output: def file_attributes(filename): if filename.startswith('/'): diff --git a/pkg/private/tar/tar.bzl b/pkg/private/tar/tar.bzl index 8bea9fbc..120820a8 100644 --- a/pkg/private/tar/tar.bzl +++ b/pkg/private/tar/tar.bzl @@ -176,6 +176,9 @@ def _pkg_tar_impl(ctx): if ctx.attr.create_parents: args.add("--create_parents") + if ctx.attr.allow_duplicates_from_deps: + args.add("--allow_dups_from_deps") + inputs = depset( direct = ctx.files.deps + files, transitive = mapping_context.file_deps, @@ -268,6 +271,7 @@ pkg_tar_impl = rule( doc = """Arg list for `compressor`.""", ), "create_parents": attr.bool(default = True), + "allow_duplicates_from_deps": attr.bool(default = False), # Common attributes "out": attr.output(mandatory = True), diff --git a/pkg/private/tar/tar_writer.py b/pkg/private/tar/tar_writer.py index 957ce45a..cee232da 100644 --- a/pkg/private/tar/tar_writer.py +++ b/pkg/private/tar/tar_writer.py @@ -47,6 +47,7 @@ def __init__(self, compression='', compressor='', create_parents=False, + allow_dups_from_deps=True, default_mtime=None, preserve_tar_mtimes=True): """TarFileWriter wraps tarfile.open(). @@ -108,6 +109,7 @@ def __init__(self, self.directories.add('/') self.directories.add('./') self.create_parents = create_parents + self.allow_dups_from_deps = allow_dups_from_deps def __enter__(self): return self @@ -125,14 +127,15 @@ def _addfile(self, info, fileobj=None): # Enforce the ending / for directories so we correctly deduplicate. if not info.name.endswith('/'): info.name += '/' - if not self._have_added(info.name): - self.tar.addfile(info, fileobj) - self.members.add(info.name) - if info.type == tarfile.DIRTYPE: - self.directories.add(info.name) - elif info.type != tarfile.DIRTYPE: - print('Duplicate file in archive: %s, ' - 'picking first occurrence' % info.name) + if not self.allow_dups_from_deps and self._have_added(info.name): + print('Duplicate file in archive: %s, ' + 'picking first occurrence' % info.name) + return + + self.tar.addfile(info, fileobj) + self.members.add(info.name) + if info.type == tarfile.DIRTYPE: + self.directories.add(info.name) def add_directory_path(self, path, @@ -154,7 +157,7 @@ def add_directory_path(self, mode: unix permission mode of the file, default: 0755. """ assert path[-1] == '/' - if not path or self._have_added(path): + if not path: return if _DEBUG_VERBOSITY > 1: print('DEBUG: adding directory', path) @@ -168,12 +171,14 @@ def add_directory_path(self, tarinfo.gname = gname self._addfile(tarinfo) - def add_parents(self, path, uid=0, gid=0, uname='', gname='', mtime=0, mode=0o755): + def conditionally_add_parents(self, path, uid=0, gid=0, uname='', gname='', mtime=0, mode=0o755): dirs = path.split('/') parent_path = '' for next_level in dirs[0:-1]: parent_path = parent_path + next_level + '/' - self.add_directory_path( + + if self.create_parents and not self._have_added(parent_path): + self.add_directory_path( parent_path, uid=uid, gid=gid, @@ -214,15 +219,14 @@ def add_file(self, return if name == '.': return - if name in self.members: + if not self.allow_dups_from_deps and name in self.members: return if mtime is None: mtime = self.default_mtime # Make directories up the file - if self.create_parents: - self.add_parents(name, mtime=mtime, mode=0o755, uid=uid, gid=gid, uname=uname, gname=gname) + self.conditionally_add_parents(name, mtime=mtime, mode=0o755, uid=uid, gid=gid, uname=uname, gname=gname) tarinfo = tarfile.TarInfo(name) tarinfo.mtime = mtime @@ -294,15 +298,14 @@ def add_tar(self, if prefix: in_name = os.path.normpath(prefix + in_name).replace(os.path.sep, '/') tarinfo.name = in_name - if self.create_parents: - self.add_parents( - path=tarinfo.name, - mtime=tarinfo.mtime, - mode=0o755, - uid=tarinfo.uid, - gid=tarinfo.gid, - uname=tarinfo.uname, - gname=tarinfo.gname) + self.conditionally_add_parents( + path=tarinfo.name, + mtime=tarinfo.mtime, + mode=0o755, + uid=tarinfo.uid, + gid=tarinfo.gid, + uname=tarinfo.uname, + gname=tarinfo.gname) if prefix is not None: # Relocate internal hardlinks as well to avoid breaking them. diff --git a/tests/tar/BUILD b/tests/tar/BUILD index 581cab42..9480a46c 100644 --- a/tests/tar/BUILD +++ b/tests/tar/BUILD @@ -91,6 +91,13 @@ pkg_tar( deps = ["//tests:testdata/tar_test.tar"], ) +pkg_tar( + name = "test-respect-externally-defined-duplicates", + deps = ["//tests:testdata/duplicate_entries.tar"], + create_parents = False, + allow_duplicates_from_deps = True, +) + # # Tests for package_file_name # @@ -456,6 +463,7 @@ py_test( ":test-pkg-tar-from-pkg-files-with-attributes", ":test-pkg-tar-with-attributes", ":test-remap-paths-tree-artifact", + ":test-respect-externally-defined-duplicates.tar", ":test-tar-empty_dirs.tar", ":test-tar-empty_files.tar", ":test-tar-files_dict.tar", diff --git a/tests/tar/pkg_tar_test.py b/tests/tar/pkg_tar_test.py index d18c49c0..012c7de2 100644 --- a/tests/tar/pkg_tar_test.py +++ b/tests/tar/pkg_tar_test.py @@ -284,6 +284,15 @@ def test_remap_paths_tree_artifact(self): ] self.assertTarFileContent('test-remap-paths-tree-artifact.tar', content) + def test_externally_defined_duplicate_structure(self): + content = [ + {'name': './a'}, + {'name': './b'}, + {'name': './ab'}, + {'name': './ab'}, + ] + self.assertTarFileContent('test-respect-externally-defined-duplicates.tar', content) + if __name__ == '__main__': unittest.main() diff --git a/tests/tar/tar_writer_test.py b/tests/tar/tar_writer_test.py index 8503d317..e7d9c599 100644 --- a/tests/tar/tar_writer_test.py +++ b/tests/tar/tar_writer_test.py @@ -138,7 +138,7 @@ def testMergeTarRelocated(self): {"name": "foo/a", "data": b"a"}, {"name": "foo/ab", "data": b"ab"}, ] - with tar_writer.TarFileWriter(self.tempfile, create_parents=True) as f: + with tar_writer.TarFileWriter(self.tempfile, create_parents=True, allow_dups_from_deps=False) as f: datafile = self.data_files.Rlocation( "rules_pkg/tests/testdata/tar_test.tar") f.add_tar(datafile, name_filter=lambda n: n != "./b", prefix="foo") @@ -185,7 +185,7 @@ def testAddingDirectoriesForFile(self): self.assertTarFileContent(self.tempfile, content) def testAddingDirectoriesForFileManually(self): - with tar_writer.TarFileWriter(self.tempfile, create_parents=True) as f: + with tar_writer.TarFileWriter(self.tempfile, create_parents=True, allow_dups_from_deps=False) as f: f.add_file("d", tarfile.DIRTYPE) f.add_file("d/f") @@ -211,7 +211,7 @@ def testAddingDirectoriesForFileManually(self): self.assertTarFileContent(self.tempfile, content) def testAddingOnlySpecifiedFiles(self): - with tar_writer.TarFileWriter(self.tempfile) as f: + with tar_writer.TarFileWriter(self.tempfile, allow_dups_from_deps=False) as f: f.add_file("a", tarfile.DIRTYPE) f.add_file("a/b", tarfile.DIRTYPE) f.add_file("a/b/", tarfile.DIRTYPE) @@ -261,6 +261,46 @@ def testCustomCompression(self): self.assertTarFileContent(original, expected_content) self.assertTarFileContent(self.tempfile, expected_content) + def testAdditionOfDuplicatePath(self): + expected_content = [ + {"name": "./" + x} for x in ["a", "b", "ab"]] + [ + {"name": "./b", "data": "q".encode("utf-8")} + ] + with tar_writer.TarFileWriter(self.tempfile) as f: + datafile = self.data_files.Rlocation( + "rules_pkg/tests/testdata/tar_test.tar") + f.add_tar(datafile) + f.add_file('./b', content="q") + + self.assertTarFileContent(self.tempfile, expected_content) + + def testAdditionOfArchives(self): + + expected_content = [ + {"name": "./" + x} for x in ["a", "b", "ab", "a", "b", "ab"] + ] + with tar_writer.TarFileWriter(self.tempfile) as f: + datafile = self.data_files.Rlocation( + "rules_pkg/tests/testdata/tar_test.tar") + + f.add_tar(datafile) + f.add_tar(datafile) + + self.assertTarFileContent(self.tempfile, expected_content) + + def testOnlyIntermediateParentsInferred(self): + expected_content = [ + {"name": "./a", "mode": 0o111}, + {"name": "./a/b", "mode": 0o755}, + {"name": "./a/b/c"}, + ] + with tar_writer.TarFileWriter(self.tempfile, create_parents=True) as f: + f.add_file('./a', tarfile.DIRTYPE, mode=0o111) + f.add_file('./a/b/c') + + self.assertTarFileContent(self.tempfile, expected_content) + + if __name__ == "__main__": unittest.main() diff --git a/tests/testdata/duplicate_entries.tar b/tests/testdata/duplicate_entries.tar new file mode 100644 index 0000000000000000000000000000000000000000..d1823412c7d66cb0e50b4ee9ee47cf40e0ba01c8 GIT binary patch literal 10240 zcmeI!O%8(~5Qbrn;t7NS9M6NjL4WRFX=7@-X*C(b;$?#|DDVtlbo9kpx3D(V>}tQZ z?|!`{PU567%^G4Ir!i+Y1nY&>_djm;avg>iQ}sB9ef4|XHEY@gM!#&AW_=C)|M{O^ z`JV#$H!H~Q#rD