Skip to content

Commit

Permalink
Support duplicate paths in tar archives (bazelbuild#850)
Browse files Browse the repository at this point in the history
Duplicate path entries are made possible within tar archives as
discussed in feature request bazelbuild#849. This includes an interaction with
create parents, where the only logical scenario which would require
inference of a parent directory is when one does not already exist.
This is because allowance of duplicates is only useful when explicit
paths are declared.

RELNOTES: Duplicate path entries supported within tar archives
  • Loading branch information
eejayes authored May 29, 2024
1 parent bf4609b commit 2247f5d
Show file tree
Hide file tree
Showing 8 changed files with 105 additions and 34 deletions.
13 changes: 7 additions & 6 deletions docs/latest.md
Original file line number Diff line number Diff line change
Expand Up @@ -283,11 +283,11 @@ Rules for making .tar files.
## pkg_tar

<pre>
pkg_tar(<a href="#pkg_tar-name">name</a>, <a href="#pkg_tar-allow_duplicates_with_different_content">allow_duplicates_with_different_content</a>, <a href="#pkg_tar-compressor">compressor</a>, <a href="#pkg_tar-compressor_args">compressor_args</a>, <a href="#pkg_tar-deps">deps</a>,
<a href="#pkg_tar-empty_dirs">empty_dirs</a>, <a href="#pkg_tar-empty_files">empty_files</a>, <a href="#pkg_tar-extension">extension</a>, <a href="#pkg_tar-files">files</a>, <a href="#pkg_tar-include_runfiles">include_runfiles</a>, <a href="#pkg_tar-mode">mode</a>, <a href="#pkg_tar-modes">modes</a>, <a href="#pkg_tar-mtime">mtime</a>, <a href="#pkg_tar-out">out</a>,
<a href="#pkg_tar-owner">owner</a>, <a href="#pkg_tar-ownername">ownername</a>, <a href="#pkg_tar-ownernames">ownernames</a>, <a href="#pkg_tar-owners">owners</a>, <a href="#pkg_tar-package_dir">package_dir</a>, <a href="#pkg_tar-package_dir_file">package_dir_file</a>, <a href="#pkg_tar-package_file_name">package_file_name</a>,
<a href="#pkg_tar-package_variables">package_variables</a>, <a href="#pkg_tar-portable_mtime">portable_mtime</a>, <a href="#pkg_tar-private_stamp_detect">private_stamp_detect</a>, <a href="#pkg_tar-remap_paths">remap_paths</a>, <a href="#pkg_tar-srcs">srcs</a>, <a href="#pkg_tar-stamp">stamp</a>,
<a href="#pkg_tar-strip_prefix">strip_prefix</a>, <a href="#pkg_tar-symlinks">symlinks</a>, <a href="#pkg_tar-create_parents">symlinks</a>)
pkg_tar(<a href="#pkg_tar-name">name</a>, <a href="#pkg_tar-allow_duplicates_with_different_content">allow_duplicates_with_different_content</a>, <a href="#pkg_tar-allow_duplicates_from_deps">allow_duplicates_from_deps</a>, <a href="#pkg_tar-compressor">compressor</a>, <a href="#pkg_tar-compressor_args">compressor_args</a>,
<a href="#pkg_tar-create_parents">create_parents</a>, <a href="#pkg_tar-deps">deps</a>, <a href="#pkg_tar-empty_dirs">empty_dirs</a>, <a href="#pkg_tar-empty_files">empty_files</a>, <a href="#pkg_tar-extension">extension</a>, <a href="#pkg_tar-files">files</a>, <a href="#pkg_tar-include_runfiles">include_runfiles</a>, <a href="#pkg_tar-mode">mode</a>,
<a href="#pkg_tar-modes">modes</a>, <a href="#pkg_tar-mtime">mtime</a>, <a href="#pkg_tar-out">out</a>, <a href="#pkg_tar-owner">owner</a>, <a href="#pkg_tar-ownername">ownername</a>, <a href="#pkg_tar-ownernames">ownernames</a>, <a href="#pkg_tar-owners">owners</a>, <a href="#pkg_tar-package_dir">package_dir</a>,
<a href="#pkg_tar-package_dir_file">package_dir_file</a>, <a href="#pkg_tar-package_file_name">package_file_name</a>, <a href="#pkg_tar-package_variables">package_variables</a>, <a href="#pkg_tar-portable_mtime">portable_mtime</a>, <a href="#pkg_tar-private_stamp_detect">private_stamp_detect</a>, <a href="#pkg_tar-remap_paths">remap_paths</a>, <a href="#pkg_tar-srcs">srcs</a>,
<a href="#pkg_tar-stamp">stamp</a>, <a href="#pkg_tar-strip_prefix">strip_prefix</a>, <a href="#pkg_tar-symlinks">symlinks</a>)
</pre>


Expand All @@ -298,9 +298,11 @@ pkg_tar(<a href="#pkg_tar-name">name</a>, <a href="#pkg_tar-allow_duplicates_wit
| Name | Description | Type | Mandatory | Default |
| :------------- | :------------- | :------------- | :------------- | :------------- |
| <a id="pkg_tar-name"></a>name | A unique name for this target. | <a href="https://bazel.build/docs/build-ref.html#name">Name</a> | required | |
| <a id="pkg_tar-allow_duplicates_from_deps"></a>allow_duplicates_from_deps | Supports existence of duplicate paths for archives in deps, and addition of duplicates file paths as archives (deps) or files (srcs). | Boolean | optional | False |
| <a id="pkg_tar-allow_duplicates_with_different_content"></a>allow_duplicates_with_different_content | If true, will allow you to reference multiple pkg_* which conflict (writing different content or metadata to the same destination). Such behaviour is always incorrect, but we provide a flag to support it in case old builds were accidentally doing it. Never explicitly set this to true for new code. | Boolean | optional | True |
| <a id="pkg_tar-compressor"></a>compressor | External tool which can compress the archive. | <a href="https://bazel.build/docs/build-ref.html#labels">Label</a> | optional | None |
| <a id="pkg_tar-compressor_args"></a>compressor_args | Arg list for <code>compressor</code>. | String | optional | "" |
| <a id="pkg_tar-create_parents"></a>create_parents | Implicitly create parent directories with default permissions for file paths where parent directories are not specified. | Boolean | optional | True |
| <a id="pkg_tar-deps"></a>deps | tar files which will be unpacked and repacked into the archive. | <a href="https://bazel.build/docs/build-ref.html#labels">List of labels</a> | optional | [] |
| <a id="pkg_tar-empty_dirs"></a>empty_dirs | - | List of strings | optional | [] |
| <a id="pkg_tar-empty_files"></a>empty_files | - | List of strings | optional | [] |
Expand All @@ -326,7 +328,6 @@ pkg_tar(<a href="#pkg_tar-name">name</a>, <a href="#pkg_tar-allow_duplicates_wit
| <a id="pkg_tar-stamp"></a>stamp | Enable file time stamping. Possible values: <li>stamp = 1: Use the time of the build as the modification time of each file in the archive. <li>stamp = 0: Use an "epoch" time for the modification time of each file. This gives good build result caching. <li>stamp = -1: Control the chosen modification time using the --[no]stamp flag. <div class="since"><i>Since 0.5.0</i></div> | Integer | optional | 0 |
| <a id="pkg_tar-strip_prefix"></a>strip_prefix | (note: Use strip_prefix = "." to strip path to the package but preserve relative paths of sub directories beneath the package.) | String | optional | "" |
| <a id="pkg_tar-symlinks"></a>symlinks | - | <a href="https://bazel.build/docs/skylark/lib/dict.html">Dictionary: String -> String</a> | optional | {} |
| <a id="pkg_tar-create_parents"></a>create_parents | Implicitly create parent directories with default permissions for file paths where parent directories are not specified. | Boolean | optional | True |



Expand Down
10 changes: 8 additions & 2 deletions pkg/private/tar/build_tar.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ class TarFile(object):
class DebError(Exception):
pass

def __init__(self, output, directory, compression, compressor, create_parents, default_mtime):
def __init__(self, output, directory, compression, compressor, create_parents, allow_dups_from_deps, default_mtime):
# Directory prefix on all output paths
d = directory.strip('/')
self.directory = (d + '/') if d else None
Expand All @@ -51,13 +51,15 @@ def __init__(self, output, directory, compression, compressor, create_parents, d
self.compressor = compressor
self.default_mtime = default_mtime
self.create_parents = create_parents
self.allow_dups_from_deps = allow_dups_from_deps

def __enter__(self):
self.tarfile = tar_writer.TarFileWriter(
self.output,
self.compression,
self.compressor,
self.create_parents,
self.allow_dups_from_deps,
default_mtime=self.default_mtime)
return self

Expand Down Expand Up @@ -392,6 +394,9 @@ def main():
action='store_true',
help='Automatically creates parent directories implied by a'
' prefix if they do not exist')
parser.add_argument('--allow_dups_from_deps',
action='store_true',
help='')
options = parser.parse_args()

# Parse modes arguments
Expand Down Expand Up @@ -442,7 +447,8 @@ def main():
compression = options.compression,
compressor = options.compressor,
default_mtime=default_mtime,
create_parents=options.create_parents) as output:
create_parents=options.create_parents,
allow_dups_from_deps=options.allow_dups_from_deps) as output:

def file_attributes(filename):
if filename.startswith('/'):
Expand Down
4 changes: 4 additions & 0 deletions pkg/private/tar/tar.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,9 @@ def _pkg_tar_impl(ctx):
if ctx.attr.create_parents:
args.add("--create_parents")

if ctx.attr.allow_duplicates_from_deps:
args.add("--allow_dups_from_deps")

inputs = depset(
direct = ctx.files.deps + files,
transitive = mapping_context.file_deps,
Expand Down Expand Up @@ -268,6 +271,7 @@ pkg_tar_impl = rule(
doc = """Arg list for `compressor`.""",
),
"create_parents": attr.bool(default = True),
"allow_duplicates_from_deps": attr.bool(default = False),

# Common attributes
"out": attr.output(mandatory = True),
Expand Down
49 changes: 26 additions & 23 deletions pkg/private/tar/tar_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ def __init__(self,
compression='',
compressor='',
create_parents=False,
allow_dups_from_deps=True,
default_mtime=None,
preserve_tar_mtimes=True):
"""TarFileWriter wraps tarfile.open().
Expand Down Expand Up @@ -108,6 +109,7 @@ def __init__(self,
self.directories.add('/')
self.directories.add('./')
self.create_parents = create_parents
self.allow_dups_from_deps = allow_dups_from_deps

def __enter__(self):
return self
Expand All @@ -125,14 +127,15 @@ def _addfile(self, info, fileobj=None):
# Enforce the ending / for directories so we correctly deduplicate.
if not info.name.endswith('/'):
info.name += '/'
if not self._have_added(info.name):
self.tar.addfile(info, fileobj)
self.members.add(info.name)
if info.type == tarfile.DIRTYPE:
self.directories.add(info.name)
elif info.type != tarfile.DIRTYPE:
print('Duplicate file in archive: %s, '
'picking first occurrence' % info.name)
if not self.allow_dups_from_deps and self._have_added(info.name):
print('Duplicate file in archive: %s, '
'picking first occurrence' % info.name)
return

self.tar.addfile(info, fileobj)
self.members.add(info.name)
if info.type == tarfile.DIRTYPE:
self.directories.add(info.name)

def add_directory_path(self,
path,
Expand All @@ -154,7 +157,7 @@ def add_directory_path(self,
mode: unix permission mode of the file, default: 0755.
"""
assert path[-1] == '/'
if not path or self._have_added(path):
if not path:
return
if _DEBUG_VERBOSITY > 1:
print('DEBUG: adding directory', path)
Expand All @@ -168,12 +171,14 @@ def add_directory_path(self,
tarinfo.gname = gname
self._addfile(tarinfo)

def add_parents(self, path, uid=0, gid=0, uname='', gname='', mtime=0, mode=0o755):
def conditionally_add_parents(self, path, uid=0, gid=0, uname='', gname='', mtime=0, mode=0o755):
dirs = path.split('/')
parent_path = ''
for next_level in dirs[0:-1]:
parent_path = parent_path + next_level + '/'
self.add_directory_path(

if self.create_parents and not self._have_added(parent_path):
self.add_directory_path(
parent_path,
uid=uid,
gid=gid,
Expand Down Expand Up @@ -214,15 +219,14 @@ def add_file(self,
return
if name == '.':
return
if name in self.members:
if not self.allow_dups_from_deps and name in self.members:
return

if mtime is None:
mtime = self.default_mtime

# Make directories up the file
if self.create_parents:
self.add_parents(name, mtime=mtime, mode=0o755, uid=uid, gid=gid, uname=uname, gname=gname)
self.conditionally_add_parents(name, mtime=mtime, mode=0o755, uid=uid, gid=gid, uname=uname, gname=gname)

tarinfo = tarfile.TarInfo(name)
tarinfo.mtime = mtime
Expand Down Expand Up @@ -294,15 +298,14 @@ def add_tar(self,
if prefix:
in_name = os.path.normpath(prefix + in_name).replace(os.path.sep, '/')
tarinfo.name = in_name
if self.create_parents:
self.add_parents(
path=tarinfo.name,
mtime=tarinfo.mtime,
mode=0o755,
uid=tarinfo.uid,
gid=tarinfo.gid,
uname=tarinfo.uname,
gname=tarinfo.gname)
self.conditionally_add_parents(
path=tarinfo.name,
mtime=tarinfo.mtime,
mode=0o755,
uid=tarinfo.uid,
gid=tarinfo.gid,
uname=tarinfo.uname,
gname=tarinfo.gname)

if prefix is not None:
# Relocate internal hardlinks as well to avoid breaking them.
Expand Down
8 changes: 8 additions & 0 deletions tests/tar/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,13 @@ pkg_tar(
deps = ["//tests:testdata/tar_test.tar"],
)

pkg_tar(
name = "test-respect-externally-defined-duplicates",
deps = ["//tests:testdata/duplicate_entries.tar"],
create_parents = False,
allow_duplicates_from_deps = True,
)

#
# Tests for package_file_name
#
Expand Down Expand Up @@ -456,6 +463,7 @@ py_test(
":test-pkg-tar-from-pkg-files-with-attributes",
":test-pkg-tar-with-attributes",
":test-remap-paths-tree-artifact",
":test-respect-externally-defined-duplicates.tar",
":test-tar-empty_dirs.tar",
":test-tar-empty_files.tar",
":test-tar-files_dict.tar",
Expand Down
9 changes: 9 additions & 0 deletions tests/tar/pkg_tar_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,15 @@ def test_remap_paths_tree_artifact(self):
]
self.assertTarFileContent('test-remap-paths-tree-artifact.tar', content)

def test_externally_defined_duplicate_structure(self):
content = [
{'name': './a'},
{'name': './b'},
{'name': './ab'},
{'name': './ab'},
]
self.assertTarFileContent('test-respect-externally-defined-duplicates.tar', content)


if __name__ == '__main__':
unittest.main()
46 changes: 43 additions & 3 deletions tests/tar/tar_writer_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ def testMergeTarRelocated(self):
{"name": "foo/a", "data": b"a"},
{"name": "foo/ab", "data": b"ab"},
]
with tar_writer.TarFileWriter(self.tempfile, create_parents=True) as f:
with tar_writer.TarFileWriter(self.tempfile, create_parents=True, allow_dups_from_deps=False) as f:
datafile = self.data_files.Rlocation(
"rules_pkg/tests/testdata/tar_test.tar")
f.add_tar(datafile, name_filter=lambda n: n != "./b", prefix="foo")
Expand Down Expand Up @@ -185,7 +185,7 @@ def testAddingDirectoriesForFile(self):
self.assertTarFileContent(self.tempfile, content)

def testAddingDirectoriesForFileManually(self):
with tar_writer.TarFileWriter(self.tempfile, create_parents=True) as f:
with tar_writer.TarFileWriter(self.tempfile, create_parents=True, allow_dups_from_deps=False) as f:
f.add_file("d", tarfile.DIRTYPE)
f.add_file("d/f")

Expand All @@ -211,7 +211,7 @@ def testAddingDirectoriesForFileManually(self):
self.assertTarFileContent(self.tempfile, content)

def testAddingOnlySpecifiedFiles(self):
with tar_writer.TarFileWriter(self.tempfile) as f:
with tar_writer.TarFileWriter(self.tempfile, allow_dups_from_deps=False) as f:
f.add_file("a", tarfile.DIRTYPE)
f.add_file("a/b", tarfile.DIRTYPE)
f.add_file("a/b/", tarfile.DIRTYPE)
Expand Down Expand Up @@ -261,6 +261,46 @@ def testCustomCompression(self):
self.assertTarFileContent(original, expected_content)
self.assertTarFileContent(self.tempfile, expected_content)

def testAdditionOfDuplicatePath(self):
expected_content = [
{"name": "./" + x} for x in ["a", "b", "ab"]] + [
{"name": "./b", "data": "q".encode("utf-8")}
]
with tar_writer.TarFileWriter(self.tempfile) as f:
datafile = self.data_files.Rlocation(
"rules_pkg/tests/testdata/tar_test.tar")
f.add_tar(datafile)
f.add_file('./b', content="q")

self.assertTarFileContent(self.tempfile, expected_content)

def testAdditionOfArchives(self):

expected_content = [
{"name": "./" + x} for x in ["a", "b", "ab", "a", "b", "ab"]
]
with tar_writer.TarFileWriter(self.tempfile) as f:
datafile = self.data_files.Rlocation(
"rules_pkg/tests/testdata/tar_test.tar")

f.add_tar(datafile)
f.add_tar(datafile)

self.assertTarFileContent(self.tempfile, expected_content)

def testOnlyIntermediateParentsInferred(self):
expected_content = [
{"name": "./a", "mode": 0o111},
{"name": "./a/b", "mode": 0o755},
{"name": "./a/b/c"},
]
with tar_writer.TarFileWriter(self.tempfile, create_parents=True) as f:
f.add_file('./a', tarfile.DIRTYPE, mode=0o111)
f.add_file('./a/b/c')

self.assertTarFileContent(self.tempfile, expected_content)



if __name__ == "__main__":
unittest.main()
Binary file added tests/testdata/duplicate_entries.tar
Binary file not shown.

0 comments on commit 2247f5d

Please sign in to comment.