Skip to content

Commit

Permalink
Stop archive writer from always adding './' as a root path. (#540)
Browse files Browse the repository at this point in the history
* Stop archive writer from always adding './' as a root path.

- clean up tests appropriately
- remove achive.add_dir because we do not use it

This is a precursor change to removing the unwanted leading ./ from tar writers.
See #50, #531
  • Loading branch information
aiuto authored Feb 14, 2022
1 parent cce90a0 commit d915e26
Show file tree
Hide file tree
Showing 5 changed files with 67 additions and 211 deletions.
2 changes: 1 addition & 1 deletion distro/packaging_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def CopyTestFile(source_name, dest_name):
# TODO(aiuto): Find tar in a disciplined way
content = subprocess.check_output(
['tar', 'tzf', 'bazel-bin/dummy_tar.tar.gz'])
self.assertEqual(b'./\n./BUILD\n', content)
self.assertEqual(b'./BUILD\n', content)


if __name__ == '__main__':
Expand Down
136 changes: 41 additions & 95 deletions pkg/private/archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ def __init__(self,
name,
compression='',
compressor='',
root_directory='.',
root_directory='',
default_mtime=None,
preserve_tar_mtimes=True):
"""TarFileWriter wraps tarfile.open().
Expand Down Expand Up @@ -174,89 +174,39 @@ def __init__(self,
stdout=open(name, 'wb'))
self.fileobj = self.compressor_proc.stdin
self.name = name
self.root_directory = root_directory.rstrip('/').rstrip('\\')
self.root_directory = self.root_directory.replace('\\', '/')
# tarfile uses / instead of os.path.sep
self.root_directory = root_directory.replace(os.path.sep, '/').rstrip('/')
if self.root_directory:
self.root_directory = self.root_directory + '/'

self.tar = tarfile.open(name=name, mode=mode, fileobj=self.fileobj)
self.members = set([])
self.directories = set([])
self.members = set()
# The directories we have created so far
self.directories = set()

def __enter__(self):
return self

def __exit__(self, t, v, traceback):
self.close()

def add_dir(self,
name,
path,
uid=0,
gid=0,
uname='',
gname='',
mtime=None,
mode=None,
depth=100):
"""Recursively add a directory.

Args:
name: the ('/' delimited) path of the directory to add.
path: the (os.path.sep delimited) path of the directory to add.
uid: owner user identifier.
gid: owner group identifier.
uname: owner user names.
gname: owner group names.
mtime: modification time to put in the archive.
mode: unix permission mode of the file, default 0644 (0755).
depth: maximum depth to recurse in to avoid infinite loops
with cyclic mounts.
def add_root(self, path: str) -> str:
"""Add the root prefix to a path.
Raises:
TarFileWriter.Error: when the recursion depth has exceeded the
`depth` argument.
If the path begins with / or the prefix itself, do nothing.
Args:
path: a file path
Returns:
modified path.
"""
if not (name == self.root_directory or name.startswith('/') or
name.startswith(self.root_directory + '/')):
name = self.root_directory + '/' + name
if mtime is None:
mtime = self.default_mtime
path = path.rstrip('/').rstrip('\\')
if os.path.isdir(path):
# Remove trailing '/' (index -1 => last character)
if name[-1] in ('/', '\\'):
name = name[:-1]
# Add the x bit to directories to prevent non-traversable directories.
# The x bit is set to 1 only if the read bit is also set.
dirmode = (mode | ((0o444 & mode) >> 2)) if mode else mode
self.add_file(name + '/',
tarfile.DIRTYPE,
uid=uid,
gid=gid,
uname=uname,
gname=gname,
mtime=mtime,
mode=dirmode)
if depth <= 0:
raise self.Error('Recursion depth exceeded, probably in '
'an infinite directory loop.')
# Iterate over the sorted list of file so we get a deterministic result.
filelist = os.listdir(path)
filelist.sort()
for f in filelist:
new_name = name + '/' + f
new_path = os.path.join(path, f)
self.add_dir(new_name, new_path, uid, gid, uname, gname, mtime, mode,
depth - 1)
else:
self.add_file(name,
tarfile.REGTYPE,
file_content=path,
uid=uid,
gid=gid,
uname=uname,
gname=gname,
mtime=mtime,
mode=mode)
path = path.replace(os.path.sep, '/').rstrip('/')
if not self.root_directory or path.startswith('/'):
return path
if (path + '/').startswith(self.root_directory):
return path
return self.root_directory + path

def _addfile(self, info, fileobj=None):
"""Add a file in the tar file if there is no conflict."""
Expand Down Expand Up @@ -287,7 +237,7 @@ def add_file(self,
Args:
name: the ('/' delimited) path of the file to add.
kind: the type of the file to add, see tarfile.*TYPE.
content: a textual content to put in the file.
content: the content to put in the file.
link: if the file is a link, the destination of the link.
file_content: file to read the content from. Provide either this
one or `content` to specifies a content for the file.
Expand All @@ -298,32 +248,30 @@ def add_file(self,
mtime: modification time to put in the archive.
mode: unix permission mode of the file, default 0644 (0755).
"""
name = name.replace('\\', '/')
if file_content and os.path.isdir(file_content):
# Recurse into directory
self.add_dir(name, file_content, uid, gid, uname, gname, mtime, mode)
if not name:
return
if not (name == self.root_directory or name.startswith('/') or
name.startswith(self.root_directory + '/')):
name = self.root_directory + '/' + name
if kind == tarfile.DIRTYPE:
name = name.rstrip('/').rstrip('\\')
if name in self.directories:
return
if name == '.':
return
name = self.add_root(name)
# Do not add a directory that is already in the tar file.
if kind == tarfile.DIRTYPE and name in self.directories:
return

if mtime is None:
mtime = self.default_mtime

components = name.rsplit('/', 1)
if len(components) > 1:
d = components[0]
self.add_file(d,
tarfile.DIRTYPE,
# Make directories up the file
parent_dirs = name.rsplit('/', 1)
if len(parent_dirs) > 1:
self.add_file(parent_dirs[0],
kind=tarfile.DIRTYPE,
uid=uid,
gid=gid,
uname=uname,
gname=gname,
mtime=mtime,
mode=0o755)

tarinfo = tarfile.TarInfo(name)
tarinfo.mtime = mtime
tarinfo.uid = uid
Expand All @@ -346,9 +294,10 @@ def add_file(self,
tarinfo.size = os.fstat(f.fileno()).st_size
self._addfile(tarinfo, f)
else:
if kind == tarfile.DIRTYPE:
self.directories.add(name)
self._addfile(tarinfo)
if kind == tarfile.DIRTYPE:
assert name[-1] != '/'
self.directories.add(name)

def add_tar(self,
tar,
Expand Down Expand Up @@ -392,10 +341,7 @@ def add_tar(self,
tarinfo.uname = ''
tarinfo.gname = ''

name = tarinfo.name
if (not name.startswith('/') and
not name.startswith(self.root_directory)):
name = self.root_directory + '/' + name
name = self.add_root(tarinfo.name)
if root is not None:
if name.startswith('.'):
name = '.' + root + name.lstrip('.')
Expand Down
1 change: 0 additions & 1 deletion tests/deb/pkg_deb_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,6 @@ def assert_tar_stream_content(self, data, expected, match_order=True):
def test_expected_files(self):
# Check the set of 'test-tar-basic-*' smoke test.
expected = [
{'name': '.', 'isdir': True},
{'name': './etc', 'isdir': True,
'uid': 24, 'gid': 42, 'uname': 'foobar', 'gname': 'fizzbuzz'},
{'name': './etc/nsswitch.conf',
Expand Down
123 changes: 25 additions & 98 deletions tests/tar/archive_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,8 +163,7 @@ def assertSimpleFileContent(self, names):
for n in names:
f.add_file(n, content=n)
# pylint: disable=g-complex-comprehension
content = ([{"name": "."}] +
[{"name": n,
content = ([{"name": n,
"size": len(n.encode("utf-8")),
"data": n.encode("utf-8")}
for n in names])
Expand All @@ -187,35 +186,15 @@ def testDottedFiles(self):
f.add_file("..e")
f.add_file(".f")
content = [
{"name": "."}, {"name": "./a"}, {"name": "/b"}, {"name": "./c"},
{"name": "./.d"}, {"name": "./..e"}, {"name": "./.f"}
{"name": "a"},
{"name": "/b"},
{"name": "./c"},
{"name": "./.d"},
{"name": "..e"},
{"name": ".f"}
]
self.assertTarFileContent(self.tempfile, content)

def testAddDir(self):
# For some strange reason, ending slash is stripped by the test
content = [
{"name": ".", "mode": 0o755},
{"name": "./a", "mode": 0o755},
{"name": "./a/b", "data": b"ab", "mode": 0o644},
{"name": "./a/c", "mode": 0o755},
{"name": "./a/c/d", "data": b"acd", "mode": 0o644},
]
tempdir = os.path.join(os.environ["TEST_TMPDIR"], "test_dir")
# Iterate over the `content` array to create the directory
# structure it describes.
for c in content:
if "data" in c:
# Create files is happening locally, so use native path sep.
path_parts = c["name"][2:].split('/')
p = os.path.join(tempdir, *path_parts)
os.makedirs(os.path.dirname(p))
with open(p, "wb") as f:
f.write(c["data"])
with archive.TarFileWriter(self.tempfile) as f:
f.add_dir("./", tempdir, mode=0o644)
self.assertTarFileContent(self.tempfile, content)

def testMergeTar(self):
content = [
{"name": "./a", "data": b"a"},
Expand All @@ -230,7 +209,6 @@ def testMergeTar(self):

def testMergeTarRelocated(self):
content = [
{"name": ".", "mode": 0o755},
{"name": "./foo", "mode": 0o755},
{"name": "./foo/a", "data": b"a"},
{"name": "./foo/ab", "data": b"ab"},
Expand Down Expand Up @@ -276,43 +254,8 @@ def testAddingDirectoriesForFile(self):
with archive.TarFileWriter(self.tempfile) as f:
f.add_file("d/f")
content = [
{"name": ".",
"mode": 0o755},
{"name": "./d",
"mode": 0o755},
{"name": "./d/f"},
]
self.assertTarFileContent(self.tempfile, content)

def testAddingDirectoriesForFileSeparately(self):
d_dir = os.path.join(os.environ["TEST_TMPDIR"], "d_dir")
os.makedirs(d_dir)
with open(os.path.join(d_dir, "dir_file"), "w"):
pass
a_dir = os.path.join(os.environ["TEST_TMPDIR"], "a_dir")
os.makedirs(a_dir)
with open(os.path.join(a_dir, "dir_file"), "w"):
pass

with archive.TarFileWriter(self.tempfile) as f:
f.add_dir("d", d_dir)
f.add_file("d/f")

f.add_dir("a", a_dir)
f.add_file("a/b/f")
content = [
{"name": ".",
"mode": 0o755},
{"name": "./d",
"mode": 0o755},
{"name": "./d/dir_file"},
{"name": "./d/f"},
{"name": "./a",
"mode": 0o755},
{"name": "./a/dir_file"},
{"name": "./a/b",
"mode": 0o755},
{"name": "./a/b/f"},
{"name": "d", "mode": 0o755},
{"name": "d/f"},
]
self.assertTarFileContent(self.tempfile, content)

Expand All @@ -330,23 +273,15 @@ def testAddingDirectoriesForFileManually(self):
f.add_file("x/y/f")
f.add_file("x", tarfile.DIRTYPE)
content = [
{"name": ".",
"mode": 0o755},
{"name": "./d",
"mode": 0o755},
{"name": "./d/f"},
{"name": "./a",
"mode": 0o755},
{"name": "./a/b",
"mode": 0o755},
{"name": "./a/b/c",
"mode": 0o755},
{"name": "./a/b/c/f"},
{"name": "./x",
"mode": 0o755},
{"name": "./x/y",
"mode": 0o755},
{"name": "./x/y/f"},
{"name": "d", "mode": 0o755},
{"name": "d/f"},
{"name": "a", "mode": 0o755},
{"name": "a/b", "mode": 0o755},
{"name": "a/b/c", "mode": 0o755},
{"name": "a/b/c/f"},
{"name": "x", "mode": 0o755},
{"name": "x/y", "mode": 0o755},
{"name": "x/y/f"},
]
self.assertTarFileContent(self.tempfile, content)

Expand All @@ -364,22 +299,15 @@ def testChangingRootDirectory(self):
f.add_file("x/y/f")
f.add_file("x", tarfile.DIRTYPE)
content = [
{"name": "root",
"mode": 0o755},
{"name": "root/d",
"mode": 0o755},
{"name": "root", "mode": 0o755},
{"name": "root/d", "mode": 0o755},
{"name": "root/d/f"},
{"name": "root/a",
"mode": 0o755},
{"name": "root/a/b",
"mode": 0o755},
{"name": "root/a/b/c",
"mode": 0o755},
{"name": "root/a", "mode": 0o755},
{"name": "root/a/b", "mode": 0o755},
{"name": "root/a/b/c", "mode": 0o755},
{"name": "root/a/b/c/f"},
{"name": "root/x",
"mode": 0o755},
{"name": "root/x/y",
"mode": 0o755},
{"name": "root/x", "mode": 0o755},
{"name": "root/x/y", "mode": 0o755},
{"name": "root/x/y/f"},
]
self.assertTarFileContent(self.tempfile, content)
Expand All @@ -396,7 +324,6 @@ def testPackageDirFileAttribute(self):
"rules_pkg/tests/tar/test_tar_package_dir_file.tar")

expected_content = [
{"name": "."},
{"name": "./package"},
{"name": "./package/nsswitch.conf"},
]
Expand Down
Loading

0 comments on commit d915e26

Please sign in to comment.