Skip to content

Commit

Permalink
Add verify_archive rule to do e2e tests on built archives.
Browse files Browse the repository at this point in the history
We could do more, but this should be good enough to start.

Fixes bazelbuild#644
  • Loading branch information
aiuto committed Feb 10, 2023
1 parent 694e3ad commit 9ac3376
Show file tree
Hide file tree
Showing 5 changed files with 321 additions and 0 deletions.
10 changes: 10 additions & 0 deletions pkg/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -100,3 +100,13 @@ py_binary(
python_version = "PY3",
visibility = ["//visibility:public"],
)

# Support library for the py_test targets generated by verify_archive_test
# (see verify_archive.bzl, which lists this target in the test's deps).
# The visibility is public so those generated tests can depend on it, but it
# is not a supported API: use it directly at your own risk.
py_library(
name = "verify_archive_test_lib",
srcs = ["verify_archive_test_lib.py"],
srcs_version = "PY3",
visibility = ["//visibility:public"],
)

# Template for the generated test main; verify_archive.bzl refers to it by
# label as the default of its implicit _template attribute.
exports_files(["verify_archive_test_main.py.tpl"])
128 changes: 128 additions & 0 deletions pkg/verify_archive.bzl
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
# Copyright 2023 The Bazel Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Rule to test that the content of an archive has particular properties.
This is available for integration testing, when people want to verify that all
the files they expect are in an archive. Or possibly, they want to verify that
some files do not appear.
The execution time is O(# expected patterns * size of archive).
"""

load("@rules_python//python:defs.bzl", "py_test")


def _gen_verify_archive_test_main_impl(ctx):
    """Writes the test main by filling in the template with the rule's attributes.

    Each expectation attribute is rendered with str() and substituted for the
    matching ${...} placeholder in the template.
    """
    archive = ctx.files.target[0]
    generated_main = ctx.outputs.out

    # @unsorted-dict-items
    placeholder_values = {
        "${TEST_NAME}": ctx.attr.test_name,
        "${TARGET}": archive.short_path,
        "${MUST_CONTAIN}": str(ctx.attr.must_contain),
        "${MUST_CONTAIN_REGEX}": str(ctx.attr.must_contain_regex),
        "${MUST_NOT_CONTAIN}": str(ctx.attr.must_not_contain),
        "${MUST_NOT_CONTAIN_REGEX}": str(ctx.attr.must_not_contain_regex),
        "${MIN_SIZE}": str(ctx.attr.min_size),
        "${MAX_SIZE}": str(ctx.attr.max_size),
    }

    ctx.actions.expand_template(
        template = ctx.file._template,
        output = generated_main,
        substitutions = placeholder_values,
    )

    return [DefaultInfo(files = depset([generated_main]))]

# Private rule that generates the Python main for one verify_archive_test.
# The expectation attributes are substituted into the template as Python
# literals by the implementation function.
_gen_verify_archive_test_main = rule(
    implementation = _gen_verify_archive_test_main_impl,
    # @unsorted-dict-items
    attrs = {
        "out": attr.output(mandatory = True),
        "test_name": attr.string(mandatory = True),
        "target": attr.label(
            doc = "Archive to test",
            allow_single_file = True,
            mandatory = True,
        ),

        "must_contain": attr.string_list(
            doc = "List of paths which all must appear in the archive.",
        ),
        "must_contain_regex": attr.string_list(
            doc = "List of regexes which all must appear in the archive.",
        ),
        "must_not_contain": attr.string_list(
            doc = """List of paths that must not be in the archive.""",
        ),
        "must_not_contain_regex": attr.string_list(
            doc = """List of regexes that must not be in the archive.""",
        ),
        "min_size": attr.int(
            doc = """Minimum number of entries in the archive.""",
        ),
        "max_size": attr.int(
            doc = """Maximum number of entries in the archive.""",
        ),

        # Implicit dependencies.
        "_template": attr.label(
            default = Label("//pkg:verify_archive_test_main.py.tpl"),
            allow_single_file = True,
        ),
    },
)

def _test_class_name(name):
    """Derives a valid Python class name for the generated test from a target name."""

    # Target names may contain characters such as '-', '.', '+' or '/' that
    # are not legal in a Python identifier; map each of them to '_'.
    sanitized = "".join([
        c if (c.isalnum() or c == "_") else "_"
        for c in name.elems()
    ])

    # An identifier may not start with a digit.
    if sanitized[0].isdigit():
        sanitized = "_" + sanitized
    return sanitized + "Test"

def verify_archive_test(
        name,
        target,
        must_contain = None,
        must_contain_regex = None,
        must_not_contain = None,
        must_not_contain_regex = None,
        min_size = 1,
        max_size = -1):
    """Tests that an archive contains specific file patterns.

    This test is used to verify that an archive contains the expected content.
    It expands to a generated Python main (`<name>_internal_main`) and a
    py_test called `name` that runs it against the archive.

    Args:
      name: Name of the resulting py_test target.
      target: A target archive.
      must_contain: A list of paths which must appear in the archive.
      must_contain_regex: A list of path regexes which must appear in the archive.
      must_not_contain: A list of paths which must not appear in the archive.
      must_not_contain_regex: A list of path regexes which must not appear in the archive.
      min_size: The minimum number of entries which must be in the archive.
      max_size: The maximum number of entries which must be in the archive.
    """
    test_src = name + "__internal_main.py"
    _gen_verify_archive_test_main(
        name = name + "_internal_main",
        target = target,
        test_name = _test_class_name(name),
        out = test_src,
        must_contain = must_contain,
        must_contain_regex = must_contain_regex,
        must_not_contain = must_not_contain,
        must_not_contain_regex = must_not_contain_regex,
        min_size = min_size,
        max_size = max_size,
    )
    py_test(
        name = name,
        # NOTE(review): the test library could instead be listed directly in
        # srcs here, which would remove the need for
        # //pkg:verify_archive_test_lib to be publicly visible.
        srcs = [":" + test_src],
        main = test_src,
        data = [target],
        python_version = "PY3",
        deps = [
            "//pkg:verify_archive_test_lib",
            "@bazel_tools//tools/python/runfiles",
        ],
    )
117 changes: 117 additions & 0 deletions pkg/verify_archive_test_lib.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
# Copyright 2023 The Bazel Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Test base class for verifying the contents of an archive."""

import json
import re
import tarfile
import unittest

from bazel_tools.tools.python.runfiles import runfiles

def is_probably_regex(s):
  """Guess whether s looks like a regular expression.

  Returns True when s contains at least one of the regex metacharacters
  listed below, and False otherwise. Note that '.' and '+' are deliberately
  not in the list (presumably because they are common in plain file names).
  """
  regex_markers = '?*^$[](){}|\\'
  return any(marker in s for marker in regex_markers)


class VerifyArchiveTest(unittest.TestCase):
  """Base test harness for checking the entries of a built archive.

  A subclass calls scan_target() (typically from its setUp) to load the
  archive's entry names into self.paths, and then uses the assert*/check_*
  helpers below to state its expectations about those names.
  """

  # TODO: resolve the target through runfiles (runfiles.Create().Rlocation)
  # instead of opening the path exactly as it was given.

  def setUp(self):
    super(VerifyArchiveTest, self).setUp()
    # Start with an empty listing so the helpers are usable even before (or
    # without) a successful scan_target() call.
    self.paths = []

  def scan_target(self, target):
    """Load the entry names of the archive at `target` into self.paths.

    The archive type is guessed from the file name: a last extension starting
    with 't' (tar, tgz, txz, ...) or a '.tar.<compression>' suffix means tar.

    Args:
      target: Path of the archive file to read.

    Raises:
      AssertionError: (via self.fail) if the archive type is not supported or
        can not be determined.
    """
    parts = target.split('.')
    ext = parts[-1]
    # startswith() and the length guard keep names without a dot, or ending
    # in a dot, from raising IndexError instead of a readable test failure.
    is_tar = ext.startswith('t') or (len(parts) > 1 and parts[-2] == 'tar')
    if is_tar:
      self.load_tar(target)
    elif ext.startswith('z'):
      self.fail('Can not process zip yet')
    else:
      self.fail('Can not figure out the archive type for (%s)' % target)

  def load_tar(self, path):
    """Read the member names of the tar file at `path` into self.paths.

    Args:
      path: Path of a tar file; any compression tarfile can detect is accepted.
    """
    with tarfile.open(path, 'r:*') as f:
      self.paths = [info.name for info in f]

  def assertMinSize(self, min_size):
    """Check that the archive contains at least min_size entries.

    Args:
      min_size: The minimum number of entries we expect.
    """
    actual_size = len(self.paths)
    self.assertGreaterEqual(
        actual_size,
        min_size,
        msg = "Expected at least %d files, but found only %d" % (
            min_size, actual_size))

  def assertMaxSize(self, max_size):
    """Check that the archive contains at most max_size entries.

    Args:
      max_size: The maximum number of entries we expect. A negative value
        means "no limit" and always passes.
    """
    if max_size < 0:
      return
    actual_size = len(self.paths)
    self.assertLessEqual(
        actual_size,
        max_size,
        msg = "Expected at most %d files, but found %d" % (
            max_size, actual_size))

  def check_must_contain(self, must_contain):
    """Fail unless every path in must_contain is an entry of the archive.

    Args:
      must_contain: Iterable of exact entry names that are required.
    """
    missing = set(must_contain) - set(self.paths)
    if missing:
      # Sorted so the failure message is stable from run to run.
      self.fail('These required paths were not found: %s'
                % ','.join(sorted(missing)))

  def check_must_not_contain(self, must_not_contain):
    """Fail if any path in must_not_contain is an entry of the archive.

    Args:
      must_not_contain: Iterable of exact entry names that are forbidden.
    """
    disallowed = set(must_not_contain)
    for path in self.paths:
      if path in disallowed:
        self.fail('Found disallowed path (%s) in the archive' % path)

  def check_must_contain_regex(self, must_contain_regex):
    """Fail unless every regex matches at least one archive entry.

    Patterns are applied with re.match, so they are anchored at the start of
    the entry name.

    Args:
      must_contain_regex: Iterable of regex strings that are required.
    """
    for pattern in must_contain_regex:
      r_comp = re.compile(pattern)
      if not any(r_comp.match(path) for path in self.paths):
        self.fail('Did not find pattern (%s) in the archive' % pattern)

  def check_must_not_contain_regex(self, must_not_contain_regex):
    """Fail if any regex matches any archive entry.

    Patterns are applied with re.match, so they are anchored at the start of
    the entry name.

    Args:
      must_not_contain_regex: Iterable of regex strings that are forbidden.
    """
    for pattern in must_not_contain_regex:
      r_comp = re.compile(pattern)
      for path in self.paths:
        if r_comp.match(path):
          self.fail('Found disallowed pattern (%s) in the archive' % pattern)
46 changes: 46 additions & 0 deletions pkg/verify_archive_test_main.py.tpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Copyright 2023 The Bazel Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Generated test that verifies the contents of an archive."""

import unittest

from pkg import verify_archive_test_lib

class ${TEST_NAME}(verify_archive_test_lib.VerifyArchiveTest):

def setUp(self):
super(${TEST_NAME}, self).setUp()
self.scan_target('${TARGET}')

def test_min_size(self):
self.assertMinSize(${MIN_SIZE})

def test_max_size(self):
self.assertMaxSize(${MAX_SIZE})

def test_must_contain(self):
self.check_must_contain(${MUST_CONTAIN})

def test_must_not_contain(self):
self.check_must_not_contain(${MUST_NOT_CONTAIN})

def test_must_not_contain(self):
self.check_must_contain_regex(${MUST_CONTAIN_REGEX})

def test_must_not_contain(self):
self.check_must_not_contain_regex(${MUST_NOT_CONTAIN_REGEX})


if __name__ == '__main__':
unittest.main()
20 changes: 20 additions & 0 deletions tests/tar/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

# buildifier: disable=bzl-visibility
load("//pkg:mappings.bzl", "pkg_files", "pkg_mklink", "strip_prefix")
load("//pkg:verify_archive.bzl", "verify_archive_test")
load("//pkg/private/tar:tar.bzl", "SUPPORTED_TAR_COMPRESSIONS", "pkg_tar")
load("//tests:my_package_name.bzl", "my_package_naming")
load("//tests/util:defs.bzl", "directory", "fake_artifact", "link_tree")
Expand Down Expand Up @@ -265,6 +266,25 @@ pkg_tar(
],
)

# Checks the content of the :test-tar-repackaging-long-filename archive.
verify_archive_test(
name = "repackaging_long_filename_test",
target = ":test-tar-repackaging-long-filename",
must_contain = [
"can_i_repackage_a_file_with_a_long_name/file_with_a_ridiculously_long_name_consectetur_adipiscing_elit_fusce_laoreet_lorem_neque_sed_pharetra_erat.txt",
],
# The remaining expectations are not strictly needed for this archive; they
# are here so that every capability of verify_archive_test gets exercised.
must_not_contain = [
"i_am_not here",
],
must_contain_regex = [
".*can_i_repackage_a_file_with_a_long_name/$",
],
must_not_contain_regex = [
"^five.is.right.out",
],
max_size = 2,
)

pkg_tar(
name = "test-tar-tree-artifact",
srcs = [
Expand Down

0 comments on commit 9ac3376

Please sign in to comment.