From 7ab7224028341f2e843cd65879d308e12a4edceb Mon Sep 17 00:00:00 2001 From: Martin Vrachev Date: Thu, 29 Jul 2021 17:08:17 +0300 Subject: [PATCH] Implement glob-like pattern matching According to the recently updated version of the specification the shell style wildcard matching is glob-like (see https://github.com/theupdateframework/specification/pull/174), and therefore a path separator in a path should not be matched by a wildcard in the PATHPATTERN. That's not what happens with `fnmatch.fnmatch()` which doesn't see "/" separator as a special symbol. For example: fnmatch.fnmatch("targets/foo.tgz", "*.tgz") will return True which is not what glob-like implementation will do. We should make sure that target_path and the pathpattern contain the same number of directories and because each part of the pathpattern could include a glob pattern we should check that fnmatch.fnmatch() is true on each target and pathpattern directory fragment separated by "/". Signed-off-by: Martin Vrachev --- tests/test_api.py | 30 ++++++++++++++++++++++++++++-- tuf/api/metadata.py | 26 ++++++++++++++++++++++++-- 2 files changed, 52 insertions(+), 4 deletions(-) diff --git a/tests/test_api.py b/tests/test_api.py index 2cf1222884..145fb76648 100755 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -31,7 +31,7 @@ MetaFile, TargetFile, Delegations, - DelegatedRole, + _is_target_in_pathpattern ) from tuf.api.serialization import ( @@ -40,7 +40,6 @@ from tuf.api.serialization.json import ( JSONSerializer, - JSONDeserializer, CanonicalJSONSerializer ) @@ -465,6 +464,33 @@ def test_metadata_root(self): with self.assertRaises(KeyError): root.signed.remove_key('root', 'nosuchkey') + def test_is_target_in_pathpattern(self): + supported_use_cases = [ + ("foo.tgz", "foo.tgz"), + ("foo.tgz", "*"), + ("foo.tgz", "*.tgz"), + ("foo-version-a.tgz", "foo-version-?.tgz"), + ("targets/foo.tgz", "targets/*.tgz"), + ("foo/bar/zoo/k.tgz", "foo/bar/zoo/*"), + ("foo/bar/zoo/k.tgz", "foo/*/zoo/*"), + ("foo/bar/zoo/k.tgz", "*/*/*/*"), + ("foo/bar", "f?o/bar"), + ("foo/bar", "*o/bar"), + ] + for targetpath, pathpattern in supported_use_cases: + self.assertTrue(_is_target_in_pathpattern(targetpath, pathpattern)) + + invalid_use_cases = [ + ("targets/foo.tgz", "*.tgz"), + ("/foo.tgz", "*.tgz",), + ("targets/foo.tgz", "*"), + ("foo-version-alpha.tgz", "foo-version-?.tgz"), + ("foo//bar", "*/bar"), + ("foo/bar", "f?/bar") + ] + for targetpath, pathpattern in invalid_use_cases: + self.assertFalse(_is_target_in_pathpattern(targetpath, pathpattern)) + def test_delegation_class(self): # empty keys and roles diff --git a/tuf/api/metadata.py b/tuf/api/metadata.py index bf497fcf88..c1bf970cc7 100644 --- a/tuf/api/metadata.py +++ b/tuf/api/metadata.py @@ -980,6 +980,27 @@ def update(self, rolename: str, role_info: MetaFile) -> None: self.meta[metadata_fn] = role_info +def _is_target_in_pathpattern(targetpath: str, pathpattern: str) -> bool: + """Determines whether "targetname" matches the "pathpattern".""" + # We need to make sure that targetname and pathpattern are pointing to the + # same directory as fnmatch doesn't threat "/" as a special symbol. Example: + # fnmatch.fnmatch("targets/foo.tgz", "*.tgz") will return True. + target_parts = targetpath.split("/") + pattern_parts = pathpattern.split("/") + if len(target_parts) != len(pattern_parts): + # Difference in the number of nested dirs means that target_dir and + # pathpattern_dir point to different places. + return False + + # Every part in the pathpattern could include a glob pattern, that's why + # each of the target parts should match the corresponding pathpattern part. + for target_dir, pattern_dir in zip(target_parts, pattern_parts): + if not fnmatch.fnmatch(target_dir, pattern_dir): + return False + + return True + + class DelegatedRole(Role): """A container with information about a delegated role. @@ -1077,8 +1098,9 @@ def is_delegated_path(self, target_filepath: str) -> bool: # are also considered matches. Make sure to strip any leading # path separators so that a match is made. # Example: "foo.tgz" should match with "/*.tgz". - if fnmatch.fnmatch( - target_filepath.lstrip(os.sep), pathpattern.lstrip(os.sep) + if _is_target_in_pathpattern( + target_filepath.lstrip(os.sep), + pathpattern.lstrip(os.sep), ): return True