From 46796502f9f94d600b7ce260bd0a755d60c126c1 Mon Sep 17 00:00:00 2001 From: SolidifiedRay Date: Mon, 31 Aug 2020 14:43:58 -0700 Subject: [PATCH 1/4] Move snapshot function from before_after_filesystem_snapshot.py to create_layout.py Move snapshot function from before_after_filesystem_snapshot.py to create_layout.py and remove before_after_filesystem_snapshot.py. This snapshot function will be used to enhance the rules generation function in create_layout.py. --- before_after_filesystem_snapshot.py | 64 ----------------------------- create_layout.py | 28 +++++++++++++ 2 files changed, 28 insertions(+), 64 deletions(-) delete mode 100644 before_after_filesystem_snapshot.py diff --git a/before_after_filesystem_snapshot.py b/before_after_filesystem_snapshot.py deleted file mode 100644 index da7d108..0000000 --- a/before_after_filesystem_snapshot.py +++ /dev/null @@ -1,64 +0,0 @@ -def snapshot(before_dict, after_dict): - '''before_after_snapshot is a simple function that returns which files were - unchanged, modified, added or removed from an input dictionary (before_dict) - and an output dictionary (after_dict). 
Both these dictionaries have file - names as the keys and their hashes as the values.''' - - unchanged_files = [] - modified_files = [] - added_files = [] - removed_files = [] - for key in before_dict: - if key in after_dict: - if before_dict[key] == after_dict[key]: - # Matching the hashes to check if file was unchanged - unchanged_files.append(key) - else: - modified_files.append(key) - else: - removed_files.append(key) - for key in after_dict: - if key not in before_dict: - # Looking for new files - added_files.append(key) - - # Returning the snapshot of the new file system - return (sorted(unchanged_files), sorted(modified_files), sorted(added_files), - sorted(removed_files)) - -def generate_artifact_rules(snapshot): - ''' - Generate Artifact Rules given which files have been added, which have been - removed, which have been modified, and which have remained unchanged. - ''' - expected_materials = [] - expected_products = [] - - # TODO: missing rules for MATCH since we don't have the information of the - # material from the previous step - for file in snapshot[0]: - # unchanged files - expected_materials.append(["ALLOW", file]) - for file in snapshot[1]: - # modified files - expected_materials.append(["ALLOW", file]) - for file in snapshot[3]: - # removed files - expected_materials.append(["DELETE", file]) - expected_materials.append(["DISALLOW", "*"]) - - for file in snapshot[0]: - # unchanged files - expected_products.append(["ALLOW", file]) - for file in snapshot[1]: - # modified files - expected_products.append(["MODIFY", file]) - for file in snapshot[2]: - # added files - expected_products.append(["CREATE", file]) - expected_products.append(["DISALLOW", "*"]) - - return { - 'expected_materials': expected_materials, - 'expected_products': expected_products - } diff --git a/create_layout.py b/create_layout.py index 4389dfe..071d554 100644 --- a/create_layout.py +++ b/create_layout.py @@ -91,6 +91,34 @@ import in_toto.models.link import in_toto.models.layout 
+def snapshot(before_dict, after_dict): + '''before_after_snapshot is a simple function that returns which files were + unchanged, modified, added or removed from an input dictionary (before_dict) + and an output dictionary (after_dict). Both these dictionaries have file + names as the keys and their hashes as the values.''' + + unchanged_files = [] + modified_files = [] + added_files = [] + removed_files = [] + for key in before_dict: + if key in after_dict: + if before_dict[key] == after_dict[key]: + # Matching the hashes to check if file was unchanged + unchanged_files.append(key) + else: + modified_files.append(key) + else: + removed_files.append(key) + for key in after_dict: + if key not in before_dict: + # Looking for new files + added_files.append(key) + + # Returning the snapshot of the new file system + return (sorted(unchanged_files), sorted(modified_files), sorted(added_files), + sorted(removed_files)) + def create_material_rules(links, index): """Create generic material rules (3 variants) From cc89c74e3732e752e251ea8c5f0a32695543d62f Mon Sep 17 00:00:00 2001 From: SolidifiedRay Date: Tue, 1 Sep 2020 14:08:39 -0700 Subject: [PATCH 2/4] Enhance rule generation function - Rename snapshot() function to changes_between_snapshots() - Generate DELETE, MODIFY, and CREATE rules generation - For MATCH rules, match only those that already were in the previous step, and allow the rest by name --- create_layout.py | 181 +++++++++++++++++++++++++++++------------------ 1 file changed, 111 insertions(+), 70 deletions(-) diff --git a/create_layout.py b/create_layout.py index 071d554..5f82d47 100644 --- a/create_layout.py +++ b/create_layout.py @@ -88,79 +88,119 @@ """ import os +import warnings import in_toto.models.link import in_toto.models.layout -def snapshot(before_dict, after_dict): - '''before_after_snapshot is a simple function that returns which files were - unchanged, modified, added or removed from an input dictionary (before_dict) - and an output dictionary 
(after_dict). Both these dictionaries have file - names as the keys and their hashes as the values.''' - - unchanged_files = [] - modified_files = [] - added_files = [] - removed_files = [] - for key in before_dict: - if key in after_dict: - if before_dict[key] == after_dict[key]: - # Matching the hashes to check if file was unchanged - unchanged_files.append(key) - else: - modified_files.append(key) - else: - removed_files.append(key) - for key in after_dict: - if key not in before_dict: - # Looking for new files - added_files.append(key) - - # Returning the snapshot of the new file system - return (sorted(unchanged_files), sorted(modified_files), sorted(added_files), - sorted(removed_files)) - -def create_material_rules(links, index): - """Create generic material rules (3 variants) - - * MATCH available materials with products from previous step (links must be an - ordered list) and - * ALLOW available materials if it is the first step in the - list - Returns a list of material rules - NOTE: Read header docstring for ideas for more complexity. """ - - expected_materials = [] - - if index == 0: - for material_name in links[index].materials.keys(): - expected_materials.append(["ALLOW", material_name]) - expected_materials.append(["DISALLOW", "*"]) - - else: - expected_materials = [ - ["MATCH", "*", "WITH", "PRODUCTS", "FROM", links[index - 1].name]] - - return expected_materials +def changes_between_snapshots(before_dict, after_dict): + """Given two 'snapshots' of an artifacts structure -- 'before' and 'after' -- + return a tuple specifying which artifacts have been added, which have been + removed, which have been modified, and which have remained unchanged. 
Both + these dictionaries have artifact names as the keys and their hashes as the + values.""" + before_set = set(before_dict.keys()) + after_set = set(after_dict.keys()) -def create_product_rules(links, index): - """Create generic product rules (2 variants) + removed_artifacts = before_set.difference(after_set) + added_artifacts = after_set.difference(before_set) - * ALLOW available products - * DISALLOW everything else - - Returns a list of product rules - NOTE: Read header docstring for ideas for more complexity. """ - - - expected_products = [] - - for product_name in links[index].materials.keys(): - expected_products.append(["ALLOW", product_name]) - - expected_products.append(["DISALLOW", "*"]) - - return expected_products + unchanged_artifacts = set() + modified_artifacts = set() + for key in before_set.intersection(after_set): + if before_dict[key] == after_dict[key]: + unchanged_artifacts.add(key) + else: + modified_artifacts.add(key) + + return (unchanged_artifacts, modified_artifacts, added_artifacts, + removed_artifacts) + +def create_material_rules(previous_link, current_link): + """Create generic material rules + + - MATCH available materials with products from previous step (links must be + an ordered list) and + - ALLOW available materials if it is the first step in the list + - DELETE removed materials + + Args: + previous_link: a link of previous step, including previous step's materials + and products + current_link: a link of current step, including current step's materials + and products + + Returns: + a list of material rules + """ + + expected_materials_rules = [] + unchanged_artifacts, modified_artifacts, _, deleted_artifacts = \ + changes_between_snapshots(current_link.materials, current_link.products) + previous_link_products = previous_link.products if previous_link else [] + + # If there was a previous step, add MATCH rules for all materials that were + # products in the previous step + for artifact in 
sorted(set(current_link.materials).intersection( + previous_link_products)): + expected_materials_rules.append( + ["MATCH", artifact, "WITH", "PRODUCTS", "FROM", previous_link.name]) + + # Add DELETE rules for all deleted artifacts + for artifact in sorted(deleted_artifacts): + expected_materials_rules.append(["DELETE", artifact]) + # Warn for any delete rule that has no effect because of a previous match + # rule + if deleted_artifacts.intersection(previous_link_products): + warnings.warn("DELETE rule is moot because of the previous MATCH rule." + " Only the first rule for a given artifact has an effect") + + # Add ALLOW rules for all remaining materials + for artifact in sorted(set(current_link.materials).difference( + previous_link_products).difference(deleted_artifacts)): + expected_materials_rules.append(["ALLOW", artifact]) + + # Add DISALLOW rules for all other artifacts + expected_materials_rules.append(["DISALLOW", "*"]) + + return expected_materials_rules + + +def create_product_rules(current_link): + """Create generic product rules + + - ALLOW available products + - MODIFY changed products + - CREATE added products + - DISALLOW everything else + + Args: + current_link: a link of current step, including current step's materials + and products + + Returns: + a list of product rules + """ + + + expected_products_rules = [] + # Deleted artifacts won't show up in the product queue + unchanged_artifacts, modified_artifacts, added_artifacts, _ = \ + changes_between_snapshots(current_link.materials, current_link.products) + + for artifact in unchanged_artifacts: + # ALLOW unchanged artifacts + expected_products_rules.append(["ALLOW", artifact]) + for artifact in modified_artifacts: + # MODIFY modified artifacts + expected_products_rules.append(["MODIFY", artifact]) + for artifact in added_artifacts: + # CREATE added artifacts + expected_products_rules.append(["CREATE", artifact]) + # DISALLOW everything else + expected_products_rules.append(["DISALLOW", "*"]) + 
+ return expected_products_rules def create_layout_from_ordered_links(links): @@ -173,12 +213,13 @@ def create_layout_from_ordered_links(links): for index, link in enumerate(links): step_name = link.name + previous_link = None if index == 0 else links[index-1] + current_link = link step = in_toto.models.layout.Step(name=step_name, - expected_materials=create_material_rules(links, index), - expected_products=create_product_rules(links, index), + expected_materials=create_material_rules(previous_link, current_link), + expected_products=create_product_rules(current_link), expected_command=link.command) layout.steps.append(step) - return layout From 32e0bf1ebe7b621ce75a62904edbe1a968404147 Mon Sep 17 00:00:00 2001 From: SolidifiedRay Date: Tue, 1 Sep 2020 14:17:04 -0700 Subject: [PATCH 3/4] Rename test_before_after_filesystem_snapshot.py to test_create_layout.py --- .travis.yml | 2 +- create_layout.py | 7 +++---- ..._after_filesystem_snapshot.py => test_create_layout.py} | 0 3 files changed, 4 insertions(+), 5 deletions(-) rename tests/{test_before_after_filesystem_snapshot.py => test_create_layout.py} (100%) diff --git a/.travis.yml b/.travis.yml index 28aee2d..69a058b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,4 +5,4 @@ cache: pip install: - pip install -r requirements.txt script: - python -m unittest tests.test_before_after_filesystem_snapshot + python -m unittest tests.test_create_layout diff --git a/create_layout.py b/create_layout.py index 5f82d47..deb4738 100644 --- a/create_layout.py +++ b/create_layout.py @@ -182,19 +182,18 @@ def create_product_rules(current_link): a list of product rules """ - expected_products_rules = [] # Deleted artifacts won't show up in the product queue unchanged_artifacts, modified_artifacts, added_artifacts, _ = \ changes_between_snapshots(current_link.materials, current_link.products) - for artifact in unchanged_artifacts: + for artifact in sorted(unchanged_artifacts): # ALLOW unchanged artifacts 
expected_products_rules.append(["ALLOW", artifact]) - for artifact in modified_artifacts: + for artifact in sorted(modified_artifacts): # MODIFY modified artifacts expected_products_rules.append(["MODIFY", artifact]) - for artifact in added_artifacts: + for artifact in sorted(added_artifacts): # CREATE added artifacts expected_products_rules.append(["CREATE", artifact]) # DISALLOW everything else diff --git a/tests/test_before_after_filesystem_snapshot.py b/tests/test_create_layout.py similarity index 100% rename from tests/test_before_after_filesystem_snapshot.py rename to tests/test_create_layout.py From 43d2792f082deb19012869070d626f311f120be2 Mon Sep 17 00:00:00 2001 From: SolidifiedRay Date: Tue, 1 Sep 2020 15:30:03 -0700 Subject: [PATCH 4/4] Add tests for rule generation function - Add test_create_material_rules_with_zero_index - Add test_create_material_rules_with_nonzero_index - Add test_create_product_rules --- tests/test_create_layout.py | 190 ++++++++++++++++++++++++++---------- 1 file changed, 139 insertions(+), 51 deletions(-) diff --git a/tests/test_create_layout.py b/tests/test_create_layout.py index 2817c5a..c9529aa 100644 --- a/tests/test_create_layout.py +++ b/tests/test_create_layout.py @@ -1,9 +1,10 @@ import unittest -import before_after_filesystem_snapshot +import create_layout +import in_toto.models.link -class Test_before_after_filesystem_snapshot(unittest.TestCase): +class Test_CreateLayout(unittest.TestCase): - '''Check whether the output of before_after_filesystem_snapshot is as defined + '''Check whether the output of create_layout is as defined by each test case.''' before = { @@ -13,6 +14,44 @@ class Test_before_after_filesystem_snapshot(unittest.TestCase): 'bar/bat/four.tgz': '6677889900112233' } + first_step_link_str = { + '_type': 'link', + 'name': 'first_step', + 'byproducts': {}, + 'environment': {}, + 'materials': {}, + 'command': [], + 'products': { + 'one.tgz': {'sha256': '1234567890abcdef'}, + 'foo/two.tgz': {'sha256': 
'0000001111112222'}, + 'three.txt': {'sha256': '1111222233334444'}, + 'bar/bat/four.tgz': {'sha256': '6677889900112233'} + } + } + + second_step_link_str = { + '_type': 'link', + 'name': 'second_step', + 'byproducts': {}, + 'environment': {}, + 'materials': { + 'one.tgz': {'sha256': '1234567890abcdef'}, + 'foo/two.tgz': {'sha256': '0000001111112222'}, + 'three.txt': {'sha256': '1111222233334444'}, + 'bar/bat/four.tgz': {'sha256': '6677889900112233'} + }, + 'command': [], + 'products': { + 'five.txt': {'sha256': '5555555555555555'}, + 'one.tgz': {'sha256': '1234567890abcdef'}, + 'foo/two.tgz': {'sha256': 'ffffffffffffffff'}, + 'bar/bat/four.tgz': {'sha256': '6677889900112233'}, + 'baz/six.tgz': {'sha256': '6666666666666666'} + } + } + + empty_set = set() + def test_same_filesystem_snapshot(self): after = { @@ -22,18 +61,28 @@ def test_same_filesystem_snapshot(self): 'bar/bat/four.tgz': '6677889900112233' } - snapshot = before_after_filesystem_snapshot.snapshot(self.before, after) - self.assertEqual(snapshot, (['bar/bat/four.tgz', 'foo/two.tgz', 'one.tgz', - 'three.txt'], [], [], [])) + unchanged, modified, added, deleted = \ + create_layout.changes_between_snapshots(self.before, after) + + self.assertEqual(unchanged, + {'one.tgz', 'foo/two.tgz', 'three.txt', 'bar/bat/four.tgz'}) + self.assertSetEqual(modified, self.empty_set) + self.assertSetEqual(added, self.empty_set) + self.assertSetEqual(deleted, self.empty_set) def test_removed_files_filesystem_snapshot(self): after = {} - snapshot = before_after_filesystem_snapshot.snapshot(self.before, after) - self.assertEqual(snapshot, ([], [], [], ['bar/bat/four.tgz', 'foo/two.tgz', - 'one.tgz', 'three.txt'])) + unchanged, modified, added, deleted = \ + create_layout.changes_between_snapshots(self.before, after) + + self.assertSetEqual(unchanged, self.empty_set) + self.assertSetEqual(modified, self.empty_set) + self.assertSetEqual(added, self.empty_set) + self.assertSetEqual(deleted, + {'bar/bat/four.tgz', 'foo/two.tgz', 
'one.tgz', 'three.txt'}) def test_new_filesystem_snapshot(self): @@ -43,10 +92,15 @@ def test_new_filesystem_snapshot(self): 'foofoo/seven.txt': '1111222233334555' } - snapshot = before_after_filesystem_snapshot.snapshot(self.before, after) - self.assertEqual(snapshot, ([], [], ['five.tgz', 'foo/bar/six.tgz', - 'foofoo/seven.txt'], ['bar/bat/four.tgz', 'foo/two.tgz', 'one.tgz', - 'three.txt'])) + unchanged, modified, added, deleted = \ + create_layout.changes_between_snapshots(self.before, after) + + self.assertSetEqual(unchanged, self.empty_set) + self.assertSetEqual(modified, self.empty_set) + self.assertSetEqual(added, + {'five.tgz', 'foo/bar/six.tgz', 'foofoo/seven.txt'}) + self.assertSetEqual(deleted, + {'bar/bat/four.tgz', 'foo/two.tgz', 'one.tgz', 'three.txt'}) def test_fully_modified_filesystem_snapshot(self): @@ -58,9 +112,14 @@ def test_fully_modified_filesystem_snapshot(self): 'bar/bat/four.tgz': '6677889900123456' } - snapshot = before_after_filesystem_snapshot.snapshot(self.before, after) - self.assertEqual(snapshot, ([], ['bar/bat/four.tgz', 'foo/two.tgz', - 'one.tgz', 'three.txt'], [], [])) + unchanged, modified, added, deleted = \ + create_layout.changes_between_snapshots(self.before, after) + + self.assertSetEqual(unchanged, self.empty_set) + self.assertSetEqual(modified, + {'bar/bat/four.tgz', 'foo/two.tgz', 'one.tgz', 'three.txt'}) + self.assertSetEqual(added, self.empty_set) + self.assertSetEqual(deleted, self.empty_set) def test_partially_modified_filesystem_snapshot(self): @@ -73,41 +132,70 @@ def test_partially_modified_filesystem_snapshot(self): 'baz/six.tgz': '6666666666666666' } - snapshot = before_after_filesystem_snapshot.snapshot(self.before, after) - self.assertEqual(snapshot, (['one.tgz'], ['bar/bat/four.tgz', - 'foo/two.tgz'], ['baz/six.tgz', 'five.txt'], ['three.txt'])) - - def test_generate_artifact_rules(self): - - after = { - 'five.txt': '5555555555555555', - 'one.tgz': '1234567890abcdef', - 'foo/two.tgz': 'ffffffffffffffff', - 
'bar/bat/four.tgz': '6677889900112233', - 'baz/six.tgz': '6666666666666666' - } - - artifact_rules = { - 'expected_materials': [ - ['ALLOW', 'bar/bat/four.tgz'], - ['ALLOW', 'one.tgz'], - ['ALLOW', 'foo/two.tgz'], - ['DELETE', 'three.txt'], - ['DISALLOW', '*'] - ], - 'expected_products': [ - ['ALLOW', 'bar/bat/four.tgz'], - ['ALLOW', 'one.tgz'], - ['MODIFY', 'foo/two.tgz'], - ['CREATE', 'baz/six.tgz'], - ['CREATE', 'five.txt'], - ['DISALLOW', '*'] - ] - } - - snapshot = before_after_filesystem_snapshot.snapshot(self.before, after) - rules = before_after_filesystem_snapshot.generate_artifact_rules(snapshot) - self.assertDictEqual(artifact_rules, rules) + unchanged, modified, added, deleted = \ + create_layout.changes_between_snapshots(self.before, after) + + self.assertSetEqual(unchanged, {'one.tgz'}) + self.assertSetEqual(modified, {'bar/bat/four.tgz', 'foo/two.tgz'}) + self.assertSetEqual(added, {'baz/six.tgz', 'five.txt'}) + self.assertSetEqual(deleted, {'three.txt'}) + + + def test_create_material_rules_of_initial_step(self): + # Zero index means that the current step is the initial step, + # so we need to ALLOW all the existing files instead of matching. + second_link = in_toto.models.link.Link.read(self.second_step_link_str) + links = [second_link] + + expected_materials = [ + ['DELETE', 'three.txt'], + ['ALLOW', 'bar/bat/four.tgz'], + ['ALLOW', 'foo/two.tgz'], + ['ALLOW', 'one.tgz'], + ['DISALLOW', '*'] + ] + + self.assertEqual(expected_materials, + create_layout.create_material_rules(None, second_link)) + + def test_create_material_rules_of_not_initial_step(self): + # Nonzero index means that the current step is not the initial step, + # so we need to MATCH materials with products of the previous step. 
+ first_link = in_toto.models.link.Link.read(self.first_step_link_str) + second_link = in_toto.models.link.Link.read(self.second_step_link_str) + links = [first_link, second_link] + + # WARNING: if we have a MATCH rule and a DELETE rule on the same artifact, + # the first MATCH rule will moot the subsequent DELETE rule. + expected_materials = [ + ['MATCH', 'bar/bat/four.tgz', 'WITH', 'PRODUCTS', 'FROM', 'first_step'], + ['MATCH', 'foo/two.tgz', 'WITH', 'PRODUCTS', 'FROM', 'first_step'], + ['MATCH', 'one.tgz', 'WITH', 'PRODUCTS', 'FROM', 'first_step'], + ['MATCH', 'three.txt', 'WITH', 'PRODUCTS', 'FROM', 'first_step'], + ['DELETE', 'three.txt'], + ['DISALLOW', '*'] + ] + + self.assertEqual(expected_materials, + create_layout.create_material_rules(first_link, second_link)) + + def test_create_product_rules(self): + # Given the changes of second step's materials and product, + # generate the product rules. + second_link = in_toto.models.link.Link.read(self.second_step_link_str) + expected_products = [ + ['ALLOW', 'bar/bat/four.tgz'], + ['ALLOW', 'one.tgz'], + ['MODIFY', 'foo/two.tgz'], + ['CREATE', 'baz/six.tgz'], + ['CREATE', 'five.txt'], + ['DISALLOW', '*'] + ] + + self.assertEqual(expected_products, + create_layout.create_product_rules(second_link)) + + # TODO: missing test for create_layout_from_ordered_links if __name__ == '__main__': unittest.main()