From 51fb0f64128c7272b79f91390b59258e89930c0f Mon Sep 17 00:00:00 2001 From: David Buchanan Date: Wed, 11 Dec 2024 18:05:33 +0000 Subject: [PATCH 1/4] mst-test-suite integration --- .gitmodules | 3 ++ README.md | 1 + arroba/tests/mst_test_suite.py | 83 ++++++++++++++++++++++++++++++++++ mst-test-suite | 1 + 4 files changed, 88 insertions(+) create mode 100644 .gitmodules create mode 100644 arroba/tests/mst_test_suite.py create mode 160000 mst-test-suite diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..607ce30 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "mst-test-suite"] + path = mst-test-suite + url = https://github.com/DavidBuchanan314/mst-test-suite diff --git a/README.md b/README.md index 60e8a41..6e80c78 100644 --- a/README.md +++ b/README.md @@ -251,6 +251,7 @@ Here's how to package, test, and ship a new release. ```sh source local/bin/activate.csh python -m unittest discover + python -m unittest arroba.tests.mst_test_suite # more extensive, slower tests (deliberately excluded from autodiscovery) ``` 1. Bump the version number in `pyproject.toml` and `docs/conf.py`. `git grep` the old version number to make sure it only appears in the changelog. Change the current changelog entry in `README.md` for this new version from _unreleased_ to the current date. 1. Build the docs. If you added any new modules, add them to the appropriate file(s) in `docs/source/`. Then run `./docs/build.sh`. Check that the generated HTML looks fine by opening `docs/_build/html/index.html` and looking around. diff --git a/arroba/tests/mst_test_suite.py b/arroba/tests/mst_test_suite.py new file mode 100644 index 0000000..d7cd59b --- /dev/null +++ b/arroba/tests/mst_test_suite.py @@ -0,0 +1,83 @@ +import os +import json + +import dag_cbor +import dag_cbor.random +from multiformats import CID, varint + +from tqdm import tqdm + +from ..diff import Change, Diff +from ..mst import MST +from ..storage import MemoryStorage, Block +from . import testutil + +class MSTSuiteTest(testutil.TestCase): + + def setUp(self): + super().setUp() + self.diff_testcases = {} + # recursively search for test cases in JSON format. + # for now we only know how to process "mst-diff" test cases - more types will be added + # in the future + self.test_suite_base = "./mst-test-suite/" + for path in [os.path.join(dp, f) for dp, _, fn in os.walk(self.test_suite_base + "/tests/") for f in fn]: + if not path.endswith(".json"): + continue + with open(path) as json_file: + testcase = json.load(json_file) + if testcase.get("$type") == "mst-diff": + self.diff_testcases[path] = testcase + + def populate_storage_from_car(self, storage: MemoryStorage, car_path: str) -> CID: + # ad-hoc CAR parser, returns the root CID + with open(self.test_suite_base + car_path, "rb") as carfile: + car_header = dag_cbor.decode(carfile.read(varint.decode(carfile))) + while True: + try: + block = carfile.read(varint.decode(carfile)) + except ValueError: + break + cid = CID.decode(block[:36]) + storage.blocks[cid] = Block(cid=cid, encoded=block[36:]) + return car_header["roots"][0] + + def test_diffs(self): + for testname, testcase in tqdm(self.diff_testcases.items()): + storage = MemoryStorage() + root_a = self.populate_storage_from_car(storage, testcase["inputs"]["mst_a"]) + root_b = self.populate_storage_from_car(storage, testcase["inputs"]["mst_b"]) + mst_a = MST.load(storage=storage, cid=root_a) + mst_b = MST.load(storage=storage, cid=root_b) + + diff: Diff = Diff.of(mst_b, mst_a) + + ops_list = [] + for created in diff.adds.values(): + ops_list.append({ + "rpath": created.key, + "old_value": None, + "new_value": created.cid.encode("base32") + }) + for updated in diff.updates.values(): + ops_list.append({ + "rpath": updated.key, + "old_value": updated.prev.encode("base32"), + "new_value": updated.cid.encode("base32") + }) + for removed in diff.deletes.values(): + ops_list.append({ + "rpath": removed.key, + "old_value": removed.cid.encode("base32"), + "new_value": None + }) + + # sort the lists for comparison, per mst-test-suite's rules + created_list = sorted(cid.encode("base32") for cid in diff.new_cids) + deleted_list = sorted(cid.encode("base32") for cid in diff.removed_cids) + ops_list.sort(key=lambda x: x["rpath"]) + + self.assertEqual(ops_list, testcase["results"]["record_ops"], f"{testname} record_ops") + self.assertEqual(created_list, testcase["results"]["created_nodes"], f"{testname} created_nodes") # currently fails! + self.assertEqual(deleted_list, testcase["results"]["deleted_nodes"], f"{testname} deleted_nodes") + # TODO: implement checks for proof_nodes, firehose_cids (test data hasn't been generated yet) diff --git a/mst-test-suite b/mst-test-suite new file mode 160000 index 0000000..1cfaab5 --- /dev/null +++ b/mst-test-suite @@ -0,0 +1 @@ +Subproject commit 1cfaab5c679a69092293d034a5e206bad46b3f29 From 8f724e5e35853e130110d7dbf2699c24ae414b62 Mon Sep 17 00:00:00 2001 From: David Buchanan Date: Wed, 11 Dec 2024 19:49:34 +0000 Subject: [PATCH 2/4] add 'inverse' diff test case --- arroba/tests/mst_test_suite.py | 73 ++++++++++++++++++++++++++++------ 1 file changed, 60 insertions(+), 13 deletions(-) diff --git a/arroba/tests/mst_test_suite.py b/arroba/tests/mst_test_suite.py index d7cd59b..6c9c226 100644 --- a/arroba/tests/mst_test_suite.py +++ b/arroba/tests/mst_test_suite.py @@ -1,4 +1,7 @@ +from typing import List, Tuple + import os +import io import json import dag_cbor @@ -7,7 +10,7 @@ from tqdm import tqdm -from ..diff import Change, Diff +from ..diff import Change, Diff, null_diff from ..mst import MST from ..storage import MemoryStorage, Block from . import testutil @@ -16,31 +19,47 @@ class MSTSuiteTest(testutil.TestCase): def setUp(self): super().setUp() - self.diff_testcases = {} # recursively search for test cases in JSON format. # for now we only know how to process "mst-diff" test cases - more types will be added # in the future self.test_suite_base = "./mst-test-suite/" + diff_testcases = {} for path in [os.path.join(dp, f) for dp, _, fn in os.walk(self.test_suite_base + "/tests/") for f in fn]: if not path.endswith(".json"): continue with open(path) as json_file: testcase = json.load(json_file) if testcase.get("$type") == "mst-diff": - self.diff_testcases[path] = testcase + diff_testcases[path] = testcase + self.diff_testcases = dict(sorted(diff_testcases.items())) # sort them because os.walk() uses a weird order + + def parse_car(self, stream) -> Tuple[CID, List[Tuple[CID, bytes]]]: + car_header = dag_cbor.decode(stream.read(varint.decode(stream))) + blocks = [] + while True: + try: + block = stream.read(varint.decode(stream)) + except ValueError: + break + blocks.append((CID.decode(block[:36]), block[36:])) + return car_header["roots"][0], blocks def populate_storage_from_car(self, storage: MemoryStorage, car_path: str) -> CID: # ad-hoc CAR parser, returns the root CID with open(self.test_suite_base + car_path, "rb") as carfile: - car_header = dag_cbor.decode(carfile.read(varint.decode(carfile))) - while True: - try: - block = carfile.read(varint.decode(carfile)) - except ValueError: - break - cid = CID.decode(block[:36]) - storage.blocks[cid] = Block(cid=cid, encoded=block[36:]) - return car_header["roots"][0] + root, blocks = self.parse_car(carfile) + for cid, value in blocks: + storage.blocks[cid] = Block(cid=cid, encoded=value) + return root + + def serialise_canonical_car(self, root: CID, blocks: List[Tuple[CID, bytes]]) -> bytes: + car = io.BytesIO() + header = dag_cbor.encode({"version": 1, "roots": [root]}) + car.write(varint.encode(len(header)) + header) + for cid, value in sorted(blocks, key=lambda x: bytes(x[0])): + entry = bytes(cid) + value + car.write(varint.encode(len(entry)) + entry) + return car.getvalue() def test_diffs(self): for testname, testcase in tqdm(self.diff_testcases.items()): @@ -72,7 +91,8 @@ def test_diffs(self): "new_value": None }) - # sort the lists for comparison, per mst-test-suite's rules + # sort the lists for comparison, per mst-test-suite's rules. + # NOTE: maybe we should just compare set()s instead? created_list = sorted(cid.encode("base32") for cid in diff.new_cids) deleted_list = sorted(cid.encode("base32") for cid in diff.removed_cids) ops_list.sort(key=lambda x: x["rpath"]) @@ -81,3 +101,30 @@ def test_diffs(self): self.assertEqual(created_list, testcase["results"]["created_nodes"], f"{testname} created_nodes") # currently fails! self.assertEqual(deleted_list, testcase["results"]["deleted_nodes"], f"{testname} deleted_nodes") # TODO: implement checks for proof_nodes, firehose_cids (test data hasn't been generated yet) + + def test_diffs_inverse(self): + # we re-use the diff test cases but "backwards" - applying the op list + # to the initial MST see if we end up at the correct final MST + for testname, testcase in tqdm(self.diff_testcases.items()): + storage = MemoryStorage() + root_a = self.populate_storage_from_car(storage, testcase["inputs"]["mst_a"]) + mst = MST.load(storage=storage, cid=root_a) + + for op in testcase["results"]["record_ops"]: + if op["old_value"] and op["new_value"]: # update + mst = mst.update(op["rpath"], CID.decode(op["new_value"])) + elif op["old_value"]: # delete + mst = mst.delete(op["rpath"]) + else: # create + mst = mst.add(op["rpath"], CID.decode(op["new_value"])) + + diff = null_diff(mst) # should get us a map of the complete new mst + root_b = mst.get_pointer() + + with open(self.test_suite_base + testcase["inputs"]["mst_b"], "rb") as car_b: + reference_root, reference_blocks = self.parse_car(car_b) + + reference_cid_set = set(x[0] for x in reference_blocks) # just look at the cids from the car + + self.assertEqual(root_b, reference_root, f"{testname} inverse: new root") # fails occasionally + self.assertEqual(diff.new_cids, reference_cid_set, f"{testname} inverse: new cid set") # basically always fails, I think I'm doing something wrong From 5dd35e626795266a33541f1f6d59368b31bda224 Mon Sep 17 00:00:00 2001 From: David Buchanan Date: Wed, 11 Dec 2024 20:07:35 +0000 Subject: [PATCH 3/4] drop tqdm, use self.subTest() --- arroba/tests/mst_test_suite.py | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/arroba/tests/mst_test_suite.py b/arroba/tests/mst_test_suite.py index 6c9c226..53efe45 100644 --- a/arroba/tests/mst_test_suite.py +++ b/arroba/tests/mst_test_suite.py @@ -1,4 +1,4 @@ -from typing import List, Tuple +from typing import List, Tuple, BinaryIO import os import io @@ -8,8 +8,6 @@ import dag_cbor.random from multiformats import CID, varint -from tqdm import tqdm - from ..diff import Change, Diff, null_diff from ..mst import MST from ..storage import MemoryStorage, Block @@ -33,7 +31,7 @@ def setUp(self): diff_testcases[path] = testcase self.diff_testcases = dict(sorted(diff_testcases.items())) # sort them because os.walk() uses a weird order - def parse_car(self, stream) -> Tuple[CID, List[Tuple[CID, bytes]]]: + def parse_car(self, stream: BinaryIO) -> Tuple[CID, List[Tuple[CID, bytes]]]: car_header = dag_cbor.decode(stream.read(varint.decode(stream))) blocks = [] while True: @@ -45,7 +43,6 @@ def parse_car(self, stream) -> Tuple[CID, List[Tuple[CID, bytes]]]: return car_header["roots"][0], blocks def populate_storage_from_car(self, storage: MemoryStorage, car_path: str) -> CID: - # ad-hoc CAR parser, returns the root CID with open(self.test_suite_base + car_path, "rb") as carfile: root, blocks = self.parse_car(carfile) for cid, value in blocks: @@ -62,7 +59,7 @@ def serialise_canonical_car(self, root: CID, blocks: List[Tuple[CID, bytes]]) -> return car.getvalue() def test_diffs(self): - for testname, testcase in tqdm(self.diff_testcases.items()): + for testname, testcase in self.diff_testcases.items(): storage = MemoryStorage() root_a = self.populate_storage_from_car(storage, testcase["inputs"]["mst_a"]) root_b = self.populate_storage_from_car(storage, testcase["inputs"]["mst_b"]) @@ -97,15 +94,18 @@ def test_diffs(self): deleted_list = sorted(cid.encode("base32") for cid in diff.removed_cids) ops_list.sort(key=lambda x: x["rpath"]) - self.assertEqual(ops_list, testcase["results"]["record_ops"], f"{testname} record_ops") - self.assertEqual(created_list, testcase["results"]["created_nodes"], f"{testname} created_nodes") # currently fails! - self.assertEqual(deleted_list, testcase["results"]["deleted_nodes"], f"{testname} deleted_nodes") - # TODO: implement checks for proof_nodes, firehose_cids (test data hasn't been generated yet) + with self.subTest(testcase["description"] + ": record_ops"): + self.assertEqual(ops_list, testcase["results"]["record_ops"]) + with self.subTest(testcase["description"] + ": created_nodes"): + self.assertEqual(created_list, testcase["results"]["created_nodes"]) # currently fails! + with self.subTest(testcase["description"] + ": deleted_nodes"): + self.assertEqual(deleted_list, testcase["results"]["deleted_nodes"]) + # TODO: implement checks for proof_nodes, firehose_cids (test data hasn't been generated yet) def test_diffs_inverse(self): # we re-use the diff test cases but "backwards" - applying the op list # to the initial MST see if we end up at the correct final MST - for testname, testcase in tqdm(self.diff_testcases.items()): + for testname, testcase in self.diff_testcases.items(): storage = MemoryStorage() root_a = self.populate_storage_from_car(storage, testcase["inputs"]["mst_a"]) mst = MST.load(storage=storage, cid=root_a) @@ -126,5 +126,7 @@ def test_diffs_inverse(self): reference_cid_set = set(x[0] for x in reference_blocks) # just look at the cids from the car - self.assertEqual(root_b, reference_root, f"{testname} inverse: new root") # fails occasionally - self.assertEqual(diff.new_cids, reference_cid_set, f"{testname} inverse: new cid set") # basically always fails, I think I'm doing something wrong + with self.subTest(testcase["description"] + " (inverse): new root"): + self.assertEqual(root_b, reference_root) # fails occasionally + with self.subTest(testcase["description"] + " (inverse): new cid set"): + self.assertEqual(diff.new_cids, reference_cid_set) # basically always fails, I think I'm doing something wrong From 1ab91f63f94ceb4338733b177c0eff86cb6fc22a Mon Sep 17 00:00:00 2001 From: David Buchanan Date: Thu, 12 Dec 2024 18:10:43 +0000 Subject: [PATCH 4/4] bump submodule --- mst-test-suite | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mst-test-suite b/mst-test-suite index 1cfaab5..ffab77a 160000 --- a/mst-test-suite +++ b/mst-test-suite @@ -1 +1 @@ -Subproject commit 1cfaab5c679a69092293d034a5e206bad46b3f29 +Subproject commit ffab77ad3001a805414ae44de3b99af4cc563c92