From 411c4eaa561c4fda156dc227f52f5cb3a2a4b274 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Fri, 21 Jun 2024 09:51:41 -0400 Subject: [PATCH] Revert "Revert "Revert "CHIA-414 fixup datalayer benchmark"" (#18107)" This reverts commit e87d51a99348669d1f82e4b6fd7f7dd4b191cbe4. --- .../_tests/core/data_layer/test_data_store.py | 142 ++++++++---------- chia/_tests/process_junit.py | 42 +----- chia/_tests/util/misc.py | 28 ---- setup.py | 1 - 4 files changed, 73 insertions(+), 140 deletions(-) diff --git a/chia/_tests/core/data_layer/test_data_store.py b/chia/_tests/core/data_layer/test_data_store.py index f05812bf718d..0e04d36310bf 100644 --- a/chia/_tests/core/data_layer/test_data_store.py +++ b/chia/_tests/core/data_layer/test_data_store.py @@ -14,8 +14,6 @@ import aiohttp import aiosqlite -import big_o -import big_o.complexities import pytest from chia._tests.core.data_layer.util import Example, add_0123_example, add_01234567_example @@ -1517,101 +1515,91 @@ async def test_clear_pending_roots_returns_root( assert cleared_root == pending_root +@dataclass +class BatchInsertBenchmarkCase: + pre: int + count: int + limit: float + marks: Marks = () + + @property + def id(self) -> str: + return f"pre={self.pre},count={self.count}" + + +@dataclass +class BatchesInsertBenchmarkCase: + count: int + batch_count: int + limit: float + marks: Marks = () + + @property + def id(self) -> str: + return f"count={self.count},batch_count={self.batch_count}" + + +@datacases( + BatchInsertBenchmarkCase( + pre=0, + count=100, + limit=2.2, + ), + BatchInsertBenchmarkCase( + pre=1_000, + count=100, + limit=4, + ), + BatchInsertBenchmarkCase( + pre=0, + count=1_000, + limit=30, + ), + BatchInsertBenchmarkCase( + pre=1_000, + count=1_000, + limit=36, + ), + BatchInsertBenchmarkCase( + pre=10_000, + count=25_000, + limit=52, + ), +) @pytest.mark.anyio async def test_benchmark_batch_insert_speed( data_store: DataStore, store_id: bytes32, benchmark_runner: BenchmarkRunner, + case: BatchInsertBenchmarkCase, ) -> None: r = random.Random() r.seed("shadowlands", version=2) - test_size = 100 - max_pre_size = 20_000 - # may not be needed if big_o already considers the effect - # TODO: must be > 0 to avoid an issue with the log class? - lowest_considered_n = 2000 - simplicity_bias_percentage = 10 / 100 - - batch_count, remainder = divmod(max_pre_size, test_size) - assert remainder == 0, "the last batch would be a different size" - changelist = [ { "action": "insert", "key": x.to_bytes(32, byteorder="big", signed=False), "value": bytes(r.getrandbits(8) for _ in range(1200)), } - for x in range(max_pre_size) + for x in range(case.pre + case.count) ] - pre = changelist[:max_pre_size] - - records: Dict[int, float] = {} - - total_inserted = 0 - pre_iter = iter(pre) - with benchmark_runner.print_runtime( - label="overall", - clock=time.monotonic, - ): - while True: - pre_batch = list(itertools.islice(pre_iter, test_size)) - if len(pre_batch) == 0: - break - - with benchmark_runner.print_runtime( - label="count", - clock=time.monotonic, - ) as f: - await data_store.insert_batch( - store_id=store_id, - changelist=pre_batch, - # TODO: does this mess up test accuracy? - status=Status.COMMITTED, - ) + pre = changelist[: case.pre] + batch = changelist[case.pre : case.pre + case.count] - records[total_inserted] = f.result().duration - total_inserted += len(pre_batch) - - considered_durations = {n: duration for n, duration in records.items() if n >= lowest_considered_n} - ns = list(considered_durations.keys()) - durations = list(considered_durations.values()) - best_class, fitted = big_o.infer_big_o_class(ns=ns, time=durations) - simplicity_bias = simplicity_bias_percentage * fitted[best_class] - best_class, fitted = big_o.infer_big_o_class(ns=ns, time=durations, simplicity_bias=simplicity_bias) - - print(f"allowed simplicity bias: {simplicity_bias}") - print(big_o.reports.big_o_report(best=best_class, others=fitted)) - - assert isinstance( - best_class, (big_o.complexities.Constant, big_o.complexities.Linear) - ), f"must be constant or linear: {best_class}" - - coefficient_maximums = [0.65, 0.000_25, *(10**-n for n in range(5, 100))] - - coefficients = best_class.coefficients() - paired = list(zip(coefficients, coefficient_maximums)) - assert len(paired) == len(coefficients) - for index, [actual, maximum] in enumerate(paired): - benchmark_runner.record_value( - value=actual, - limit=maximum, - label=f"{type(best_class).__name__} coefficient {index}", + if case.pre > 0: + await data_store.insert_batch( + store_id=store_id, + changelist=pre, + status=Status.COMMITTED, ) - assert actual <= maximum, f"(coefficient {index}) {actual} > {maximum}: {paired}" - -@dataclass -class BatchesInsertBenchmarkCase: - count: int - batch_count: int - limit: float - marks: Marks = () - - @property - def id(self) -> str: - return f"count={self.count},batch_count={self.batch_count}" + with benchmark_runner.assert_runtime(seconds=case.limit): + await data_store.insert_batch( + store_id=store_id, + changelist=batch, + ) @datacases( diff --git a/chia/_tests/process_junit.py b/chia/_tests/process_junit.py index 48b8460ca230..fb1388ba62a6 100644 --- a/chia/_tests/process_junit.py +++ b/chia/_tests/process_junit.py @@ -189,32 +189,6 @@ def main( ) -def format_number(n: float) -> str: - complete = f"{n:.999f}" - integral_digits, decimal_separator, decimal_digits = complete.partition(".") - for index, digit in enumerate(decimal_digits): - if digit != "0": - places = index + 1 - break - else: - places = 0 - - group_size = 3 - - places = ((places + group_size) // group_size) * group_size - decimal_digits = decimal_digits[:places] - - result = "" - result += ",".join( - [integral_digits[start : start + group_size] for start in range(0, len(integral_digits), group_size)] - ) - result += "." - result += " ".join( - [decimal_digits[start : start + group_size] for start in range(0, len(decimal_digits), group_size)] - ) - return result - - def output_benchmark( link_line_separator: str, link_prefix: str, @@ -241,17 +215,17 @@ def output_benchmark( three_sigma_str = "-" if len(result.durations) > 1: durations_mean = mean(result.durations) - mean_str = f"{format_number(durations_mean)} s" + mean_str = f"{durations_mean:.3f} s" try: - three_sigma_str = f"{format_number(durations_mean + 3 * stdev(result.durations))} s" + three_sigma_str = f"{durations_mean + 3 * stdev(result.durations):.3f} s" except StatisticsError: pass durations_max = max(result.durations) - max_str = f"{format_number(durations_max)} s" + max_str = f"{durations_max:.3f} s" - limit_str = f"{format_number(result.limit)} s" + limit_str = f"{result.limit:.3f} s" percent = 100 * durations_max / result.limit if percent >= 100: @@ -318,17 +292,17 @@ def output_time_out_assert( three_sigma_str = "-" if len(result.durations) > 1: durations_mean = mean(result.durations) - mean_str = f"{format_number(durations_mean)} s" + mean_str = f"{durations_mean:.3f} s" try: - three_sigma_str = f"{format_number(durations_mean + 3 * stdev(result.durations))} s" + three_sigma_str = f"{durations_mean + 3 * stdev(result.durations):.3f} s" except StatisticsError: pass durations_max = max(result.durations) - max_str = f"{format_number(durations_max)} s" + max_str = f"{durations_max:.3f} s" - limit_str = f"{format_number(result.limit)} s" + limit_str = f"{result.limit:.3f} s" percent = 100 * durations_max / result.limit if percent >= 100: diff --git a/chia/_tests/util/misc.py b/chia/_tests/util/misc.py index 295fd16d4ae1..69131dec2942 100644 --- a/chia/_tests/util/misc.py +++ b/chia/_tests/util/misc.py @@ -390,34 +390,6 @@ def assert_runtime(self, *args: Any, **kwargs: Any) -> _AssertRuntime: kwargs.setdefault("overhead", self.overhead) return _AssertRuntime(*args, **kwargs) - def print_runtime(self, *args: Any, **kwargs: Any) -> _AssertRuntime: - kwargs.setdefault("enable_assertion", False) - # TODO: ick - kwargs.setdefault("seconds", 1) - kwargs.setdefault("overhead", self.overhead) - return _AssertRuntime(*args, **kwargs) - - def record_value(self, value: float, limit: float, label: str) -> None: - if ether.record_property is not None: - file, line = caller_file_and_line( - relative_to=( - pathlib.Path(chia.__file__).parent.parent, - pathlib.Path(chia._tests.__file__).parent.parent, - ) - ) - data = BenchmarkData( - duration=value, - path=pathlib.Path(file), - line=line, - limit=limit, - label=label, - ) - - ether.record_property( # pylint: disable=E1102 - data.tag, - json.dumps(data.marshal(), ensure_ascii=True, sort_keys=True), - ) - @contextlib.contextmanager def assert_rpc_error(error: str) -> Iterator[None]: diff --git a/setup.py b/setup.py index 10a19a37d0ca..ec095861110f 100644 --- a/setup.py +++ b/setup.py @@ -45,7 +45,6 @@ ] dev_dependencies = [ - "big-o==0.11.0", "build==1.2.1", "coverage==7.5.3", "diff-cover==9.0.0",