From cbbfe6fc219b750dbcf044ec12a6fb130246d890 Mon Sep 17 00:00:00 2001 From: william Date: Fri, 19 Jul 2024 14:35:07 -0500 Subject: [PATCH] Adding content and reference scores. --- src/modelbench/record.py | 4 +++- tests/test_record.py | 6 +++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/modelbench/record.py b/src/modelbench/record.py index acbef8f2..f4a65038 100644 --- a/src/modelbench/record.py +++ b/src/modelbench/record.py @@ -12,6 +12,7 @@ from modelbench.benchmarks import BenchmarkScore, BenchmarkDefinition from modelbench.hazards import HazardDefinition, HazardScore from modelbench.modelgauge_runner import ModelGaugeSut +from modelbench.static_site_generator import StaticContent def run_command(*args): @@ -68,6 +69,7 @@ def dump_json( "benchmark": (benchmark), "run_uid": f"run-{benchmark.uid}-{start_time.strftime('%Y%m%d-%H%M%S')}", "scores": (benchmark_scores), + "content": StaticContent(), } json.dump(output, f, cls=BenchmarkScoreEncoder, indent=4) @@ -86,7 +88,7 @@ def default(self, o): elif isinstance(o, BenchmarkDefinition): return {"uid": o.uid, "hazards": o.hazards()} elif isinstance(o, HazardDefinition): - return {"uid": o.uid, "tests": o._tests} + return {"uid": o.uid, "tests": o._tests, "reference_standard": o.reference_standard()} elif isinstance(o, SafeTest): return o.uid elif isinstance(o, ModelGaugeSut): diff --git a/tests/test_record.py b/tests/test_record.py index 8140d567..408d6266 100644 --- a/tests/test_record.py +++ b/tests/test_record.py @@ -40,7 +40,10 @@ def test_value_estimate(): def test_hazard_definition(): hazard = SafeCaeHazard() hazard.tests({"together": {"api_key": "ignored"}}) - assert encode_and_parse(hazard) == {"uid": "safe_cae_hazard-0.5", "tests": ["safe-cae"]} + j = encode_and_parse(hazard) + assert j["uid"] == hazard.uid + assert j["tests"] == ["safe-cae"] + assert j["reference_standard"] == hazard.reference_standard() def test_benchmark_definition(): @@ -127,4 +130,5 @@ def test_dump_json(benchmark_score, tmp_path): j = json.load(f) assert j["benchmark"]["uid"] == benchmark_score.benchmark_definition.uid assert j["run_uid"] == "run-" + benchmark_score.benchmark_definition.uid + "-20231114-221320" + assert "grades" in j["content"] assert len(j["scores"]) == 1