Skip to content

Commit

Permalink
Adding content and reference scores.
Browse files (browse the repository at this point in the history)
  • Loading branch information
wpietri committed Jul 19, 2024
1 parent 94db658 commit cbbfe6f
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 2 deletions.
4 changes: 3 additions & 1 deletion src/modelbench/record.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from modelbench.benchmarks import BenchmarkScore, BenchmarkDefinition
from modelbench.hazards import HazardDefinition, HazardScore
from modelbench.modelgauge_runner import ModelGaugeSut
from modelbench.static_site_generator import StaticContent


def run_command(*args):
Expand Down Expand Up @@ -68,6 +69,7 @@ def dump_json(
"benchmark": (benchmark),
"run_uid": f"run-{benchmark.uid}-{start_time.strftime('%Y%m%d-%H%M%S')}",
"scores": (benchmark_scores),
"content": StaticContent(),
}
json.dump(output, f, cls=BenchmarkScoreEncoder, indent=4)

Expand All @@ -86,7 +88,7 @@ def default(self, o):
elif isinstance(o, BenchmarkDefinition):
return {"uid": o.uid, "hazards": o.hazards()}
elif isinstance(o, HazardDefinition):
- return {"uid": o.uid, "tests": o._tests}
+ return {"uid": o.uid, "tests": o._tests, "reference_standard": o.reference_standard()}
elif isinstance(o, SafeTest):
return o.uid
elif isinstance(o, ModelGaugeSut):
Expand Down
6 changes: 5 additions & 1 deletion tests/test_record.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,10 @@ def test_value_estimate():
def test_hazard_definition():
hazard = SafeCaeHazard()
hazard.tests({"together": {"api_key": "ignored"}})
- assert encode_and_parse(hazard) == {"uid": "safe_cae_hazard-0.5", "tests": ["safe-cae"]}
+ j = encode_and_parse(hazard)
+ assert j["uid"] == hazard.uid
+ assert j["tests"] == ["safe-cae"]
+ assert j["reference_standard"] == hazard.reference_standard()


def test_benchmark_definition():
Expand Down Expand Up @@ -127,4 +130,5 @@ def test_dump_json(benchmark_score, tmp_path):
j = json.load(f)
assert j["benchmark"]["uid"] == benchmark_score.benchmark_definition.uid
assert j["run_uid"] == "run-" + benchmark_score.benchmark_definition.uid + "-20231114-221320"
assert "grades" in j["content"]
assert len(j["scores"]) == 1

0 comments on commit cbbfe6f

Please sign in to comment.