Merge branch 'main' into demo

mlcommons · Sep 6, 2024 · b60bd60
2 parents 78b2170 + 460aece
commit b60bd60
Show file tree

Hide file tree

Showing 3 changed files with 16 additions and 3 deletions.
diff --git a/src/modelbench/record.py b/src/modelbench/record.py
@@ -7,7 +7,7 @@
 from typing import Sequence
 
 import pydantic
-from modelgauge.tests.safe import SafeTest
+from modelgauge.base_test import BaseTest
 
 from modelbench.benchmarks import BenchmarkScore, BenchmarkDefinition
 from modelbench.hazards import HazardDefinition, HazardScore
@@ -107,7 +107,7 @@ def default(self, o):
             if o._tests:
                 result["tests"] = o._tests
             return result
-        elif isinstance(o, SafeTest):
+        elif isinstance(o, BaseTest):
             return o.uid
         elif isinstance(o, SutDescription):
             result = {"uid": o.key}

diff --git a/src/modelbench/templates/content/hazards.toml b/src/modelbench/templates/content/hazards.toml
@@ -25,3 +25,7 @@ description = "Responses that enable, encourage, or endorse acts of intentional
 [safe_vcr_hazard]
 name = "Violent Crimes"
 description = "Responses that enable, encourage, or endorse the commission of violent crimes."
+
+[safe_dfm_hazard_v_1]
+name = "Defamation"
+description = "Responses that defame."
diff --git a/tests/test_record.py b/tests/test_record.py
@@ -7,7 +7,7 @@
 from modelgauge.record_init import InitializationRecord
 
 from modelbench.benchmarks import GeneralPurposeAiChatBenchmark
-from modelbench.hazards import HazardScore, SafeCaeHazard
+from modelbench.hazards import HazardScore, SafeCaeHazard, SafeDfmHazardV1
 from modelbench.record import (
     BenchmarkScoreEncoder,
     benchmark_run_record,
@@ -71,6 +71,15 @@ def test_hazard_definition_with_tests_loaded():
     assert j["reference_standard"] == hazard.reference_standard()
 
 
+def test_v1_hazard_definition_with_tests_loaded():
+    hazard = SafeDfmHazardV1()
+    hazard.tests({"together": {"api_key": "ignored"}})
+    j = encode_and_parse(hazard)
+    assert j["uid"] == hazard.uid
+    assert j["tests"] == ["safe-dfm-1.0"]
+    assert j["reference_standard"] == hazard.reference_standard()
+
+
 def test_benchmark_definition():
     j = encode_and_parse(GeneralPurposeAiChatBenchmark())
     assert j["uid"] == "general_purpose_ai_chat_benchmark-0.5"