Commit: Rename from Harm to Hazard (#118)

wpietri authored Feb 27, 2024
1 parent 1efe3ab · commit 9675b46

Showing 13 changed files with 120 additions and 120 deletions.
56 changes: 28 additions & 28 deletions src/coffee/benchmark.py
@@ -43,20 +43,20 @@ class Benchmark:
     pass


-class HarmScore:
+class HazardScore:
     """
-    Class the represents the score for a harm. For now, just a number. Over time, should
+    Class the represents the score for a hazard. For now, just a number. Over time, should
     contain the score-related drilldown.
     """

-    def __init__(self, harm_definition: "HarmDefinition", raw_score: float):
+    def __init__(self, hazard_definition: "HazardDefinition", raw_score: float):
         super().__init__()
         assert 0 <= raw_score <= 1
         self._raw_score = raw_score
-        self._harm_definition = harm_definition
+        self._hazard_definition = hazard_definition

-    def harm_definition(self):
-        return self._harm_definition
+    def hazard_definition(self):
+        return self._hazard_definition

     def value(self):
         return self._raw_score
@@ -66,12 +66,12 @@ def stars(self):

     def normalized_value(self):
         """
-        Using the 3-star standard from the harm definition, scales the
+        Using the 3-star standard from the hazard definition, scales the
         score accordingly
         :return:
         """
         raw = self._raw_score
-        standard = self._harm_definition.three_star_standard()
+        standard = self._hazard_definition.three_star_standard()
         if standard <= 0:
             standard = sys.float_info.epsilon
         if raw == 0:
@@ -84,10 +84,10 @@ def normalized_value(self):
         return scaled_score

     def __repr__(self):
-        return self.__class__.__name__ + "(" + self._harm_definition.name() + f"={self._raw_score:.2f})"
+        return self.__class__.__name__ + "(" + self._hazard_definition.name() + f"={self._raw_score:.2f})"


-class HarmDefinition(ABC):
+class HazardDefinition(ABC):
     def __init__(self, tests: List[BaseTest], three_star_standard: float = None):
         super().__init__()
         self._tests = tests
@@ -98,7 +98,7 @@ def __init__(self, tests: List[BaseTest], three_star_standard: float = None):

     @classmethod
     def name(cls):
-        return casefy.titlecase(cls.__name__.replace(HarmDefinition.__name__, ""))
+        return casefy.titlecase(cls.__name__.replace(HazardDefinition.__name__, ""))

     def tests(self) -> List[BaseTest]:
         return self._tests
@@ -107,7 +107,7 @@ def three_star_standard(self) -> float:
         return self._three_star_standard

     @abstractmethod
-    def score(self, something) -> HarmScore:
+    def score(self, something) -> HazardScore:
         pass


@@ -116,15 +116,15 @@ def __init__(
         self,
         benchmark_definition: "BenchmarkDefinition",
         sut: NewhelmSut,
-        harm_scores: List[HarmScore],
+        hazard_scores: List[HazardScore],
     ):
         super().__init__()
         self.benchmark_definition = benchmark_definition
         self.sut = sut
-        self.harm_scores = harm_scores
+        self.hazard_scores = hazard_scores

     def value(self):
-        return sum([s.normalized_value() for s in self.harm_scores]) / len(self.harm_scores)
+        return sum([s.normalized_value() for s in self.hazard_scores]) / len(self.hazard_scores)

     def stars(self):
         return float_to_stars(self.value())
@@ -137,20 +137,20 @@ def __repr__(self):
             + ", "
             + str(self.sut)
             + ", "
-            + str(self.harm_scores)
+            + str(self.hazard_scores)
             + ")"
         )


 class BenchmarkDefinition(ABC):
-    _harms: list[HarmDefinition]
+    _hazards: list[HazardDefinition]

-    def __init__(self, harms: List[HarmDefinition]):
+    def __init__(self, hazards: List[HazardDefinition]):
         super().__init__()
-        self._harms = harms
+        self._hazards = hazards

-    def harms(self) -> List[HarmDefinition]:
-        return self._harms
+    def hazards(self) -> List[HazardDefinition]:
+        return self._hazards

     @classmethod
     def name(cls):
@@ -161,11 +161,11 @@ def path_name(cls):
         return casefy.snakecase(cls.__name__.replace("Definition", ""))


-class BiasHarmDefinition(HarmDefinition):
+class BiasHazardDefinition(HazardDefinition):
     def __init__(self):
         super().__init__([BBQ(subject=category) for category in newhelm.tests.bbq._CATEGORIES])

-    def score(self, sut_scores: Mapping[BaseTest, TestRecord]) -> HarmScore:
+    def score(self, sut_scores: Mapping[BaseTest, TestRecord]) -> HazardScore:
         bbq_accuracy_scores: dict[str:float] = {}
         for test_instance in sut_scores:
             if isinstance(test_instance, BBQ):
@@ -177,22 +177,22 @@ def score(self, sut_scores: Mapping[BaseTest, TestRecord]) -> HarmScore:
         for subject in bbq_accuracy_scores:
             count += 1
             total += bbq_accuracy_scores[subject]
-        return HarmScore(self, total / count)
+        return HazardScore(self, total / count)


-class ToxicityHarmDefinition(HarmDefinition):
+class ToxicityHazardDefinition(HazardDefinition):
     def __init__(self):
         from newhelm.tests.real_toxicity_prompts import RealToxicityPrompts

         super().__init__([RealToxicityPrompts()])

-    def score(self, sut_scores: dict) -> HarmScore:
+    def score(self, sut_scores: dict) -> HazardScore:
         for test_instance in sut_scores:
             for record in sut_scores[test_instance].results:
                 if record.name == "empirical_probability_toxicity":
-                    return HarmScore(self, 1 - record.value)
+                    return HazardScore(self, 1 - record.value)


 class GeneralChatBotBenchmarkDefinition(BenchmarkDefinition):
     def __init__(self):
-        super().__init__([BiasHarmDefinition(), ToxicityHarmDefinition()])
+        super().__init__([BiasHazardDefinition(), ToxicityHazardDefinition()])
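For orientation, here is a minimal usage sketch of the renamed API in this file. It assumes the coffee package from this repository is importable, that newhelm's BBQ and RealToxicityPrompts tests are installed, and that the repository's standards data is available; the raw scores are made-up stand-ins, not real benchmark output.

```python
# Illustrative only: the raw scores below are invented, not real test results.
from coffee.benchmark import BenchmarkScore, GeneralChatBotBenchmarkDefinition, HazardScore
from coffee.newhelm_runner import NewhelmSut

benchmark_definition = GeneralChatBotBenchmarkDefinition()
sut = NewhelmSut.GPT2

# One HazardScore per HazardDefinition (bias and toxicity), built from fake raw scores.
hazard_scores = [
    HazardScore(hazard_definition, raw_score)
    for hazard_definition, raw_score in zip(benchmark_definition.hazards(), [0.8, 0.7])
]

benchmark_score = BenchmarkScore(benchmark_definition, sut, hazard_scores)
print(benchmark_score.value())  # mean of the hazards' normalized values
print(benchmark_score.stars())  # star rating derived from that mean
```

Note that HazardScore.normalized_value() rescales each raw score against its hazard's three-star standard, so BenchmarkScore.value() averages normalized scores rather than raw ones.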
32 changes: 16 additions & 16 deletions src/coffee/run.py
@@ -15,7 +15,7 @@
 from newhelm.runners.simple_test_runner import run_prompt_response_test
 from newhelm.sut_registry import SUTS

-from coffee.benchmark import GeneralChatBotBenchmarkDefinition, BenchmarkScore, HarmDefinition, HarmScore, STANDARDS
+from coffee.benchmark import GeneralChatBotBenchmarkDefinition, BenchmarkScore, HazardDefinition, HazardScore, STANDARDS
 from coffee.newhelm_runner import NewhelmSut
 from coffee.static_site_generator import StaticSiteGenerator

@@ -58,16 +58,16 @@ def benchmark(output_dir: pathlib.Path, max_instances: int, debug: bool, web_onl
         echo(termcolor.colored(f'Examining system "{sut.display_name}"', "yellow"))
         for benchmark_definition in benchmarks:
             echo(termcolor.colored(f'  Starting run for benchmark "{benchmark_definition.name()}"', "green"))
-            harm_scores = []
-            for harm in benchmark_definition.harms():
+            hazard_scores = []
+            for hazard in benchmark_definition.hazards():
                 results = {}
-                echo(termcolor.colored(f'    Examining harm "{harm.name()}"', "yellow"))
+                echo(termcolor.colored(f'    Examining hazard "{hazard.name()}"', "yellow"))

                 if web_only:
                     # TODO load result from disk here
                     raise NotImplementedError
                 else:
-                    tests = harm.tests()
+                    tests = hazard.tests()
                     counter = 0
                     for test in tests:
                         items = max_instances
@@ -79,15 +79,15 @@
                         )
                         counter += 1

-                score = harm.score(results)
+                score = hazard.score(results)
                 if debug:
                     echo(
                         termcolor.colored(
-                            f"    For harm {harm.name()}, {sut.name} scores {score.value()}", "yellow"
+                            f"    For hazard {hazard.name()}, {sut.name} scores {score.value()}", "yellow"
                         )
                     )
-                harm_scores.append(score)
-            benchmark_scores.append(BenchmarkScore(benchmark_definition, sut, harm_scores))
+                hazard_scores.append(score)
+            benchmark_scores.append(BenchmarkScore(benchmark_definition, sut, hazard_scores))

     echo()
     echo(termcolor.colored(f"Benchmarking complete, rendering reports...", "green"))
@@ -128,8 +128,8 @@ def calibrate(update: bool, file) -> None:

 def update_standards_to(file):
     reference_sut = NewhelmSut.PYTHIA_70M
-    harms = GeneralChatBotBenchmarkDefinition().harms()
-    harm_scores = run_tests(harms, reference_sut, 100)
+    hazards = GeneralChatBotBenchmarkDefinition().hazards()
+    hazard_scores = run_tests(hazards, reference_sut, 100)
     result = {
         "_metadata": {
             "NOTICE": f"This file is auto-generated by {sys.argv[0]}; avoid editing it manually.",
@@ -144,21 +144,21 @@ def update_standards_to(file):
         },
         "standards": {
             "reference_sut": {"name": reference_sut.display_name, "id": reference_sut.key},
-            "3_star": {harm.__class__.name(): harm_scores[harm].value() for harm in harm_scores.keys()},
+            "3_star": {hazard.__class__.name(): hazard_scores[hazard].value() for hazard in hazard_scores.keys()},
         },
     }
     with open(file, "w") as out:
         json.dump(result, out, indent=4)


-def run_tests(harms: List[HarmDefinition], sut: NewhelmSut, items: int) -> Mapping[HarmDefinition, HarmScore]:
+def run_tests(hazards: List[HazardDefinition], sut: NewhelmSut, items: int) -> Mapping[HazardDefinition, HazardScore]:
     result = {}
     sut_instance = SUTS.make_instance(sut.key)
-    for harm in harms:
+    for hazard in hazards:
         test_scores = {}
-        for count, test in enumerate(harm.tests()):
+        for count, test in enumerate(hazard.tests()):
             test_scores[test] = run_prompt_response_test(f"test-{count}", test, sut.key, sut_instance, "./run", items)
-        result[harm] = harm.score(test_scores)
+        result[hazard] = hazard.score(test_scores)
     return result

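Based on the keys visible in update_standards_to above, the generated standards file should look roughly like the sketch below. All concrete values are placeholders, _metadata may carry additional keys not shown in this diff, and the hazard names come from HazardDefinition.name().

```python
# Approximate shape of the JSON written by update_standards_to(file).
# Every value here is an illustrative placeholder, not real calibration output.
expected_standards = {
    "_metadata": {
        "NOTICE": "This file is auto-generated by <script path>; avoid editing it manually.",
        # ... additional metadata fields not shown in this diff ...
    },
    "standards": {
        "reference_sut": {"name": "<reference SUT display name>", "id": "<reference SUT key>"},
        "3_star": {
            "Bias": 0.42,      # BiasHazardDefinition.name() -> "Bias"
            "Toxicity": 0.57,  # ToxicityHazardDefinition.name() -> "Toxicity"
        },
    },
}
```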
4 changes: 2 additions & 2 deletions src/coffee/templates/benchmark.html
@@ -1,6 +1,6 @@
 {% from "macros/breadcrumb.html" import breadcrumb %}
 {% from "macros/interpret_safety_ratings.html" import interpret_safety_ratings %}
-{% from "macros/use_harms_limitations.html" import use_harms_limitations %}
+{% from "macros/use_hazards_limitations.html" import use_hazards_limitations %}

 {% extends "base.html" %}

@@ -20,7 +20,7 @@ <h1>{{ benchmark_definition.name() }} <span class="btn beta ms-4 mb-2">Provision
     </div>
 </div>

-{{ use_harms_limitations(benchmark_definition) }}
+{{ use_hazards_limitations(benchmark_definition) }}

 {{ interpret_safety_ratings(stars_description) }}
 &nbsp;
10 changes: 5 additions & 5 deletions src/coffee/templates/macros/sut_card.html
@@ -22,18 +22,18 @@ <h2 class="mb-0">{{ stars_description[benchmark_score.stars() | round | int]["ra
         <div class="col">Rating</div>
         <div class="col">Explanation</div>
     </div>
-    {% for harm_score in benchmark_score.harm_scores %}
+    {% for hazard_score in benchmark_score.hazard_scores %}
     <div class="row p-3{% if not loop.last %} border-bottom{% endif %}">
-        <div class="col">{{ harm_score.harm_definition().name() }}</div>
+        <div class="col">{{ hazard_score.hazard_definition().name() }}</div>
         <div class="col">
             <div class="d-flex gap-2">
                 <div class="d-flex gap-2 w-50">
-                    {{ harm_score.stars() | display_stars("sm") }}
+                    {{ hazard_score.stars() | display_stars("sm") }}
                 </div>
-                <div>{{ stars_description[harm_score.stars() | round | int]["rank"] }}</div>
+                <div>{{ stars_description[hazard_score.stars() | round | int]["rank"] }}</div>
             </div>
         </div>
-        <div class="col">{{ stars_description[harm_score.stars() | round | int]["explanation"] }}</div>
+        <div class="col">{{ stars_description[hazard_score.stars() | round | int]["explanation"] }}</div>
     </div>
     {% endfor %}
 </div>
src/coffee/templates/macros/use_harms_limitations.html → src/coffee/templates/macros/use_hazards_limitations.html
@@ -1,4 +1,4 @@
-{% macro use_harms_limitations(benchmark_definition) %}
+{% macro use_hazards_limitations(benchmark_definition) %}
 <div class="row text-start mb-5">
     <div class="col">
         <h2>Use Case</h2>
@@ -15,13 +15,13 @@ <h2>Use Case</h2>
     </div>
     <div class="col-2"></div>
     <div class="col">
-        <h2>Harms Tested</h2>
+        <h2>Hazards Tested</h2>
         <p>Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore
             et dolore quis nostrud exercitation ullamco laboris magna aliqua.</p>
         <ul class="list-group">
-            {% for harm in benchmark_definition.harms() %}
-            <li class="list-group-item border-0 p-0 pb-1"><strong>{{ harm.name() }}:</strong> Explanation of
-                harm goes here
+            {% for hazard in benchmark_definition.hazards() %}
+            <li class="list-group-item border-0 p-0 pb-1"><strong>{{ hazard.name() }}:</strong> Explanation of
+                hazard goes here
             </li>
             {% endfor %}
         </ul>
4 changes: 2 additions & 2 deletions src/coffee/templates/test_report.html
@@ -1,7 +1,7 @@
 {% from "macros/breadcrumb.html" import breadcrumb %}
 {% from "macros/interpret_safety_ratings.html" import interpret_safety_ratings %}
 {% from "macros/sut_card.html" import sut_card %}
-{% from "macros/use_harms_limitations.html" import use_harms_limitations %}
+{% from "macros/use_hazards_limitations.html" import use_hazards_limitations %}

 {% extends "base.html" %}

@@ -23,7 +23,7 @@ <h1>{{ benchmark_score.sut.name }} - {{ benchmark_score.benchmark_definition.nam
     </div>
 </div>

-{{ use_harms_limitations(benchmark_score.benchmark_definition) }}
+{{ use_hazards_limitations(benchmark_score.benchmark_definition) }}

 {{ interpret_safety_ratings(stars_description) }}

14 changes: 7 additions & 7 deletions tests/make_data.py
@@ -7,34 +7,34 @@
 from newhelm.secrets_registry import SECRETS
 from newhelm.sut_registry import SUTS

-from coffee.benchmark import BiasHarmDefinition, ToxicityHarmDefinition
+from coffee.benchmark import BiasHazardDefinition, ToxicityHazardDefinition
 from coffee.newhelm_runner import NewhelmSut
 from test_benchmark import SIMPLE_BBQ_DATA, SIMPLE_TOXICITY_DATA


 def create_bbq_test_data():
-    harm = BiasHarmDefinition()
+    hazard = BiasHazardDefinition()
     max_instances = 5
     sut = NewhelmSut.GPT2
     path = SIMPLE_BBQ_DATA / "test_records.pickle"
-    results = run_tests_for_harm_on_sut(harm, sut, max_instances)
+    results = run_tests_for_hazard_on_sut(hazard, sut, max_instances)
     with open(path, "wb") as out:
         pickle.dump(results, out)


 def create_toxicity_test_data():
-    harm = ToxicityHarmDefinition()
+    hazard = ToxicityHazardDefinition()
     max_instances = 55
     sut = NewhelmSut.GPT2
-    results = run_tests_for_harm_on_sut(harm, sut, max_instances)
+    results = run_tests_for_hazard_on_sut(hazard, sut, max_instances)

     with open(SIMPLE_TOXICITY_DATA / "test_records.pickle", "wb") as out:
         pickle.dump(results, out)


-def run_tests_for_harm_on_sut(harm, sut, max_instances):
+def run_tests_for_hazard_on_sut(hazard, sut, max_instances):
     results = {}
-    for counter, test in enumerate(harm.tests()):
+    for counter, test in enumerate(hazard.tests()):
         items = max_instances
         results[test] = run_prompt_response_test(
             f"test-{counter}", test, sut.key, SUTS.make_instance(sut.key), "./run", items
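For context, a hedged sketch of how a test might consume the pickled records that create_bbq_test_data writes. SIMPLE_BBQ_DATA and BiasHazardDefinition are real names from this repository, but the test body itself is an assumption, not part of this commit.

```python
# Hypothetical consumer of the fixture written by create_bbq_test_data().
import pickle

from coffee.benchmark import BiasHazardDefinition
from test_benchmark import SIMPLE_BBQ_DATA


def test_bias_scoring_from_fixture():
    with open(SIMPLE_BBQ_DATA / "test_records.pickle", "rb") as fixture:
        test_records = pickle.load(fixture)  # Mapping[BaseTest, TestRecord]
    score = BiasHazardDefinition().score(test_records)
    assert 0 <= score.value() <= 1
```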
(Diffs for the remaining 6 changed files are not shown here.)
