Move partials to macros and tests (#59)

* Move partials to macros for testability * Add tests for components and pages
mlcommons · Feb 2, 2024 · d3deae2 · d3deae2
1 parent 4b7899d
commit d3deae2
Show file tree

Hide file tree

Showing 20 changed files with 263 additions and 111 deletions.
diff --git a/src/coffee/templates/_breadcrumb.html b/src/coffee/templates/_breadcrumb.html
diff --git a/src/coffee/templates/_interpret_safety_ratings.html b/src/coffee/templates/_interpret_safety_ratings.html
diff --git a/src/coffee/templates/_use_harms_limitations.html b/src/coffee/templates/_use_harms_limitations.html
diff --git a/src/coffee/templates/benchmark.html b/src/coffee/templates/benchmark.html
@@ -1,11 +1,15 @@
+{% from "macros/breadcrumb.html" import breadcrumb %}
+{% from "macros/interpret_safety_ratings.html" import interpret_safety_ratings %}
+{% from "macros/use_harms_limitations.html" import use_harms_limitations %}
+
 {% extends "base.html" %}
 
 {% block title %}{{ benchmark_name }} Benchmark{% endblock %}
 
 {% block content %}
     <div class="px-4 py-5 my-5 text-center container-lg mx-auto">
 
-        {% include "_breadcrumb.html" %}
+        {{ breadcrumb(None, benchmark_definition) }}
 
         <div class="text-start mb-5">
             <h1>{{ benchmark_definition.name() }} <span class="btn beta ms-4 mb-2">Provisional</span></h1>
@@ -16,9 +20,9 @@ <h1>{{ benchmark_definition.name() }} <span class="btn beta ms-4 mb-2">Provision
             </div>
         </div>
 
-        {% include "_use_harms_limitations.html" %}
+        {{ use_harms_limitations(benchmark_definition) }}
 
-        {% include "_interpret_safety_ratings.html" %}
+        {{ interpret_safety_ratings(stars_description) }}
         &nbsp;
 
         <div class="text-start my-5">

diff --git a/src/coffee/templates/benchmarks.html b/src/coffee/templates/benchmarks.html
@@ -1,11 +1,14 @@
+{% from "macros/benchmark_card.html" import benchmark_card %}
+{% from "macros/breadcrumb.html" import breadcrumb %}
+
 {% extends "base.html" %}
 
 {% block title %}Benchmarks{% endblock %}
 
 {% block content %}
     <div class="px-4 py-5 my-5 text-center container-lg mx-auto">
 
-        {% include "_breadcrumb.html" %}
+        {{ breadcrumb(benchmark_score, benchmark_definition) }}
 
         <div class="text-start mb-5">
             <h1>AIP Safety Benchmarks <span type="button" class="btn beta ms-4 mb-2">Provisional</span></h1>
@@ -21,7 +24,7 @@ <h1>AIP Safety Benchmarks <span type="button" class="btn beta ms-4 mb-2">Provisi
         </div>
 
         {% for benchmark_definition in grouped_benchmark_scores %}
-            {% include "_benchmark_card.html" %}
+            {{ benchmark_card(True, benchmark_definition) }}
             <div class="mx-5">&nbsp;</div>
         {% endfor %}
 
@@ -38,4 +41,4 @@ <h4 class="mb-3 mt-5">
         </div>
 
     </div>
-{% endblock %}
+{% endblock %}
diff --git a/src/coffee/templates/_benchmark_card.html → ...ffee/templates/macros/benchmark_card.html b/src/coffee/templates/_benchmark_card.html → ...ffee/templates/macros/benchmark_card.html
@@ -1,3 +1,4 @@
+{% macro benchmark_card(show_benchmark_header, benchmark_definition) %}
 <div class="container text-start border border-2 rounded-4 p-5 box-shadow-light">
     {% if show_benchmark_header %}
         <div class="row mb-4">
@@ -21,3 +22,4 @@ <h2 class="mb-0">{{ benchmark_definition.name() }} Benchmark</h2>
         </div>
     </div>
 </div>
+{%- endmacro %}
diff --git a/src/coffee/templates/macros/breadcrumb.html b/src/coffee/templates/macros/breadcrumb.html
@@ -0,0 +1,16 @@
+{#
+  breadcrumb(benchmark_score, benchmark_definition)
+
+  Renders the breadcrumb <nav>. The "ML Commons" and "Benchmarks" links are
+  always emitted. If `benchmark_score` is truthy, a link to that score's
+  benchmark page (`path_name()` + ".html") is added, followed by an active
+  "<sut name> Report" crumb. Otherwise, if `benchmark_definition` is truthy,
+  a single active crumb with the definition's name is added. When both are
+  falsy (e.g. passed as None), only the two fixed links are rendered.
+#}
+{% macro breadcrumb(benchmark_score, benchmark_definition) %}
+  <nav class="pb-3" style="--bs-breadcrumb-divider: '>';" aria-label="breadcrumb">
+      <ol class="breadcrumb">
+          <li class="breadcrumb-item"><a href="index.html">ML Commons</a></li>
+          <li class="breadcrumb-item"><a href="benchmarks.html">Benchmarks</a></li>
+          {% if benchmark_score %}
+              <li class="breadcrumb-item"><a
+                      href="{{ benchmark_score.benchmark_definition.path_name() }}.html">{{ benchmark_score.benchmark_definition.name() }}</a>
+              </li>
+              <li class="breadcrumb-item active" aria-current="page">{{ benchmark_score.sut.name }} Report</li>
+          {% elif benchmark_definition %}
+              <li class="breadcrumb-item active" aria-current="page">{{ benchmark_definition.name() }}</li>
+          {% endif %}
+      </ol>
+  </nav>
+{%- endmacro %}
diff --git a/src/coffee/templates/macros/interpret_safety_ratings.html b/src/coffee/templates/macros/interpret_safety_ratings.html
@@ -0,0 +1,40 @@
+{#
+  interpret_safety_ratings(stars_description)
+
+  Renders the "How to Interpret Safety Ratings?" legend. `stars_description`
+  is indexed by the integer star count; each entry must provide "rank" and
+  "short_description". Cards are emitted for 5, 4 and 3 stars in the first
+  card group and for 2 and 1 stars in the second, each showing the rank, the
+  stars (via the `display_stars` filter, which the environment must register)
+  and the short description. The second group ends with an empty card,
+  presumably to keep it at three columns like the first; confirm with the
+  page design.
+#}
+{% macro interpret_safety_ratings(stars_description) %}
+    <div class="text-start mt-5" id="benchmark-legend">
+        <h2 class="mb-4">How to Interpret Safety Ratings?</h2>
+        <p>Description goes here lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor
+            incididunt ut labore et dolore magna aliqua.
+            Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
+            consequat.</p>
+    </div>
+
+    <div class="text-start ps-5 mt-5 rounded-top-4 bg-darker">
+        &nbsp;
+    </div>
+    <div class="card-group text-start">
+        {% for i in range(5, 2, -1) %}
+            <div class="card px-5 py-3 border-0 rounded-0 bg-darker">
+                <h3 class="card-title">{{ stars_description[i]['rank'] }}</h3>
+                <div class="d-grid gap-2 d-sm-flex my-2">
+                    {{ (i) | display_stars("sm") }}
+                </div>
+                <p>{{ stars_description[i]['short_description'] }}</p>
+            </div>
+        {% endfor %}
+    </div>
+
+    <div class="card-group text-start">
+        {% for i in range(2, 0, -1) %}
+            <div class="card px-5 py-3 border-0 rounded-0 bg-darker">
+                <h3 class="card-title">{{ stars_description[i]["rank"] }}</h3>
+                <div class="d-grid gap-2 d-sm-flex my-2">
+                    {{ (i) | display_stars("sm") }}
+                </div>
+                <p>{{ stars_description[i]["short_description"] }}</p>
+            </div>
+        {% endfor %}
+        <div class="card p-5 border-0 rounded-0 bg-darker">
+        </div>
+    </div>
+    <div class="text-start ps-5 rounded-bottom-4 bg-darker">
+    </div>
+{%- endmacro %}
diff --git a/src/coffee/templates/_sut_card.html → src/coffee/templates/macros/sut_card.html b/src/coffee/templates/_sut_card.html → src/coffee/templates/macros/sut_card.html
@@ -1,3 +1,4 @@
+{% macro sut_card(benchmark_score, stars_description) %}
 <div class="container text-start border border-2 rounded-4 p-5 box-shadow-light">
     <div class="row">
         <div class="col pe-5">
@@ -38,3 +39,4 @@ <h2 class="mb-0">{{ stars_description[benchmark_score.stars() | round | int]["ra
         </div>
     </div>
 </div>
+{%- endmacro %}
diff --git a/src/coffee/templates/macros/use_harms_limitations.html b/src/coffee/templates/macros/use_harms_limitations.html
@@ -0,0 +1,47 @@
+{#
+  use_harms_limitations(benchmark_definition)
+
+  Renders three sections: "Use Case", "Harms Tested" and "Limitations".
+  Only the "Harms Tested" list is data-driven: it emits one list item per
+  entry returned by `benchmark_definition.harms()`, labelled with
+  `harm.name()`. All other text in this macro is static placeholder copy.
+#}
+{% macro use_harms_limitations(benchmark_definition) %}
+  <div class="row text-start mb-5">
+      <div class="col">
+          <h2>Use Case</h2>
+          <p>Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore
+              et dolore quis nostrud exercitation ullamco laboris magna aliqua.</p>
+          <ul class="list-group">
+              <li class="list-group-item border-0 p-0 pb-1"><strong>Application:</strong> Lorem ipsum dolor sit
+                  amet
+              </li>
+              <li class="list-group-item border-0 p-0 pb-1"><strong>User:</strong> General Public</li>
+              <li class="list-group-item border-0 p-0 pb-1"><strong>Region:</strong> North America, Canada</li>
+          </ul>
+
+      </div>
+      <div class="col-2"></div>
+      <div class="col">
+          <h2>Harms Tested</h2>
+          <p>Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore
+              et dolore quis nostrud exercitation ullamco laboris magna aliqua.</p>
+          <ul class="list-group">
+              {% for harm in benchmark_definition.harms() %}
+                  <li class="list-group-item border-0 p-0 pb-1"><strong>{{ harm.name() }}:</strong> Explanation of
+                      harm goes here
+                  </li>
+              {% endfor %}
+          </ul>
+
+      </div>
+  </div>
+
+  <div class="card px-5 border-0 rounded-4 bg-darker text-start">
+      <h3 class="mb-3 mt-5">
+          Limitations
+      </h3>
+      <ul class="list-group mb-5">
+          <li class="list-group-item bg-darker border-0 p-0 pb-1"><strong>Tests are not comprehensive:</strong>
+              Explain what this means in a line here lorem ipsum
+              dolor sit amet.
+          </li>
+          <li class="list-group-item bg-darker border-0 p-0 pb-1"><strong>This is not a production
+              testing:</strong> Explain what this means in a line here lorem ipsum
+              dolor sit amet.
+          </li>
+      </ul>
+  </div>
+{%- endmacro %}
diff --git a/src/coffee/templates/test_report.html b/src/coffee/templates/test_report.html
@@ -1,11 +1,16 @@
+{% from "macros/breadcrumb.html" import breadcrumb %}
+{% from "macros/interpret_safety_ratings.html" import interpret_safety_ratings %}
+{% from "macros/sut_card.html" import sut_card %}
+{% from "macros/use_harms_limitations.html" import use_harms_limitations %}
+
 {% extends "base.html" %}
 
 {% block title %}{{ benchmark_name }} Benchmark{% endblock %}
 
 {% block content %}
     <div class="px-4 py-5 my-5 text-center container-lg mx-auto">
 
-        {% include "_breadcrumb.html" %}
+        {{ breadcrumb(benchmark_score, benchmark_score.benchmark_definition) }}
 
         <div class="text-start mb-5">
             <h2>Test Report</h2>
@@ -18,9 +23,9 @@ <h1>{{ benchmark_score.sut.name }} - {{ benchmark_score.benchmark_definition.nam
             </div>
         </div>
 
-        {% include "_use_harms_limitations.html" %}
+        {{ use_harms_limitations(benchmark_score.benchmark_definition) }}
 
-        {% include "_interpret_safety_ratings.html" %}
+        {{ interpret_safety_ratings(stars_description) }}
 
         <div class="text-start my-5">
             <h2 class="mb-3">Overall Safety Rating</h2>
@@ -29,7 +34,7 @@ <h2 class="mb-3">Overall Safety Rating</h2>
                 nostrud</p>
         </div>
 
-        {% include "_sut_card.html" %}
+        {{ sut_card(benchmark_score, stars_description) }}
 
         <div class="text-start my-5">
             <h2 class="mb-3">Tests Run</h2>

diff --git a/tests/templates/conftest.py b/tests/templates/conftest.py
@@ -0,0 +1,57 @@
+from itertools import groupby
+import pytest
+import pathlib
+
+from jinja2 import Environment, FileSystemLoader
+
+from coffee.helm import HelmSut
+from coffee.benchmark import (
+    GeneralChatBotBenchmarkDefinition,
+    BiasHarmDefinition,
+    HarmScore,
+    BenchmarkScore,
+    ToxicityHarmDefinition,
+)
+from coffee.static_site_generator import STARS_DESCRIPTION, display_stars
+
+
+def _benchmark_score() -> BenchmarkScore:
+    bd = GeneralChatBotBenchmarkDefinition()
+    bs = BenchmarkScore(
+        bd,
+        HelmSut.GPT2,
+        [
+            HarmScore(BiasHarmDefinition(), 0.5),
+            HarmScore(ToxicityHarmDefinition(), 0.8),
+        ],
+    )
+    return bs
+
+
+@pytest.fixture()
+def benchmark_score() -> BenchmarkScore:
+    """Provide the sample score built by ``_benchmark_score`` as a pytest fixture."""
+    return _benchmark_score()
+
+
+@pytest.fixture()
+def grouped_benchmark_scores() -> dict[str, list[BenchmarkScore]]:
+    benchmark_scores_dict = {}
+    for benchmark_definition, grouped_benchmark_scores in groupby(
+        [_benchmark_score()], lambda x: x.benchmark_definition
+    ):
+        grouped_benchmark_scores_list: list = list(grouped_benchmark_scores)
+        benchmark_scores_dict[benchmark_definition] = grouped_benchmark_scores_list
+    return benchmark_scores_dict
+
+
+@pytest.fixture()
+def template_env() -> Environment:
+    template_dir = pathlib.Path(__file__).parent.parent.parent / "src" / "coffee" / "templates"
+    env = Environment(loader=FileSystemLoader(template_dir))
+    env.filters["display_stars"] = display_stars
+    return env
+
+
+@pytest.fixture()
+def stars_description() -> dict[int, dict[str, str]]:
+    """Expose ``STARS_DESCRIPTION`` from ``coffee.static_site_generator`` to tests."""
+    return STARS_DESCRIPTION
diff --git a/tests/templates/macros/test_benchmark_card.py b/tests/templates/macros/test_benchmark_card.py
@@ -0,0 +1,8 @@
+def test_display_benchmark_card(benchmark_score, template_env):
+    template = template_env.get_template("macros/benchmark_card.html")
+    result = template.module.benchmark_card(True, benchmark_score.benchmark_definition)
+    assert "General Chat Bot Benchmark" in result
+    assert "Lorem ipsum dolor sit amet" in result
+    result_no_header = template.module.benchmark_card(False, benchmark_score.benchmark_definition)
+    assert "General Chat Bot Benchmark" not in result_no_header
+    assert "Lorem ipsum dolor sit amet" in result