diff --git a/src/coffee/templates/_breadcrumb.html b/src/coffee/templates/_breadcrumb.html
deleted file mode 100644
index 4326bd3a..00000000
--- a/src/coffee/templates/_breadcrumb.html
+++ /dev/null
@@ -1,14 +0,0 @@
-<nav class="pb-3" style="--bs-breadcrumb-divider: '>';" aria-label="breadcrumb">
-    <ol class="breadcrumb">
-        <li class="breadcrumb-item"><a href="index.html">ML Commons</a></li>
-        <li class="breadcrumb-item"><a href="benchmarks.html">Benchmarks</a></li>
-        {% if benchmark_score %}
-            <li class="breadcrumb-item"><a
-                    href="{{ benchmark_score.benchmark_definition.path_name() }}.html">{{ benchmark_score.benchmark_definition.name() }}</a>
-            </li>
-            <li class="breadcrumb-item active" aria-current="page">{{ benchmark_score.sut.name }} Report</li>
-        {% elif benchmark_definition %}
-            <li class="breadcrumb-item active" aria-current="page">{{ benchmark_definition.name() }}</li>
-        {% endif %}
-    </ol>
-</nav>
\ No newline at end of file
diff --git a/src/coffee/templates/_interpret_safety_ratings.html b/src/coffee/templates/_interpret_safety_ratings.html
deleted file mode 100644
index 62eb53f1..00000000
--- a/src/coffee/templates/_interpret_safety_ratings.html
+++ /dev/null
@@ -1,38 +0,0 @@
-<div class="text-start mt-5" id="benchmark-legend">
-    <h2 class="mb-4">How to Interpret Safety Ratings?</h2>
-    <p>Description goes here lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor
-        incididunt ut labore et dolore magna aliqua.
-        Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
-        consequat.</p>
-</div>
-
-<div class="text-start ps-5 mt-5 rounded-top-4 bg-darker">
-    &nbsp;
-</div>
-<div class="card-group text-start">
-    {% for i in range(5, 2, -1) %}
-        <div class="card px-5 py-3 border-0 rounded-0 bg-darker">
-            <h3 class="card-title">{{ stars_description[i]['rank'] }}</h3>
-            <div class="d-grid gap-2 d-sm-flex my-2">
-                {{ (i) | display_stars("sm") }}
-            </div>
-            <p>{{ stars_description[i]['short_description'] }}</p>
-        </div>
-    {% endfor %}
-</div>
-
-<div class="card-group text-start">
-    {% for i in range(2, 0, -1) %}
-        <div class="card px-5 py-3 border-0 rounded-0 bg-darker">
-            <h3 class="card-title">{{ stars_description[i]["rank"] }}</h3>
-            <div class="d-grid gap-2 d-sm-flex my-2">
-                {{ (i) | display_stars("sm") }}
-            </div>
-            <p>{{ stars_description[i]["short_description"] }}</p>
-        </div>
-    {% endfor %}
-    <div class="card p-5 border-0 rounded-0 bg-darker">
-    </div>
-</div>
-<div class="text-start ps-5 rounded-bottom-4 bg-darker">
-</div>
\ No newline at end of file
diff --git a/src/coffee/templates/_use_harms_limitations.html b/src/coffee/templates/_use_harms_limitations.html
deleted file mode 100644
index 1411ba26..00000000
--- a/src/coffee/templates/_use_harms_limitations.html
+++ /dev/null
@@ -1,49 +0,0 @@
-{% if not benchmark_definition %}
-    {% set benchmark_definition = benchmark_score.benchmark_definition %}
-{% endif %}
-
-<div class="row text-start mb-5">
-    <div class="col">
-        <h2>Use Case</h2>
-        <p>Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore
-            et dolore quis nostrud exercitation ullamco laboris magna aliqua.</p>
-        <ul class="list-group">
-            <li class="list-group-item border-0 p-0 pb-1"><strong>Application:</strong> Lorem ipsum dolor sit
-                amet
-            </li>
-            <li class="list-group-item border-0 p-0 pb-1"><strong>User:</strong> General Public</li>
-            <li class="list-group-item border-0 p-0 pb-1"><strong>Region:</strong> North America, Canada</li>
-        </ul>
-
-    </div>
-    <div class="col-2"></div>
-    <div class="col">
-        <h2>Harms Tested</h2>
-        <p>Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore
-            et dolore quis nostrud exercitation ullamco laboris magna aliqua.</p>
-        <ul class="list-group">
-            {% for harm in benchmark_definition.harms() %}
-                <li class="list-group-item border-0 p-0 pb-1"><strong>{{ harm.name() }}:</strong> Explanation of
-                    harm goes here
-                </li>
-            {% endfor %}
-        </ul>
-
-    </div>
-</div>
-
-<div class="card px-5 border-0 rounded-4 bg-darker text-start">
-    <h3 class="mb-3 mt-5">
-        Limitations
-    </h3>
-    <ul class="list-group mb-5">
-        <li class="list-group-item bg-darker border-0 p-0 pb-1"><strong>Tests are not comprehensive:</strong>
-            Explain what this means in a line here lorem ipsum
-            dolor sit amet.
-        </li>
-        <li class="list-group-item bg-darker border-0 p-0 pb-1"><strong>This is not a production
-            testing:</strong> Explain what this means in a line here lorem ipsum
-            dolor sit amet.
-        </li>
-    </ul>
-</div>
\ No newline at end of file
diff --git a/src/coffee/templates/benchmark.html b/src/coffee/templates/benchmark.html
index dc08b2d8..cef6e20b 100644
--- a/src/coffee/templates/benchmark.html
+++ b/src/coffee/templates/benchmark.html
@@ -1,3 +1,7 @@
 {% extends "base.html" %}
+{# Keep extends as the first tag: Jinja renders anything placed above it ahead of the parent layout. #}
+{% from "macros/breadcrumb.html" import breadcrumb %}
+{% from "macros/interpret_safety_ratings.html" import interpret_safety_ratings %}
+{% from "macros/use_harms_limitations.html" import use_harms_limitations %}
 
 {% block title %}{{ benchmark_name }} Benchmark{% endblock %}
@@ -5,7 +9,7 @@
 {% block content %}
     <div class="px-4 py-5 my-5 text-center container-lg mx-auto">
 
-        {% include "_breadcrumb.html" %}
+        {{ breadcrumb(None, benchmark_definition) }}
 
         <div class="text-start mb-5">
             <h1>{{ benchmark_definition.name() }} <span class="btn beta ms-4 mb-2">Provisional</span></h1>
@@ -16,9 +20,9 @@ <h1>{{ benchmark_definition.name() }} <span class="btn beta ms-4 mb-2">Provision
             </div>
         </div>
 
-        {% include "_use_harms_limitations.html" %}
+        {{ use_harms_limitations(benchmark_definition) }}
 
-        {% include "_interpret_safety_ratings.html" %}
+        {{ interpret_safety_ratings(stars_description) }}
         &nbsp;
 
         <div class="text-start my-5">
diff --git a/src/coffee/templates/benchmarks.html b/src/coffee/templates/benchmarks.html
index 036de67f..fcbbc18d 100644
--- a/src/coffee/templates/benchmarks.html
+++ b/src/coffee/templates/benchmarks.html
@@ -1,3 +1,6 @@
 {% extends "base.html" %}
+{# Keep extends as the first tag: Jinja renders anything placed above it ahead of the parent layout. #}
+{% from "macros/benchmark_card.html" import benchmark_card %}
+{% from "macros/breadcrumb.html" import breadcrumb %}
 
 {% block title %}Benchmarks{% endblock %}
@@ -5,7 +8,7 @@
 {% block content %}
     <div class="px-4 py-5 my-5 text-center container-lg mx-auto">
 
-        {% include "_breadcrumb.html" %}
+        {{ breadcrumb(benchmark_score, benchmark_definition) }}
 
         <div class="text-start mb-5">
             <h1>AIP Safety Benchmarks <span type="button" class="btn beta ms-4 mb-2">Provisional</span></h1>
@@ -21,7 +24,7 @@ <h1>AIP Safety Benchmarks <span type="button" class="btn beta ms-4 mb-2">Provisi
         </div>
 
         {% for benchmark_definition in grouped_benchmark_scores %}
-            {% include "_benchmark_card.html" %}
+            {{ benchmark_card(True, benchmark_definition) }}
             <div class="mx-5">&nbsp;</div>
         {% endfor %}
 
@@ -38,4 +41,4 @@ <h4 class="mb-3 mt-5">
         </div>
 
     </div>
-{% endblock %}
\ No newline at end of file
+{% endblock %}
diff --git a/src/coffee/templates/_benchmark_card.html b/src/coffee/templates/macros/benchmark_card.html
similarity index 91%
rename from src/coffee/templates/_benchmark_card.html
rename to src/coffee/templates/macros/benchmark_card.html
index 5203fe0a..f50a5ecf 100644
--- a/src/coffee/templates/_benchmark_card.html
+++ b/src/coffee/templates/macros/benchmark_card.html
@@ -1,3 +1,4 @@
+{% macro benchmark_card(show_benchmark_header, benchmark_definition) %}
 <div class="container text-start border border-2 rounded-4 p-5 box-shadow-light">
     {% if show_benchmark_header %}
         <div class="row mb-4">
@@ -21,3 +22,4 @@ <h2 class="mb-0">{{ benchmark_definition.name() }} Benchmark</h2>
         </div>
     </div>
 </div>
+{%- endmacro %}
diff --git a/src/coffee/templates/macros/breadcrumb.html b/src/coffee/templates/macros/breadcrumb.html
new file mode 100644
index 00000000..7f6835a6
--- /dev/null
+++ b/src/coffee/templates/macros/breadcrumb.html
@@ -0,0 +1,16 @@
+{% macro breadcrumb(benchmark_score, benchmark_definition) %}{# Breadcrumb trail: a score links back to its benchmark and ends on the SUT report; otherwise a definition alone is the final crumb. #}
+  <nav class="pb-3" style="--bs-breadcrumb-divider: '>';" aria-label="breadcrumb">
+      <ol class="breadcrumb">
+          <li class="breadcrumb-item"><a href="index.html">ML Commons</a></li>
+          <li class="breadcrumb-item"><a href="benchmarks.html">Benchmarks</a></li>
+          {% if benchmark_score %}
+              <li class="breadcrumb-item"><a
+                      href="{{ benchmark_score.benchmark_definition.path_name() }}.html">{{ benchmark_score.benchmark_definition.name() }}</a>
+              </li>
+              <li class="breadcrumb-item active" aria-current="page">{{ benchmark_score.sut.name }} Report</li>
+          {% elif benchmark_definition %}
+              <li class="breadcrumb-item active" aria-current="page">{{ benchmark_definition.name() }}</li>
+          {% endif %}
+      </ol>
+  </nav>
+{%- endmacro %}
diff --git a/src/coffee/templates/macros/interpret_safety_ratings.html b/src/coffee/templates/macros/interpret_safety_ratings.html
new file mode 100644
index 00000000..fc418527
--- /dev/null
+++ b/src/coffee/templates/macros/interpret_safety_ratings.html
@@ -0,0 +1,40 @@
+{% macro interpret_safety_ratings(stars_description) %}{# Legend explaining each star rating: five to three stars on the first row, two and one on the second. #}
+    <div class="text-start mt-5" id="benchmark-legend">
+        <h2 class="mb-4">How to Interpret Safety Ratings?</h2>
+        <p>Description goes here lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor
+            incididunt ut labore et dolore magna aliqua.
+            Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
+            consequat.</p>
+    </div>
+
+    <div class="text-start ps-5 mt-5 rounded-top-4 bg-darker">
+        &nbsp;
+    </div>
+    <div class="card-group text-start">
+        {% for stars in range(5, 2, -1) %}
+            <div class="card px-5 py-3 border-0 rounded-0 bg-darker">
+                <h3 class="card-title">{{ stars_description[stars]["rank"] }}</h3>
+                <div class="d-grid gap-2 d-sm-flex my-2">
+                    {{ stars | display_stars("sm") }}
+                </div>
+                <p>{{ stars_description[stars]["short_description"] }}</p>
+            </div>
+        {% endfor %}
+    </div>
+
+    <div class="card-group text-start">
+        {% for stars in range(2, 0, -1) %}
+            <div class="card px-5 py-3 border-0 rounded-0 bg-darker">
+                <h3 class="card-title">{{ stars_description[stars]["rank"] }}</h3>
+                <div class="d-grid gap-2 d-sm-flex my-2">
+                    {{ stars | display_stars("sm") }}
+                </div>
+                <p>{{ stars_description[stars]["short_description"] }}</p>
+            </div>
+        {% endfor %}
+        <div class="card p-5 border-0 rounded-0 bg-darker">
+        </div>
+    </div>
+    <div class="text-start ps-5 rounded-bottom-4 bg-darker">
+    </div>
+{%- endmacro %}
diff --git a/src/coffee/templates/_sut_card.html b/src/coffee/templates/macros/sut_card.html
similarity index 96%
rename from src/coffee/templates/_sut_card.html
rename to src/coffee/templates/macros/sut_card.html
index 3eeddd2d..cb383008 100644
--- a/src/coffee/templates/_sut_card.html
+++ b/src/coffee/templates/macros/sut_card.html
@@ -1,3 +1,4 @@
+{% macro sut_card(benchmark_score, stars_description) %}
 <div class="container text-start border border-2 rounded-4 p-5 box-shadow-light">
     <div class="row">
         <div class="col pe-5">
@@ -38,3 +39,4 @@ <h2 class="mb-0">{{ stars_description[benchmark_score.stars() | round | int]["ra
         </div>
     </div>
 </div>
+{%- endmacro %}
diff --git a/src/coffee/templates/macros/use_harms_limitations.html b/src/coffee/templates/macros/use_harms_limitations.html
new file mode 100644
index 00000000..398fef15
--- /dev/null
+++ b/src/coffee/templates/macros/use_harms_limitations.html
@@ -0,0 +1,47 @@
+{% macro use_harms_limitations(benchmark_definition) %}{# Use-case and harms columns (one bullet per entry of benchmark_definition.harms()), followed by the limitations card. #}
+  <div class="row text-start mb-5">
+      <div class="col">
+          <h2>Use Case</h2>
+          <p>Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore
+              et dolore quis nostrud exercitation ullamco laboris magna aliqua.</p>
+          <ul class="list-group">
+              <li class="list-group-item border-0 p-0 pb-1"><strong>Application:</strong> Lorem ipsum dolor sit
+                  amet
+              </li>
+              <li class="list-group-item border-0 p-0 pb-1"><strong>User:</strong> General Public</li>
+              <li class="list-group-item border-0 p-0 pb-1"><strong>Region:</strong> North America, Canada</li>
+          </ul>
+
+      </div>
+      <div class="col-2"></div>
+      <div class="col">
+          <h2>Harms Tested</h2>
+          <p>Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore
+              et dolore quis nostrud exercitation ullamco laboris magna aliqua.</p>
+          <ul class="list-group">
+              {% for harm in benchmark_definition.harms() %}
+                  <li class="list-group-item border-0 p-0 pb-1"><strong>{{ harm.name() }}:</strong> Explanation of
+                      harm goes here
+                  </li>
+              {% endfor %}
+          </ul>
+
+      </div>
+  </div>
+
+  <div class="card px-5 border-0 rounded-4 bg-darker text-start">
+      <h3 class="mb-3 mt-5">
+          Limitations
+      </h3>
+      <ul class="list-group mb-5">
+          <li class="list-group-item bg-darker border-0 p-0 pb-1"><strong>Tests are not comprehensive:</strong>
+              Explain what this means in a line here lorem ipsum
+              dolor sit amet.
+          </li>
+          <li class="list-group-item bg-darker border-0 p-0 pb-1"><strong>This is not a production
+              testing:</strong> Explain what this means in a line here lorem ipsum
+              dolor sit amet.
+          </li>
+      </ul>
+  </div>
+{%- endmacro %}
diff --git a/src/coffee/templates/test_report.html b/src/coffee/templates/test_report.html
index c6d6eadc..d7e425db 100644
--- a/src/coffee/templates/test_report.html
+++ b/src/coffee/templates/test_report.html
@@ -1,3 +1,8 @@
 {% extends "base.html" %}
+{# Keep extends as the first tag: Jinja renders anything placed above it ahead of the parent layout. #}
+{% from "macros/breadcrumb.html" import breadcrumb %}
+{% from "macros/interpret_safety_ratings.html" import interpret_safety_ratings %}
+{% from "macros/sut_card.html" import sut_card %}
+{% from "macros/use_harms_limitations.html" import use_harms_limitations %}
 
 {% block title %}{{ benchmark_name }} Benchmark{% endblock %}
@@ -5,7 +10,7 @@
 {% block content %}
     <div class="px-4 py-5 my-5 text-center container-lg mx-auto">
 
-        {% include "_breadcrumb.html" %}
+        {{ breadcrumb(benchmark_score, benchmark_score.benchmark_definition) }}
 
         <div class="text-start mb-5">
             <h2>Test Report</h2>
@@ -18,9 +23,9 @@ <h1>{{ benchmark_score.sut.name }} - {{ benchmark_score.benchmark_definition.nam
             </div>
         </div>
 
-        {% include "_use_harms_limitations.html" %}
+        {{ use_harms_limitations(benchmark_score.benchmark_definition) }}
 
-        {% include "_interpret_safety_ratings.html" %}
+        {{ interpret_safety_ratings(stars_description) }}
 
         <div class="text-start my-5">
             <h2 class="mb-3">Overall Safety Rating</h2>
@@ -29,7 +34,7 @@ <h2 class="mb-3">Overall Safety Rating</h2>
                 nostrud</p>
         </div>
 
-        {% include "_sut_card.html" %}
+        {{ sut_card(benchmark_score, stars_description) }}
 
         <div class="text-start my-5">
             <h2 class="mb-3">Tests Run</h2>
diff --git a/tests/templates/conftest.py b/tests/templates/conftest.py
new file mode 100644
index 00000000..cf347949
--- /dev/null
+++ b/tests/templates/conftest.py
@@ -0,0 +1,57 @@
+from itertools import groupby
+import pytest
+import pathlib
+
+from jinja2 import Environment, FileSystemLoader
+
+from coffee.helm import HelmSut
+from coffee.benchmark import (
+    GeneralChatBotBenchmarkDefinition,
+    BiasHarmDefinition,
+    HarmScore,
+    BenchmarkScore,
+    ToxicityHarmDefinition,
+)
+from coffee.static_site_generator import STARS_DESCRIPTION, display_stars
+
+
+def _benchmark_score() -> BenchmarkScore:
+    """Build the canned score used by the fixtures in this module.
+
+    ``HelmSut.GPT2`` on the general chat bot benchmark: bias 0.5, toxicity 0.8.
+    """
+    definition = GeneralChatBotBenchmarkDefinition()
+    harm_scores = [
+        HarmScore(BiasHarmDefinition(), 0.5),
+        HarmScore(ToxicityHarmDefinition(), 0.8),
+    ]
+    return BenchmarkScore(definition, HelmSut.GPT2, harm_scores)
+
+
+@pytest.fixture()
+def benchmark_score() -> BenchmarkScore:
+    return _benchmark_score()  # built anew on every request, so tests never share one instance
+
+
+@pytest.fixture()
+def grouped_benchmark_scores() -> dict[GeneralChatBotBenchmarkDefinition, list[BenchmarkScore]]:
+    """Group the canned score by its benchmark definition.
+
+    Keys are the benchmark definition objects themselves (not strings); each
+    value is the list of scores that share that definition.
+    """
+    scores_by_definition = groupby([_benchmark_score()], lambda score: score.benchmark_definition)
+    return {definition: list(scores) for definition, scores in scores_by_definition}
+
+
+@pytest.fixture()
+def template_env() -> Environment:
+    template_dir = pathlib.Path(__file__).parent.parent.parent / "src" / "coffee" / "templates"
+    env = Environment(loader=FileSystemLoader(template_dir))  # loads the templates under src/coffee/templates
+    env.filters["display_stars"] = display_stars  # the templates pipe star counts through this filter
+    return env
+
+
+@pytest.fixture()
+def stars_description() -> dict[int, dict[str, str]]:
+    return STARS_DESCRIPTION  # the imported module-level mapping itself, not a copy
diff --git a/tests/templates/macros/test_benchmark_card.py b/tests/templates/macros/test_benchmark_card.py
new file mode 100644
index 00000000..53391e34
--- /dev/null
+++ b/tests/templates/macros/test_benchmark_card.py
@@ -0,0 +1,8 @@
+def test_display_benchmark_card(benchmark_score, template_env):
+    template = template_env.get_template("macros/benchmark_card.html")
+    result = template.module.benchmark_card(True, benchmark_score.benchmark_definition)  # header shown
+    assert "General Chat Bot Benchmark" in result
+    assert "Lorem ipsum dolor sit amet" in result
+    result_no_header = template.module.benchmark_card(False, benchmark_score.benchmark_definition)  # header hidden
+    assert "General Chat Bot Benchmark" not in result_no_header
+    assert "Lorem ipsum dolor sit amet" in result_no_header  # the card body must still render without the header
diff --git a/tests/templates/macros/test_breadcrumb.py b/tests/templates/macros/test_breadcrumb.py
new file mode 100644
index 00000000..4f982526
--- /dev/null
+++ b/tests/templates/macros/test_breadcrumb.py
@@ -0,0 +1,10 @@
+def test_display_breadcrumb(benchmark_score, template_env):
+    """A score adds a link back to its benchmark page; a bare definition is shown as the unlinked current page."""
+    render = template_env.get_template("macros/breadcrumb.html").module.breadcrumb
+    report_trail = render(benchmark_score, benchmark_score.benchmark_definition)
+    for expected in ("Benchmarks", "General Chat Bot", 'href="general_chat_bot_benchmark.html"'):
+        assert expected in report_trail
+    benchmark_trail = render(None, benchmark_score.benchmark_definition)
+    assert "Benchmarks" in benchmark_trail
+    assert 'href="general_chat_bot_benchmark.html"' not in benchmark_trail
+    assert "General Chat Bot" in benchmark_trail
diff --git a/tests/templates/macros/test_interpret_safety_ratings.py b/tests/templates/macros/test_interpret_safety_ratings.py
new file mode 100644
index 00000000..e082d302
--- /dev/null
+++ b/tests/templates/macros/test_interpret_safety_ratings.py
@@ -0,0 +1,8 @@
+def test_interpret_safety_ratings(stars_description, template_env):
+    """The legend renders its heading, its intro copy and the expected rating descriptions."""
+    macros = template_env.get_template("macros/interpret_safety_ratings.html").module
+    legend = macros.interpret_safety_ratings(stars_description)
+    for snippet in ("How to Interpret Safety Ratings?", "Description goes here", "Excellent"):
+        assert snippet in legend
+    assert "At or above moderate quality reference system." in legend
+    assert "Below minimum quality reference system." in legend
diff --git a/tests/templates/macros/test_sut_card.py b/tests/templates/macros/test_sut_card.py
new file mode 100644
index 00000000..0ae18a84
--- /dev/null
+++ b/tests/templates/macros/test_sut_card.py
@@ -0,0 +1,6 @@
+def test_display_sut(benchmark_score, stars_description, template_env):
+    """Render the SUT card for the canned score and check the expected rating text."""
+    card = template_env.get_template("macros/sut_card.html").module.sut_card(benchmark_score, stars_description)
+    assert 'Model performance can be compared to a "moderate quality"' in card
+    assert "66% safe responses" in card
+    assert "What does 'Fair' mean?" in card
diff --git a/tests/templates/macros/test_use_harms_limitations.py b/tests/templates/macros/test_use_harms_limitations.py
new file mode 100644
index 00000000..2cc88544
--- /dev/null
+++ b/tests/templates/macros/test_use_harms_limitations.py
@@ -0,0 +1,7 @@
+def test_use_harms_limitations(benchmark_score, template_env):
+    """All three section headings and the placeholder copy appear for the canned benchmark definition."""
+    macros = template_env.get_template("macros/use_harms_limitations.html").module
+    html = macros.use_harms_limitations(benchmark_score.benchmark_definition)
+    for heading in ("Use Case", "Harms Tested", "Limitations"):
+        assert heading in html
+    assert "Lorem ipsum dolor sit amet" in html
diff --git a/tests/templates/test_benchmark.py b/tests/templates/test_benchmark.py
new file mode 100644
index 00000000..a8c88e27
--- /dev/null
+++ b/tests/templates/test_benchmark.py
@@ -0,0 +1,14 @@
+def test_benchmark(benchmark_score, template_env, stars_description, grouped_benchmark_scores):
+    """Render benchmark.html with a full context and check that the expected page text is present."""
+    page = template_env.get_template("benchmark.html").render(
+        benchmark_score=benchmark_score,
+        benchmark_definition=benchmark_score.benchmark_definition,
+        grouped_benchmark_scores=grouped_benchmark_scores,
+        stars_description=stars_description,
+    )
+    assert "General Chat Bot" in page
+    assert "Harms Tested" in page
+    assert "How to Interpret Safety Ratings?" in page
+    assert "Below minimum quality reference system." in page
+    assert "AI Systems Evaluated" in page
+    assert "Privacy Policy" in page
diff --git a/tests/templates/test_benchmarks.py b/tests/templates/test_benchmarks.py
new file mode 100644
index 00000000..a0003533
--- /dev/null
+++ b/tests/templates/test_benchmarks.py
@@ -0,0 +1,11 @@
+def test_benchmarks(benchmark_score, template_env, stars_description, grouped_benchmark_scores):
+    """Render benchmarks.html with a full context and check that the expected page text is present."""
+    page = template_env.get_template("benchmarks.html").render(
+        benchmark_score=benchmark_score,
+        benchmark_definition=benchmark_score.benchmark_definition,
+        grouped_benchmark_scores=grouped_benchmark_scores,
+        stars_description=stars_description,
+    )
+    assert "AIP Safety Benchmarks" in page
+    assert "General Chat Bot Benchmark" in page
+    assert "Privacy Policy" in page
diff --git a/tests/templates/test_test_report.py b/tests/templates/test_test_report.py
new file mode 100644
index 00000000..f2ea1904
--- /dev/null
+++ b/tests/templates/test_test_report.py
@@ -0,0 +1,13 @@
+def test_test_report(benchmark_score, template_env, stars_description):
+    """Render test_report.html for the canned score and check that the expected page text is present."""
+    page = template_env.get_template("test_report.html").render(
+        benchmark_score=benchmark_score,
+        stars_description=stars_description,
+    )
+    assert "Test Report" in page
+    assert "66% safe responses" in page
+    assert "Tests Run" in page
+    assert "Test Details" in page
+    assert "General Chat Bot" in page
+    assert "Don't see the tests you are looking for?" in page
+    assert "Privacy Policy" in page