Skip to content

Commit

Permalink
Move partials to macros and tests (#59)
Browse files Browse the repository at this point in the history
* Move partials to macros for testability
* Add tests for components and pages
  • Loading branch information
mkly authored Feb 2, 2024
1 parent 4b7899d commit d3deae2
Show file tree
Hide file tree
Showing 20 changed files with 263 additions and 111 deletions.
14 changes: 0 additions & 14 deletions src/coffee/templates/_breadcrumb.html

This file was deleted.

38 changes: 0 additions & 38 deletions src/coffee/templates/_interpret_safety_ratings.html

This file was deleted.

49 changes: 0 additions & 49 deletions src/coffee/templates/_use_harms_limitations.html

This file was deleted.

10 changes: 7 additions & 3 deletions src/coffee/templates/benchmark.html
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
{% from "macros/breadcrumb.html" import breadcrumb %}
{% from "macros/interpret_safety_ratings.html" import interpret_safety_ratings %}
{% from "macros/use_harms_limitations.html" import use_harms_limitations %}

{% extends "base.html" %}

{% block title %}{{ benchmark_name }} Benchmark{% endblock %}

{% block content %}
<div class="px-4 py-5 my-5 text-center container-lg mx-auto">

{% include "_breadcrumb.html" %}
{{ breadcrumb(None, benchmark_definition) }}

<div class="text-start mb-5">
<h1>{{ benchmark_definition.name() }} <span class="btn beta ms-4 mb-2">Provisional</span></h1>
Expand All @@ -16,9 +20,9 @@ <h1>{{ benchmark_definition.name() }} <span class="btn beta ms-4 mb-2">Provision
</div>
</div>

{% include "_use_harms_limitations.html" %}
{{ use_harms_limitations(benchmark_definition) }}

{% include "_interpret_safety_ratings.html" %}
{{ interpret_safety_ratings(stars_description) }}
&nbsp;

<div class="text-start my-5">
Expand Down
9 changes: 6 additions & 3 deletions src/coffee/templates/benchmarks.html
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
{% from "macros/benchmark_card.html" import benchmark_card %}
{% from "macros/breadcrumb.html" import breadcrumb %}

{% extends "base.html" %}

{% block title %}Benchmarks{% endblock %}

{% block content %}
<div class="px-4 py-5 my-5 text-center container-lg mx-auto">

{% include "_breadcrumb.html" %}
{{ breadcrumb(benchmark_score, benchmark_definition) }}

<div class="text-start mb-5">
<h1>AIP Safety Benchmarks <span type="button" class="btn beta ms-4 mb-2">Provisional</span></h1>
Expand All @@ -21,7 +24,7 @@ <h1>AIP Safety Benchmarks <span type="button" class="btn beta ms-4 mb-2">Provisi
</div>

{% for benchmark_definition in grouped_benchmark_scores %}
{% include "_benchmark_card.html" %}
{{ benchmark_card(True, benchmark_definition) }}
<div class="mx-5">&nbsp;</div>
{% endfor %}

Expand All @@ -38,4 +41,4 @@ <h4 class="mb-3 mt-5">
</div>

</div>
{% endblock %}
{% endblock %}
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{% macro benchmark_card(show_benchmark_header, benchmark_definition) %}
<div class="container text-start border border-2 rounded-4 p-5 box-shadow-light">
{% if show_benchmark_header %}
<div class="row mb-4">
Expand All @@ -21,3 +22,4 @@ <h2 class="mb-0">{{ benchmark_definition.name() }} Benchmark</h2>
</div>
</div>
</div>
{%- endmacro %}
16 changes: 16 additions & 0 deletions src/coffee/templates/macros/breadcrumb.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{#
  breadcrumb(benchmark_score, benchmark_definition)

  Renders the breadcrumb navigation. The "ML Commons" (index.html) and
  "Benchmarks" (benchmarks.html) links are always emitted.

  - benchmark_score truthy: adds a link to the score's benchmark page
    (benchmark_score.benchmark_definition.path_name() + ".html") followed by
    an active "<sut name> Report" item. benchmark_definition is not read in
    this branch.
  - otherwise, benchmark_definition truthy: adds an active item showing
    benchmark_definition.name().
  - both falsy: only the two fixed links are rendered.
#}
{% macro breadcrumb(benchmark_score, benchmark_definition) %}
<nav class="pb-3" style="--bs-breadcrumb-divider: '>';" aria-label="breadcrumb">
<ol class="breadcrumb">
<li class="breadcrumb-item"><a href="index.html">ML Commons</a></li>
<li class="breadcrumb-item"><a href="benchmarks.html">Benchmarks</a></li>
{% if benchmark_score %}
<li class="breadcrumb-item"><a
href="{{ benchmark_score.benchmark_definition.path_name() }}.html">{{ benchmark_score.benchmark_definition.name() }}</a>
</li>
<li class="breadcrumb-item active" aria-current="page">{{ benchmark_score.sut.name }} Report</li>
{% elif benchmark_definition %}
<li class="breadcrumb-item active" aria-current="page">{{ benchmark_definition.name() }}</li>
{% endif %}
</ol>
</nav>
{%- endmacro %}
40 changes: 40 additions & 0 deletions src/coffee/templates/macros/interpret_safety_ratings.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
{#
  interpret_safety_ratings(stars_description)

  Renders the "How to Interpret Safety Ratings?" legend: a heading with
  placeholder copy, then one card per star level laid out in two rows
  (levels 5, 4, 3 in the first row; levels 2, 1 plus an empty filler card in
  the second).

  stars_description is indexed by the integer star level; each entry must
  provide the "rank" and "short_description" keys. The rendering environment
  must have the `display_stars` filter registered, since each card pipes its
  level through it.
#}
{% macro interpret_safety_ratings(stars_description) %}
<div class="text-start mt-5" id="benchmark-legend">
<h2 class="mb-4">How to Interpret Safety Ratings?</h2>
<p>Description goes here lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor
incididunt ut labore et dolore magna aliqua.
Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat.</p>
</div>

<div class="text-start ps-5 mt-5 rounded-top-4 bg-darker">
&nbsp;
</div>
<div class="card-group text-start">
{% for i in range(5, 2, -1) %}
<div class="card px-5 py-3 border-0 rounded-0 bg-darker">
<h3 class="card-title">{{ stars_description[i]['rank'] }}</h3>
<div class="d-grid gap-2 d-sm-flex my-2">
{{ (i) | display_stars("sm") }}
</div>
<p>{{ stars_description[i]['short_description'] }}</p>
</div>
{% endfor %}
</div>

<div class="card-group text-start">
{% for i in range(2, 0, -1) %}
<div class="card px-5 py-3 border-0 rounded-0 bg-darker">
<h3 class="card-title">{{ stars_description[i]["rank"] }}</h3>
<div class="d-grid gap-2 d-sm-flex my-2">
{{ (i) | display_stars("sm") }}
</div>
<p>{{ stars_description[i]["short_description"] }}</p>
</div>
{% endfor %}
<div class="card p-5 border-0 rounded-0 bg-darker">
</div>
</div>
<div class="text-start ps-5 rounded-bottom-4 bg-darker">
</div>
{%- endmacro %}
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{% macro sut_card(benchmark_score, stars_description) %}
<div class="container text-start border border-2 rounded-4 p-5 box-shadow-light">
<div class="row">
<div class="col pe-5">
Expand Down Expand Up @@ -38,3 +39,4 @@ <h2 class="mb-0">{{ stars_description[benchmark_score.stars() | round | int]["ra
</div>
</div>
</div>
{%- endmacro %}
47 changes: 47 additions & 0 deletions src/coffee/templates/macros/use_harms_limitations.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
{#
  use_harms_limitations(benchmark_definition)

  Renders three sections: a "Use Case" column, a "Harms Tested" column, and a
  "Limitations" card.

  Only the harms list is data-driven: one list item is emitted per entry of
  benchmark_definition.harms(), labelled with harm.name(). All other copy in
  this macro is static placeholder text.
#}
{% macro use_harms_limitations(benchmark_definition) %}
<div class="row text-start mb-5">
<div class="col">
<h2>Use Case</h2>
<p>Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore
et dolore quis nostrud exercitation ullamco laboris magna aliqua.</p>
<ul class="list-group">
<li class="list-group-item border-0 p-0 pb-1"><strong>Application:</strong> Lorem ipsum dolor sit
amet
</li>
<li class="list-group-item border-0 p-0 pb-1"><strong>User:</strong> General Public</li>
<li class="list-group-item border-0 p-0 pb-1"><strong>Region:</strong> North America, Canada</li>
</ul>

</div>
<div class="col-2"></div>
<div class="col">
<h2>Harms Tested</h2>
<p>Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore
et dolore quis nostrud exercitation ullamco laboris magna aliqua.</p>
<ul class="list-group">
{% for harm in benchmark_definition.harms() %}
<li class="list-group-item border-0 p-0 pb-1"><strong>{{ harm.name() }}:</strong> Explanation of
harm goes here
</li>
{% endfor %}
</ul>

</div>
</div>

<div class="card px-5 border-0 rounded-4 bg-darker text-start">
<h3 class="mb-3 mt-5">
Limitations
</h3>
<ul class="list-group mb-5">
<li class="list-group-item bg-darker border-0 p-0 pb-1"><strong>Tests are not comprehensive:</strong>
Explain what this means in a line here lorem ipsum
dolor sit amet.
</li>
<li class="list-group-item bg-darker border-0 p-0 pb-1"><strong>This is not a production
testing:</strong> Explain what this means in a line here lorem ipsum
dolor sit amet.
</li>
</ul>
</div>
{%- endmacro %}
13 changes: 9 additions & 4 deletions src/coffee/templates/test_report.html
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
{% from "macros/breadcrumb.html" import breadcrumb %}
{% from "macros/interpret_safety_ratings.html" import interpret_safety_ratings %}
{% from "macros/sut_card.html" import sut_card %}
{% from "macros/use_harms_limitations.html" import use_harms_limitations %}

{% extends "base.html" %}

{% block title %}{{ benchmark_name }} Benchmark{% endblock %}

{% block content %}
<div class="px-4 py-5 my-5 text-center container-lg mx-auto">

{% include "_breadcrumb.html" %}
{{ breadcrumb(benchmark_score, benchmark_score.benchmark_definition) }}

<div class="text-start mb-5">
<h2>Test Report</h2>
Expand All @@ -18,9 +23,9 @@ <h1>{{ benchmark_score.sut.name }} - {{ benchmark_score.benchmark_definition.nam
</div>
</div>

{% include "_use_harms_limitations.html" %}
{{ use_harms_limitations(benchmark_score.benchmark_definition) }}

{% include "_interpret_safety_ratings.html" %}
{{ interpret_safety_ratings(stars_description) }}

<div class="text-start my-5">
<h2 class="mb-3">Overall Safety Rating</h2>
Expand All @@ -29,7 +34,7 @@ <h2 class="mb-3">Overall Safety Rating</h2>
nostrud</p>
</div>

{% include "_sut_card.html" %}
{{ sut_card(benchmark_score, stars_description) }}

<div class="text-start my-5">
<h2 class="mb-3">Tests Run</h2>
Expand Down
57 changes: 57 additions & 0 deletions tests/templates/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
from itertools import groupby
import pytest
import pathlib

from jinja2 import Environment, FileSystemLoader

from coffee.helm import HelmSut
from coffee.benchmark import (
GeneralChatBotBenchmarkDefinition,
BiasHarmDefinition,
HarmScore,
BenchmarkScore,
ToxicityHarmDefinition,
)
from coffee.static_site_generator import STARS_DESCRIPTION, display_stars


def _benchmark_score() -> BenchmarkScore:
    """Build the sample score shared by the template fixtures.

    The score pairs a general chat bot benchmark definition with the GPT2
    SUT and two harm scores: bias at 0.5 and toxicity at 0.8.
    """
    definition = GeneralChatBotBenchmarkDefinition()
    harm_scores = [
        HarmScore(BiasHarmDefinition(), 0.5),
        HarmScore(ToxicityHarmDefinition(), 0.8),
    ]
    return BenchmarkScore(definition, HelmSut.GPT2, harm_scores)


@pytest.fixture()
def benchmark_score() -> BenchmarkScore:
    """Provide a freshly built sample ``BenchmarkScore`` to each test."""
    sample_score = _benchmark_score()
    return sample_score


@pytest.fixture()
def grouped_benchmark_scores() -> dict[str, list[BenchmarkScore]]:
    """Provide the sample score grouped under its benchmark definition.

    Returns:
        A mapping with one entry per distinct ``benchmark_definition`` found
        in the sample data, each holding the list of scores that share it.
    """
    # NOTE(review): the keys are the ``benchmark_definition`` objects
    # themselves, not strings as the return annotation suggests.
    sample_scores = [_benchmark_score()]
    by_definition = groupby(sample_scores, lambda score: score.benchmark_definition)
    return {definition: list(scores) for definition, scores in by_definition}


@pytest.fixture()
def template_env() -> Environment:
    """Provide a Jinja environment that loads the ``coffee`` templates.

    The template directory is resolved relative to this file (three levels
    up, then ``src/coffee/templates``), and the ``display_stars`` filter is
    registered because the macros under test use it.
    """
    this_dir = pathlib.Path(__file__).parent
    repo_root = this_dir.parent.parent
    templates_path = repo_root.joinpath("src", "coffee", "templates")

    jinja_env = Environment(loader=FileSystemLoader(templates_path))
    jinja_env.filters["display_stars"] = display_stars
    return jinja_env


@pytest.fixture()
def stars_description() -> dict[int, dict[str, str]]:
    """Expose the site generator's ``STARS_DESCRIPTION`` table to tests."""
    descriptions = STARS_DESCRIPTION
    return descriptions
8 changes: 8 additions & 0 deletions tests/templates/macros/test_benchmark_card.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
def test_display_benchmark_card(benchmark_score, template_env):
    """Check the ``benchmark_card`` macro with and without its header.

    With the header enabled, the benchmark title and the card body must both
    be present. With it disabled, the title must be gone while the card body
    is still rendered.
    """
    template = template_env.get_template("macros/benchmark_card.html")
    definition = benchmark_score.benchmark_definition

    result = template.module.benchmark_card(True, definition)
    assert "General Chat Bot Benchmark" in result
    assert "Lorem ipsum dolor sit amet" in result

    result_no_header = template.module.benchmark_card(False, definition)
    assert "General Chat Bot Benchmark" not in result_no_header
    # Assert on the header-less render: the original re-checked ``result``,
    # which left the body of this variant unverified.
    assert "Lorem ipsum dolor sit amet" in result_no_header
Loading

0 comments on commit d3deae2

Please sign in to comment.