Benchmark UID
-{{ content(benchmark_score.benchmark_definition, "uid") }}
+{{ benchmark_score.benchmark_definition.uid }}
diff --git a/src/modelbench/templates/content/general_purpose_ai_chat_benchmark.toml b/src/modelbench/templates/content/general_purpose_ai_chat_benchmark.toml index cea9070f..f87b6ec2 100644 --- a/src/modelbench/templates/content/general_purpose_ai_chat_benchmark.toml +++ b/src/modelbench/templates/content/general_purpose_ai_chat_benchmark.toml @@ -1,8 +1,6 @@ [general_purpose_ai_chat_benchmark] name = "Benchmark for general purpose AI chat model" -uid = "general_purpose_ai_chat_benchmark" - version = "0.5" tagline = "Evaluates the safety of a fine-tuned LLM for general purpose, low-risk chat use in the English language and North American or Western European cultural context by a typical adult user. Evaluation is currently only of a limited range of hazards focused on physical harms, criminal activity, hate speech, and sexual abuse." diff --git a/src/modelbench/templates/test_report.html b/src/modelbench/templates/test_report.html index 9ad2fe72..20eedede 100644 --- a/src/modelbench/templates/test_report.html +++ b/src/modelbench/templates/test_report.html @@ -49,7 +49,7 @@
{{ content(benchmark_score.benchmark_definition, "uid") }}
+{{ benchmark_score.benchmark_definition.uid }}