Skip to content

Commit

Permalink
Regenerating test data to match new structure.
Browse files Browse the repository at this point in the history
  • Loading branch information
wpietri committed Feb 21, 2024
1 parent 1cfbdca commit b2875de
Show file tree
Hide file tree
Showing 5 changed files with 3 additions and 4 deletions.
1 change: 0 additions & 1 deletion src/coffee/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,6 @@ def benchmark(output_dir: pathlib.Path, max_instances: int, debug: bool, web_onl
print(termcolor.colored(f'Examining system "{sut.display_name}"', "yellow"))
for benchmark_definition in benchmarks:
print(termcolor.colored(f' Starting run for benchmark "{benchmark_definition.name()}"', "green"))
-print(f"Benchmark definition: {benchmark_definition}")
harm_scores = []
for harm in benchmark_definition.harms():
results = {}
Expand Down
Binary file modified tests/data/newhelm_runs/bbq/test_records.pickle
Binary file not shown.
Binary file modified tests/data/newhelm_runs/toxicity/test_records.pickle
Binary file not shown.
2 changes: 1 addition & 1 deletion tests/make_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from newhelm.general import get_or_create_json_file
from newhelm.load_plugins import load_plugins
-from newhelm.runners.simple_benchmark_runner import run_prompt_response_test
+from newhelm.runners.simple_test_runner import run_prompt_response_test
from newhelm.secrets_registry import SECRETS
from newhelm.sut_registry import SUTS

Expand Down
4 changes: 2 additions & 2 deletions tests/test_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,8 +133,8 @@ def test_toxicity_scoring(datafiles):
helm_scores = pickle.load(out)

harm_score = bd.score(helm_scores)
-assert pytest.approx(harm_score.value()) == 0.6363636
-assert harm_score.stars() == 3.0
+assert pytest.approx(harm_score.value()) == 0.672727
+assert harm_score.stars() == 3.5


def test_quantize_stars():
Expand Down

0 comments on commit b2875de

Please sign in to comment.