Skip to content

Commit

Permalink
Annotator exceptions are always fatal.
Browse files: browse the repository at this point in the history
  • Loading branch information
wpietri committed Nov 1, 2024
1 parent 29c1c6c commit cc01e82
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 9 deletions.
9 changes: 5 additions & 4 deletions src/modelbench/benchmark_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ def _add_test_annotators(self, test: PromptResponseTest):
self.test_annotators[test.uid] = annotators

def add_finished_item(self, item: "TestRunItem"):
if item.completion() and item.annotations and not item.fatal_exceptions:
if item.completion() and item.annotations and not item.exceptions:
self.finished_items[item.sut.key][item.test.uid].append(item)
self.journal.item_entry("item finished", item)
else:
Expand All @@ -160,7 +160,7 @@ def add_finished_item(self, item: "TestRunItem"):
item,
completion=bool(item.completion()),
annotations=len(item.annotations),
fatal_exceptions=len(item.fatal_exceptions),
fatal_exceptions=len(item.exceptions),
)

self.completed_item_count += 1
Expand Down Expand Up @@ -312,7 +312,7 @@ def handle_item(self, item: TestRunItem):
self.test_run.journal.item_entry("translated sut response", item, response=response)

except Exception as e:
item.fatal_exceptions.append(e)
item.exceptions.append(e)
self.test_run.journal.item_exception_entry("sut exception", item, e)
logger.error(f"failure handling sut item {item}:", exc_info=e)
return item
Expand All @@ -333,7 +333,7 @@ def handle_item(self, item: TestRunItem) -> TestRunItem:
"measured item quality", item, measurements=item.measurements, run_time=timer
)
except Exception as e:
item.fatal_exceptions.append(e)
item.exceptions.append(e)
logger.error(f"failure handling annnotation for {item}", exc_info=e)
self.test_run.journal.item_exception_entry("annotation exception", item, e)
return item
Expand Down Expand Up @@ -373,6 +373,7 @@ def collect_annotations(self, item):

item.annotations[annotator.uid] = annotation
except Exception as e:
item.exceptions.append(e)
logger.error(f"failure handling annotation for {annotator.uid} and {item}", exc_info=e)
self.test_run.journal.item_exception_entry("annotator exception", item, e, annotator=annotator.uid)

Expand Down
2 changes: 1 addition & 1 deletion src/modelbench/benchmark_runner_items.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ class TestRunItem:
sut_response: SUTResponse = None
annotations: dict[str, Annotation] = dataclasses.field(default_factory=dict)
measurements: dict[str, float] = dataclasses.field(default_factory=dict)
fatal_exceptions: list = dataclasses.field(default_factory=list)
exceptions: list = dataclasses.field(default_factory=list)

def prompt_with_context(self) -> PromptWithContext:
return self.test_item.prompts[0]
Expand Down
8 changes: 4 additions & 4 deletions tests/modelbench_tests/test_benchmark_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,7 @@ def test_benchmark_sut_worker_throws_exception(
assert result.test_item == item_from_test
assert result.sut == exploding_sut
assert result.sut_response is None
assert isinstance(result.fatal_exceptions[0], ValueError)
assert isinstance(result.exceptions[0], ValueError)

assert "failure" in caplog.text

Expand All @@ -300,7 +300,7 @@ def test_test_annotation_worker(self, a_wrapped_test, tmp_path, item_from_test,
def test_benchmark_annotation_worker_ignores_failed(self, a_wrapped_test, tmp_path, item_from_test, a_sut):
baw = TestRunAnnotationWorker(self.a_run(tmp_path, suts=[a_sut]), NullCache())
pipeline_item = TestRunItem(a_wrapped_test, item_from_test, a_sut)
pipeline_item.fatal_exceptions.append(ValueError())
pipeline_item.exceptions.append(ValueError())

result = baw.handle_item(pipeline_item)

Expand All @@ -317,7 +317,7 @@ def test_benchmark_annotation_worker_throws_exception(
result = baw.handle_item(pipeline_item)

assert result.annotations == {}
assert len(pipeline_item.fatal_exceptions) == 0 # a single annotator failure is not fatal
assert len(pipeline_item.exceptions) == 1

assert "failure" in caplog.text

Expand All @@ -334,7 +334,7 @@ def test_benchmark_results_collector_handles_failed(self, a_sut, tmp_path, a_wra
run = self.a_run(tmp_path, suts=[a_sut])
brc = TestRunResultsCollector(run)
item = TestRunItem(a_wrapped_test, item_from_test, a_sut)
item.fatal_exceptions.append(ValueError("yes, this value error"))
item.exceptions.append(ValueError("yes, this value error"))

brc.handle_item(item)

Expand Down

0 comments on commit cc01e82

Please sign in to comment.