From d2b9cb6f9bb86282f322bb512c087aa7562fe092 Mon Sep 17 00:00:00 2001 From: Sebastian Niehus Date: Thu, 16 May 2024 10:59:40 +0200 Subject: [PATCH] fix: Fix test for incremental evaluator. Rename test_diff_evaluator.py to test_incremental_evaluator.py Task: IL-394 --- .../evaluation/evaluator/incremental_evaluator.py | 2 +- ...diff_evaluator.py => test_incremental_evaluator.py} | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) rename tests/evaluation/{test_diff_evaluator.py => test_incremental_evaluator.py} (90%) diff --git a/src/intelligence_layer/evaluation/evaluation/evaluator/incremental_evaluator.py b/src/intelligence_layer/evaluation/evaluation/evaluator/incremental_evaluator.py index 237386340..75164afc6 100644 --- a/src/intelligence_layer/evaluation/evaluation/evaluator/incremental_evaluator.py +++ b/src/intelligence_layer/evaluation/evaluation/evaluator/incremental_evaluator.py @@ -58,7 +58,7 @@ def do_evaluate( [output for output in outputs if output.run_id in run_output_ids] ) - return self.do_incremental_evaluate(example, outputs, already_evaluated_outputs) + return self.do_incremental_evaluate(example, list(outputs), already_evaluated_outputs) @abstractmethod def do_incremental_evaluate( diff --git a/tests/evaluation/test_diff_evaluator.py b/tests/evaluation/test_incremental_evaluator.py similarity index 90% rename from tests/evaluation/test_diff_evaluator.py rename to tests/evaluation/test_incremental_evaluator.py index 6a96ad004..281db921a 100644 --- a/tests/evaluation/test_diff_evaluator.py +++ b/tests/evaluation/test_incremental_evaluator.py @@ -14,7 +14,7 @@ class DummyEvaluation(BaseModel): - new_run_ids: list[str] + all_run_ids: list[str] old_run_ids: list[list[str]] @@ -29,7 +29,7 @@ def do_incremental_evaluate( already_evaluated_outputs: list[list[SuccessfulExampleOutput[str]]], ) -> DummyEvaluation: return DummyEvaluation( - new_run_ids=[output.run_id for output in outputs], + all_run_ids=[output.run_id for output in outputs], old_run_ids=[ [output.run_id for output in evaluated_output] for evaluated_output in already_evaluated_outputs @@ -46,7 +46,7 @@ def do_run(self, input: str, tracer: Tracer) -> str: return f"{input} {self._info}" -def test_incremental_evaluator_should_filter_previous_run_ids() -> None: +def test_incremental_evaluator_separates_all_runs_and_previous_runs() -> None: # Given examples = [Example(input="a", expected_output="0", id="id_0")] dataset_repository = InMemoryDatasetRepository() @@ -89,7 +89,7 @@ def create_run(name: str) -> str: iter(evaluator.evaluation_lineages(second_evaluation_overview.id)) ).evaluation.result assert isinstance(second_result, DummyEvaluation) - assert second_result.new_run_ids == [second_run_id] + assert second_result.all_run_ids == [first_run_id, second_run_id] assert second_result.old_run_ids == [[first_run_id]] independent_run_id = create_run("independent") @@ -115,6 +115,6 @@ def create_run(name: str) -> str: iter(evaluator.evaluation_lineages(third_evaluation_overview.id)) ).evaluation.result assert isinstance(third_result, DummyEvaluation) - assert third_result.new_run_ids == [third_run_id] + assert sorted(third_result.all_run_ids) == sorted([first_run_id, second_run_id, independent_run_id, third_run_id]) assert sorted(third_result.old_run_ids[0]) == sorted([first_run_id, second_run_id]) assert sorted(third_result.old_run_ids[1]) == sorted([independent_run_id])