Skip to content

Commit

Permalink
add tests for repetitions
Browse files: browse the repository at this point in the history
  • Loading branch information
samnoyes committed May 23, 2024
1 parent 61e1b46 commit 88d70c8
Showing 1 changed file with 18 additions and 2 deletions.
20 changes: 18 additions & 2 deletions python/tests/evaluation/test_evaluation.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -29,7 +29,7 @@ def precision(runs: Sequence[Run], examples: Sequence[Example]):
def predict(inputs: dict) -> dict:
return {"output": "Yes"}

evaluate(
results = evaluate(
predict,
data=dataset_name,
evaluators=[accuracy],
Expand All @@ -39,7 +39,13 @@ def predict(inputs: dict) -> dict:
"my-prompt-version": "abcd-1234",
"function": "evaluate",
},
num_repetitions=3,
)
results.wait()
assert len(results) == 30
examples = client.list_examples(dataset_name=dataset_name)
for example in examples:
assert len([r for r in results if r["example"].id == example.id]) == 3


async def test_aevaluate():
Expand All @@ -65,7 +71,7 @@ async def apredict(inputs: dict) -> dict:
await asyncio.sleep(0.1)
return {"output": "Yes"}

await aevaluate(
results = await aevaluate(
apredict,
data=dataset_name,
evaluators=[accuracy],
Expand All @@ -76,7 +82,17 @@ async def apredict(inputs: dict) -> dict:
"my-prompt-version": "abcd-1234",
"function": "aevaluate",
},
num_repetitions=2,
)
assert len(results) == 20
examples = client.list_examples(dataset_name=dataset_name)
all_results = [r async for r in results]
for example in examples:
count = 0
for r in all_results:
if r["run"].reference_example_id == example.id:
count += 1
assert count == 2


@unit
Expand Down

0 comments on commit 88d70c8

Please sign in to comment.