diff --git a/README.md b/README.md index 3e5d6cc..d5be0bf 100644 --- a/README.md +++ b/README.md @@ -302,7 +302,7 @@ logs = [] for sample in dataset: prompt = template(sample) response = llm(prompt) - judge_output, score = judge(response) + judge_output, score = judge(response, sample.answer) logs.append({ "sample": sample.model_dump(), diff --git a/docs/quickstart/mcq.md b/docs/quickstart/mcq.md index e20eb50..c692594 100644 --- a/docs/quickstart/mcq.md +++ b/docs/quickstart/mcq.md @@ -50,7 +50,7 @@ logs = [] for sample in dataset: prompt = template(sample) response = llm(prompt) - judge_output, score = judge(response) + judge_output, score = judge(response, sample.answer) logs.append({ "sample": sample.model_dump(),