Commit
fix: add validation for evaluation dataset fields, update logging info for eval api request count

PiperOrigin-RevId: 633729236
jsondai authored and copybara-github committed May 14, 2024
1 parent 7ff8071 commit d6ef500
Showing 3 changed files with 33 additions and 4 deletions.
13 changes: 13 additions & 0 deletions tests/unit/vertexai/test_evaluation.py
@@ -571,6 +571,19 @@ def test_evaluate_pairwise_metrics_with_multiple_baseline_models(self):
         ):
             test_eval_task.evaluate(model=mock_candidate_model)
 
+    def test_evaluate_invalid_model_and_dataset_input(self):
+        test_eval_task = evaluation.EvalTask(
+            dataset=_TEST_EVAL_DATASET,
+            metrics=_TEST_METRICS,
+        )
+        with pytest.raises(
+            ValueError,
+            match=("The `model` parameter is specified, but the evaluation `dataset`"),
+        ):
+            test_eval_task.evaluate(
+                model=generative_models.GenerativeModel(model_name="invalid_model_name")
+            )
+
 
 @pytest.mark.usefixtures("google_auth_mock")
 class TestEvaluationUtils:
2 changes: 1 addition & 1 deletion vertexai/preview/evaluation/_eval_tasks.py
@@ -79,7 +79,7 @@ class EvalTask:
     documentation page [Evaluation methods and metrics](https://cloud.google.com/vertex-ai/generative-ai/docs/models/determine-eval).
 
     Usage:
-    1. To perform bring your own prediction evaluation, provide the model
+    1. To perform bring-your-own-prediction(BYOP) evaluation, provide the model
     responses in the response column in the dataset. The response column name
     is "response" by default, or specify `response_column_name` parameter to
     customize.
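For context, a minimal BYOP usage sketch in the spirit of this docstring (not part of the commit; the prompt column name, the "exact_match" metric, and the vertexai.init() setup are assumptions):

# Hedged sketch of BYOP evaluation: the dataset already carries a "response"
# column, so evaluate() is called without a `model` argument.
# Assumes vertexai.init(project=..., location=...) was called beforehand.
import pandas as pd
from vertexai.preview import evaluation

eval_dataset = pd.DataFrame(
    {
        "content": ["What is 2 + 2?"],        # assumed prompt column name
        "reference": ["4"],
        "response": ["2 + 2 equals 4."],      # pre-computed model response
    }
)
eval_task = evaluation.EvalTask(dataset=eval_dataset, metrics=["exact_match"])
eval_result = eval_task.evaluate()            # no `model` argument: BYOP path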
22 changes: 19 additions & 3 deletions vertexai/preview/evaluation/_evaluation.py
@@ -534,8 +534,7 @@ async def _compute_metrics(
                 metric_name = metric
             tasks_by_metric[metric_name].append(task)
 
-    api_request_count = (len(api_metrics) + len(custom_metrics)) * len(
-        evaluation_run_config.dataset)
+    api_request_count = len(api_metrics) * len(evaluation_run_config.dataset)
     _LOGGER.info(
         f"Computing metrics with a total of {api_request_count} Vertex online"
         " evaluation service requests."
@@ -629,7 +628,8 @@ def evaluate(
     Raises:
         ValueError: If the metrics list is empty, or the prompt template is not
           provided for PairwiseMetric, or multiple baseline models are specified for
-          PairwiseMetric instances.
+          PairwiseMetric instances, or both model and dataset model response column
+          are present.
     """
 
     if not metrics:
@@ -655,6 +655,22 @@
             constants.Dataset.REFERENCE_COLUMN
         )
 
+    if (
+        model
+        and evaluation_run_config.column_map.get(
+            constants.Dataset.MODEL_RESPONSE_COLUMN
+        )
+        in dataset.columns
+    ):
+        raise ValueError(
+            "The `model` parameter is specified, but the evaluation `dataset`"
+            f" contains model response column `{response_column_name}` to perform"
+            " bring-your-own-prediction(BYOP) evaluation. If you would like to"
+            " perform rapid evaluation using the dataset with the existing model"
+            f" response column `{response_column_name}`, please remove the"
+            " `model` input parameter."
+        )
+
     baseline_model = None
     pairwise_metric_exists = any(
         isinstance(metric, metrics_base.PairwiseMetric)
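From the caller's side, the new check behaves roughly as in this hedged sketch (dataset columns, metric name, and model name are illustrative; vertexai.init() is assumed):

# Hedged sketch of the new validation; only EvalTask, evaluate(), `model`,
# and the default "response" column name are taken from this commit.
import pandas as pd
from vertexai.generative_models import GenerativeModel
from vertexai.preview import evaluation

dataset_with_responses = pd.DataFrame(
    {"content": ["What is 2 + 2?"], "response": ["4"]}
)
eval_task = evaluation.EvalTask(
    dataset=dataset_with_responses, metrics=["exact_match"]
)

candidate_model = GenerativeModel("gemini-1.0-pro")  # illustrative model name
# Raises ValueError after this commit: `model` conflicts with the existing
# "response" column in the dataset.
# eval_task.evaluate(model=candidate_model)

# Works: omit `model` and evaluate the responses already in the dataset.
eval_result = eval_task.evaluate()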
