From 5eb82359ca44bc20a9e5e403989159dff34ef3c4 Mon Sep 17 00:00:00 2001 From: Charles Zaloom <38677807+czaloom@users.noreply.github.com> Date: Fri, 14 Jun 2024 17:02:41 -0500 Subject: [PATCH] Add Delete Evaluation Endpoint (#627) --- .../crud/test_evaluation_crud.py | 175 +++++++++++++++++- api/valor_api/backend/__init__.py | 2 + api/valor_api/backend/core/__init__.py | 2 + api/valor_api/backend/core/evaluation.py | 35 ++++ api/valor_api/crud/__init__.py | 3 +- api/valor_api/crud/_delete.py | 19 ++ api/valor_api/main.py | 35 ++++ client/valor/client.py | 13 ++ client/valor/coretypes.py | 24 +++ .../evaluations/test_evaluation_crud.py | 155 ++++++++++++++++ 10 files changed, 461 insertions(+), 2 deletions(-) create mode 100644 integration_tests/client/evaluations/test_evaluation_crud.py diff --git a/api/tests/functional-tests/crud/test_evaluation_crud.py b/api/tests/functional-tests/crud/test_evaluation_crud.py index 7636f76f8..482bb2bfc 100644 --- a/api/tests/functional-tests/crud/test_evaluation_crud.py +++ b/api/tests/functional-tests/crud/test_evaluation_crud.py @@ -1,9 +1,10 @@ import pytest +from sqlalchemy import func, select from sqlalchemy.exc import IntegrityError from sqlalchemy.orm import Session from valor_api import crud, enums, exceptions, schemas -from valor_api.backend import core +from valor_api.backend import core, models def test_evaluation_creation_exceptions(db: Session): @@ -226,3 +227,175 @@ def test_restart_failed_evaluation(db: Session): assert len(evaluations4) == 1 assert evaluations4[0].status == enums.EvaluationStatus.DONE assert evaluations4[0].id == evaluations1[0].id + + +@pytest.fixture +def create_evaluations(db: Session): + + rows = [ + models.Evaluation( + id=idx, + dataset_names=["1", "2"], + model_name=str(idx), + parameters=schemas.EvaluationParameters( + task_type=enums.TaskType.CLASSIFICATION + ).model_dump(), + filters=schemas.Filter().model_dump(), + status=status, + ) + for idx, status in enumerate(enums.EvaluationStatus) + ] + + try: + db.add_all(rows) + db.commit() + except IntegrityError as e: + db.rollback() + raise e + + yield [(row.id, row.status) for row in rows] + + for row in rows: + try: + db.delete(row) + except IntegrityError: + db.rollback() + + +def test_delete_evaluation(db: Session, create_evaluations): + + for idx, status in create_evaluations: + assert ( + db.scalar( + select(func.count(models.Evaluation.id)).where( + models.Evaluation.id == idx + ) + ) + == 1 + ) + if status in { + enums.EvaluationStatus.PENDING, + enums.EvaluationStatus.RUNNING, + }: + with pytest.raises(exceptions.EvaluationRunningError): + crud.delete_evaluation(db=db, evaluation_id=idx) + assert ( + db.scalar( + select(func.count(models.Evaluation.id)).where( + models.Evaluation.id == idx + ) + ) + == 1 + ) + elif status == enums.EvaluationStatus.DELETING: + with pytest.raises(exceptions.EvaluationDoesNotExistError): + crud.delete_evaluation(db=db, evaluation_id=idx) + assert ( + db.scalar( + select(func.count(models.Evaluation.id)).where( + models.Evaluation.id == idx + ) + ) + == 1 + ) + else: + crud.delete_evaluation(db=db, evaluation_id=idx) + assert ( + db.scalar( + select(func.count(models.Evaluation.id)).where( + models.Evaluation.id == idx + ) + ) + == 0 + ) + + # check for id that doesnt exist + with pytest.raises(exceptions.EvaluationDoesNotExistError): + crud.delete_evaluation(db=db, evaluation_id=10000) + + +@pytest.fixture +def create_evaluation_with_metrics(db: Session): + evaluation_id = 0 + number_of_metrics = 4 + + evaluation = 
models.Evaluation( + id=evaluation_id, + dataset_names=["1", "2"], + model_name="3", + parameters=schemas.EvaluationParameters( + task_type=enums.TaskType.CLASSIFICATION + ).model_dump(), + filters=schemas.Filter().model_dump(), + status=enums.EvaluationStatus.DONE, + ) + metrics = [ + models.Metric( + evaluation_id=evaluation_id, + label_id=None, + type="Precision", + value=float(i) / float(number_of_metrics), + parameters=dict(), + ) + for i in range(number_of_metrics) + ] + + try: + db.add(evaluation) + db.add_all(metrics) + db.commit() + except IntegrityError as e: + db.rollback() + raise e + + yield (evaluation_id, number_of_metrics) + + for row in [evaluation, *metrics]: + try: + db.delete(row) + db.commit() + except IntegrityError: + db.rollback() + + +def test_delete_evaluation_with_metrics( + db: Session, create_evaluation_with_metrics +): + row_id, num_metrics = create_evaluation_with_metrics + + assert num_metrics == 4 + assert ( + db.scalar( + select(func.count(models.Evaluation.id)).where( + models.Evaluation.id == row_id + ) + ) + == 1 + ) + assert ( + db.scalar( + select(func.count(models.Metric.id)).where( + models.Metric.evaluation_id == row_id + ) + ) + == num_metrics + ) + + crud.delete_evaluation(db=db, evaluation_id=row_id) + + assert ( + db.scalar( + select(func.count(models.Evaluation.id)).where( + models.Evaluation.id == row_id + ) + ) + == 0 + ) + assert ( + db.scalar( + select(func.count(models.Metric.id)).where( + models.Metric.evaluation_id == row_id + ) + ) + == 0 + ) diff --git a/api/valor_api/backend/__init__.py b/api/valor_api/backend/__init__.py index ecbb64656..feded4d84 100644 --- a/api/valor_api/backend/__init__.py +++ b/api/valor_api/backend/__init__.py @@ -5,6 +5,7 @@ create_or_get_evaluations, create_predictions, delete_dataset, + delete_evaluation_from_id, delete_model, get_dataset, get_dataset_status, @@ -45,6 +46,7 @@ "create_predictions", "delete_dataset", "delete_model", + "delete_evaluation_from_id", "get_dataset", "get_paginated_datasets", "get_dataset_summary", diff --git a/api/valor_api/backend/core/__init__.py b/api/valor_api/backend/core/__init__.py index a39a3e640..a4f26870e 100644 --- a/api/valor_api/backend/core/__init__.py +++ b/api/valor_api/backend/core/__init__.py @@ -32,6 +32,7 @@ from .evaluation import ( count_active_evaluations, create_or_get_evaluations, + delete_evaluation_from_id, delete_evaluations, fetch_evaluation_from_id, get_evaluation_requests_from_model, @@ -131,6 +132,7 @@ "get_paginated_evaluations", "get_evaluation_status", "set_evaluation_status", + "delete_evaluation_from_id", "get_evaluation_requests_from_model", "count_active_evaluations", "delete_dataset_annotations", diff --git a/api/valor_api/backend/core/evaluation.py b/api/valor_api/backend/core/evaluation.py index 52120db53..82003e09f 100644 --- a/api/valor_api/backend/core/evaluation.py +++ b/api/valor_api/backend/core/evaluation.py @@ -1055,3 +1055,38 @@ def delete_evaluations( except IntegrityError as e: db.rollback() raise e + + +def delete_evaluation_from_id(db: Session, evaluation_id: int): + """ + Delete a evaluation by id. + + Parameters + ---------- + db : Session + The database session. + evaluation_id : int + The evaluation identifer. + + Raises + ------ + EvaluationRunningError + If the evaluation is currently running. + EvaluationDoesNotExistError + If the evaluation does not exist. 
+    """
+    evaluation = fetch_evaluation_from_id(db=db, evaluation_id=evaluation_id)
+    if evaluation.status in {
+        enums.EvaluationStatus.PENDING,
+        enums.EvaluationStatus.RUNNING,
+    }:
+        raise exceptions.EvaluationRunningError
+    elif evaluation.status == enums.EvaluationStatus.DELETING:
+        return
+
+    try:
+        db.delete(evaluation)
+        db.commit()
+    except IntegrityError as e:
+        db.rollback()
+        raise e
diff --git a/api/valor_api/crud/__init__.py b/api/valor_api/crud/__init__.py
index 1bd0ace4a..1500baa4c 100644
--- a/api/valor_api/crud/__init__.py
+++ b/api/valor_api/crud/__init__.py
@@ -5,7 +5,7 @@
     create_or_get_evaluations,
     create_predictions,
 )
-from ._delete import delete
+from ._delete import delete, delete_evaluation
 from ._read import (
     get_dataset,
     get_dataset_summary,
@@ -44,4 +44,5 @@
     "get_prediction",
     "finalize",
     "get_evaluations",
+    "delete_evaluation",
 ]
diff --git a/api/valor_api/crud/_delete.py b/api/valor_api/crud/_delete.py
index 60c6d6706..88155f075 100644
--- a/api/valor_api/crud/_delete.py
+++ b/api/valor_api/crud/_delete.py
@@ -25,3 +25,22 @@ def delete(
         backend.delete_dataset(db, dataset_name)
     elif model_name is not None:
         backend.delete_model(db, model_name)
+
+
+def delete_evaluation(*, db: Session, evaluation_id: int):
+    """
+    Deletes an evaluation by id.
+
+    Parameters
+    ----------
+    evaluation_id : int
+        The evaluation identifier.
+
+    Raises
+    ------
+    EvaluationRunningError
+        If the evaluation is currently running.
+    EvaluationDoesNotExistError
+        If the evaluation does not exist.
+    """
+    backend.delete_evaluation_from_id(db=db, evaluation_id=evaluation_id)
diff --git a/api/valor_api/main.py b/api/valor_api/main.py
index 39a055a60..399d77030 100644
--- a/api/valor_api/main.py
+++ b/api/valor_api/main.py
@@ -1183,6 +1183,41 @@ def get_evaluations(
         raise exceptions.create_http_error(e)
 
 
+@app.delete(
+    "/evaluations/{evaluation_id}",
+    dependencies=[Depends(token_auth_scheme)],
+    tags=["Evaluations"],
+)
+def delete_evaluation(
+    evaluation_id: int,
+    db: Session = Depends(get_db),
+):
+    """
+    Delete an evaluation from the database.
+
+    DELETE Endpoint: `/evaluations/{evaluation_id}`
+
+    Parameters
+    ----------
+    evaluation_id : int
+        The evaluation identifier.
+    db : Session
+        The database session to use. This parameter is a sqlalchemy dependency and shouldn't be submitted by the user.
+
+    Raises
+    ------
+    HTTPException (404)
+        If the evaluation doesn't exist.
+    HTTPException (409)
+        If the evaluation isn't in the correct state to be deleted.
+    """
+    logger.debug(f"request to delete evaluation {evaluation_id}")
+    try:
+        crud.delete_evaluation(db=db, evaluation_id=evaluation_id)
+    except Exception as e:
+        raise exceptions.create_http_error(e)
+
+
 """ AUTHENTICATION """
diff --git a/client/valor/client.py b/client/valor/client.py
index 339de8b30..9f27f4b3b 100644
--- a/client/valor/client.py
+++ b/client/valor/client.py
@@ -836,6 +836,19 @@ def _build_query_param(param_name, element, typ):
 
         return self._requests_get_rel_host(endpoint).json()
 
+    def delete_evaluation(self, evaluation_id: int) -> None:
+        """
+        Deletes an evaluation.
+
+        `DELETE` endpoint.
+
+        Parameters
+        ----------
+        evaluation_id : int
+            The id of the evaluation to be deleted.
+        """
+        self._requests_delete_rel_host(f"evaluations/{evaluation_id}")
+
     def get_user(self) -> Union[str, None]:
         """
         Gets the users e-mail address (in the case when auth is enabled)
diff --git a/client/valor/coretypes.py b/client/valor/coretypes.py
index b756e3bec..7f20970e4 100644
--- a/client/valor/coretypes.py
+++ b/client/valor/coretypes.py
@@ -1804,3 +1804,27 @@ def evaluate(
                 request, allow_retries=allow_retries
             )
         ]
+
+    def delete_evaluation(self, evaluation_id: int, timeout: int = 0) -> None:
+        """
+        Deletes an evaluation.
+
+        Parameters
+        ----------
+        evaluation_id : int
+            The id of the evaluation to be deleted.
+        timeout : int
+            The number of seconds to wait in order to confirm that the evaluation was deleted.
+        """
+        self.conn.delete_evaluation(evaluation_id)
+        if timeout:
+            for _ in range(timeout):
+                try:
+                    self.get_evaluations(evaluation_ids=[evaluation_id])
+                except EvaluationDoesNotExist:
+                    break
+                time.sleep(1)
+            else:
+                raise TimeoutError(
+                    "Evaluation wasn't deleted within timeout interval"
+                )
diff --git a/integration_tests/client/evaluations/test_evaluation_crud.py b/integration_tests/client/evaluations/test_evaluation_crud.py
new file mode 100644
index 000000000..fae632b72
--- /dev/null
+++ b/integration_tests/client/evaluations/test_evaluation_crud.py
@@ -0,0 +1,155 @@
+import pytest
+from sqlalchemy import func, select
+from sqlalchemy.exc import IntegrityError
+from sqlalchemy.orm import Session
+
+# client
+from valor import Client, Dataset, GroundTruth, Label, Model, Prediction
+from valor.enums import EvaluationStatus
+from valor.exceptions import ClientException
+
+# api
+from valor_api import enums, schemas
+from valor_api.backend import models
+
+
+@pytest.fixture
+def create_evaluations(db: Session):
+
+    rows = [
+        models.Evaluation(
+            id=idx,
+            dataset_names=["1", "2"],
+            model_name=str(idx),
+            parameters=schemas.EvaluationParameters(
+                task_type=enums.TaskType.CLASSIFICATION
+            ).model_dump(),
+            filters=schemas.Filter().model_dump(),
+            status=status,
+        )
+        for idx, status in enumerate(enums.EvaluationStatus)
+    ]
+
+    try:
+        db.add_all(rows)
+        db.commit()
+    except IntegrityError as e:
+        db.rollback()
+        raise e
+
+    yield [(row.id, row.status) for row in rows]
+
+    for row in rows:
+        try:
+            db.delete(row)
+        except IntegrityError:
+            db.rollback()
+
+
+def test_delete_evaluation(db: Session, client: Client, create_evaluations):
+
+    for idx, status in create_evaluations:
+        assert (
+            db.scalar(
+                select(func.count(models.Evaluation.id)).where(
+                    models.Evaluation.id == idx
+                )
+            )
+            == 1
+        )
+
+        if status in {
+            enums.EvaluationStatus.PENDING,
+            enums.EvaluationStatus.RUNNING,
+        }:
+            with pytest.raises(ClientException) as e:
+                client.delete_evaluation(evaluation_id=idx)
+            assert "EvaluationRunningError" in str(e)
+            assert (
+                db.scalar(
+                    select(func.count(models.Evaluation.id)).where(
+                        models.Evaluation.id == idx
+                    )
+                )
+                == 1
+            )
+        elif status == enums.EvaluationStatus.DELETING:
+            with pytest.raises(ClientException) as e:
+                client.delete_evaluation(evaluation_id=idx)
+            assert "EvaluationDoesNotExistError" in str(e)
+            assert (
+                db.scalar(
+                    select(func.count(models.Evaluation.id)).where(
+                        models.Evaluation.id == idx
+                    )
+                )
+                == 1
+            )
+        else:
+            client.delete_evaluation(evaluation_id=idx)
+            assert (
+                db.scalar(
+                    select(func.count(models.Evaluation.id)).where(
+                        models.Evaluation.id == idx
+                    )
+                )
+                == 0
+            )
+
+    # check for an id that doesn't exist
+    with pytest.raises(ClientException) as e:
+        client.delete_evaluation(evaluation_id=10000)
"EvaluationDoesNotExistError" in str(e) + + +def test_delete_evaluation_scope( + client: Client, + gt_clfs: list[GroundTruth], + pred_clfs: list[Prediction], + dataset_name: str, + model_name: str, +): + dataset = Dataset.create(dataset_name) + for gt in gt_clfs: + dataset.add_groundtruth(gt) + dataset.finalize() + + model = Model.create(model_name) + for pd in pred_clfs: + model.add_prediction(dataset, pd) + model.finalize_inferences(dataset) + + eval1 = model.evaluate_classification(dataset) + assert eval1.wait_for_completion(timeout=30) == EvaluationStatus.DONE + eval2 = model.evaluate_classification( + dataset, filter_by=[Label.key == "k4"] + ) + assert eval2.wait_for_completion(timeout=30) + + assert eval1.id != eval2.id + assert len(client.get_evaluations(evaluation_ids=[eval1.id])) == 1 + assert len(client.get_evaluations(evaluation_ids=[eval2.id])) == 1 + + # delete eval 1 + client.delete_evaluation(eval1.id) + + assert len(client.get_evaluations(evaluation_ids=[eval1.id])) == 0 + assert len(client.get_evaluations(evaluation_ids=[eval2.id])) == 1 + + # show that we can still make evaluations + eval3 = model.evaluate_classification(dataset) + assert eval3.wait_for_completion(timeout=30) + + assert eval1.id != eval2.id + assert eval1.id != eval3.id + assert eval2.id != eval3.id + assert len(client.get_evaluations(evaluation_ids=[eval1.id])) == 0 + assert len(client.get_evaluations(evaluation_ids=[eval2.id])) == 1 + assert len(client.get_evaluations(evaluation_ids=[eval3.id])) == 1 + + # show that eval1 was repreated in eval3 + assert eval1.id != eval3.id + for metric in eval1.metrics: + assert metric in eval3.metrics + for metric in eval3.metrics: + assert metric in eval1.metrics