Deletion Performance Optimization (#659)
czaloom authored Jul 9, 2024
1 parent e2152b1 commit 5605b48
Showing 11 changed files with 392 additions and 154 deletions.
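Based on the new test below, core.delete_evaluations handles deletion by evaluation status: PENDING and RUNNING evaluations raise exceptions.EvaluationRunningError, a DELETING evaluation is skipped (another worker is assumed to own it), and DONE or FAILED evaluations are removed together with their Metric and ConfusionMatrix rows. A minimal caller sketch of that behaviour; the wrapper function and its name are illustrative and not part of this commit:

from sqlalchemy.orm import Session

from valor_api import exceptions
from valor_api.backend import core


def try_delete_evaluation(db: Session, evaluation_id: int) -> bool:
    """Illustrative helper: request deletion of a single evaluation.

    Returns False when the evaluation is still PENDING or RUNNING; per the
    test below, DELETING evaluations are left alone, and DONE/FAILED
    evaluations are deleted along with their metrics and confusion matrices.
    """
    try:
        core.delete_evaluations(db=db, evaluation_ids=[evaluation_id])
        return True
    except exceptions.EvaluationRunningError:
        return False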
258 changes: 198 additions & 60 deletions api/tests/functional-tests/backend/core/test_evaluation.py
@@ -2,17 +2,88 @@

import pytest
from pydantic import ValidationError
+from sqlalchemy import func
+from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import Session

-from valor_api import enums, exceptions, schemas
+from valor_api import crud, enums, exceptions, schemas
from valor_api.backend import core, models
from valor_api.backend.core.evaluation import (
_fetch_evaluation_from_subrequest,
-    _fetch_evaluations_and_mark_for_deletion,
validate_request,
)


+@pytest.fixture
+def gt_clfs_create(
+    dataset_name: str,
+    img1: schemas.Datum,
+    img2: schemas.Datum,
+) -> list[schemas.GroundTruth]:
+    return [
+        schemas.GroundTruth(
+            dataset_name=dataset_name,
+            datum=img1,
+            annotations=[
+                schemas.Annotation(
+                    labels=[
+                        schemas.Label(key="k1", value="v1"),
+                        schemas.Label(key="k2", value="v2"),
+                    ],
+                ),
+            ],
+        ),
+        schemas.GroundTruth(
+            dataset_name=dataset_name,
+            datum=img2,
+            annotations=[
+                schemas.Annotation(
+                    labels=[schemas.Label(key="k2", value="v3")],
+                ),
+            ],
+        ),
+    ]
+
+
+@pytest.fixture
+def pred_clfs_create(
+    dataset_name: str,
+    model_name: str,
+    img1: schemas.Datum,
+    img2: schemas.Datum,
+) -> list[schemas.Prediction]:
+    return [
+        schemas.Prediction(
+            dataset_name=dataset_name,
+            model_name=model_name,
+            datum=img1,
+            annotations=[
+                schemas.Annotation(
+                    labels=[
+                        schemas.Label(key="k1", value="v1", score=0.2),
+                        schemas.Label(key="k1", value="v2", score=0.8),
+                        schemas.Label(key="k2", value="v4", score=1.0),
+                    ],
+                ),
+            ],
+        ),
+        schemas.Prediction(
+            dataset_name=dataset_name,
+            model_name=model_name,
+            datum=img2,
+            annotations=[
+                schemas.Annotation(
+                    labels=[
+                        schemas.Label(key="k2", value="v2", score=0.8),
+                        schemas.Label(key="k2", value="v3", score=0.1),
+                        schemas.Label(key="k2", value="v0", score=0.1),
+                    ],
+                ),
+            ],
+        ),
+    ]


@pytest.fixture
def finalized_dataset(db: Session, created_dataset: str) -> str:
core.set_dataset_status(
Expand Down Expand Up @@ -887,65 +958,132 @@ def test_count_active_evaluations(
)


-def test__fetch_evaluations_and_mark_for_deletion(
-    db: Session, finalized_dataset: str, finalized_model: str
+def test_delete_evaluations(
+    db: Session,
+    dataset_name: str,
+    model_name: str,
+    gt_clfs_create: list[schemas.GroundTruth],
+    pred_clfs_create: list[schemas.Prediction],
):
-    # create two evaluations
-    for metrics_to_return in [
-        [
-            enums.MetricType.Precision,
-            enums.MetricType.Recall,
-            enums.MetricType.F1,
-            enums.MetricType.Accuracy,
-            enums.MetricType.ROCAUC,
-        ],
-        [
-            enums.MetricType.Precision,
-            enums.MetricType.Recall,
-            enums.MetricType.F1,
-            enums.MetricType.Accuracy,
-            enums.MetricType.ROCAUC,
-            enums.MetricType.PrecisionRecallCurve,
-        ],
-    ]:
-        core.create_or_get_evaluations(
-            db,
-            schemas.EvaluationRequest(
-                dataset_names=[finalized_dataset],
-                model_names=[finalized_model],
-                parameters=schemas.EvaluationParameters(
-                    task_type=enums.TaskType.CLASSIFICATION,
-                    metrics_to_return=metrics_to_return,
-                ),
-            ),
-        )
-
-    # sanity check no evals are in deleting state
-    evals = db.query(models.Evaluation).all()
-    assert len(evals) == 2
-    assert all([e.status != enums.EvaluationStatus.DELETING for e in evals])
-    assert all(e.meta == {} for e in evals)
+    crud.create_dataset(
+        db=db,
+        dataset=schemas.Dataset(name=dataset_name),
+    )
+    for gt in gt_clfs_create:
+        gt.dataset_name = dataset_name
+        crud.create_groundtruths(db=db, groundtruths=[gt])
+    crud.finalize(db=db, dataset_name=dataset_name)
+
+    crud.create_model(db=db, model=schemas.Model(name=model_name))
+    for pd in pred_clfs_create:
+        pd.dataset_name = dataset_name
+        pd.model_name = model_name
+        crud.create_predictions(db=db, predictions=[pd])
+    crud.finalize(db=db, model_name=model_name, dataset_name=dataset_name)
+
+    job_request = schemas.EvaluationRequest(
+        dataset_names=[dataset_name],
+        model_names=[model_name],
+        parameters=schemas.EvaluationParameters(
+            task_type=enums.TaskType.CLASSIFICATION,
+        ),
+    )

-    eval_ids = [e.id for e in evals]
-    # fetch and update all evaluations, check they're in deleting status
-    evals = _fetch_evaluations_and_mark_for_deletion(
-        db, evaluation_ids=[eval_ids[0]]
+    # create clf evaluation
+    resp = crud.create_or_get_evaluations(
+        db=db,
+        job_request=job_request,
)
-    assert len(evals) == 1
-    assert evals[0].status == enums.EvaluationStatus.DELETING
+    assert len(resp) == 1
+    evaluation = db.query(models.Evaluation).one_or_none()
+    assert evaluation

-    # check the other evaluation is not in deleting status
-    assert (
-        db.query(models.Evaluation.status)
-        .where(models.Evaluation.id == eval_ids[1])
-        .scalar()
-        != enums.EvaluationStatus.DELETING
-    )
-    # now call _fetch_evaluations_and_mark_for_deletion with dataset name so expression (ignoring status) will match all evaluations
-    # but check only the second one was updated
-    evals = _fetch_evaluations_and_mark_for_deletion(
-        db, dataset_names=[finalized_dataset]
-    )
-    assert len(evals) == 1
-    assert evals[0].status == enums.EvaluationStatus.DELETING
-    assert evals[0].id == eval_ids[1]
+    for status in [
+        enums.EvaluationStatus.PENDING,
+        enums.EvaluationStatus.RUNNING,
+    ]:
+
+        # set status
+        try:
+            evaluation.status = status
+            db.commit()
+        except IntegrityError as e:
+            db.rollback()
+            raise e
+
+        # check quantities
+        assert db.scalar(func.count(models.Evaluation.id)) == 1
+        assert db.scalar(func.count(models.Metric.id)) == 22
+        assert db.scalar(func.count(models.ConfusionMatrix.id)) == 2
+
+        # attempt to delete evaluation while PENDING / RUNNING
+        with pytest.raises(exceptions.EvaluationRunningError):
+            core.delete_evaluations(db=db, evaluation_ids=[evaluation.id])
+
+        # check quantities
+        assert db.scalar(func.count(models.Evaluation.id)) == 1
+        assert db.scalar(func.count(models.Metric.id)) == 22
+        assert db.scalar(func.count(models.ConfusionMatrix.id)) == 2
+
+    # set status to deleting
+    try:
+        evaluation.status = enums.EvaluationStatus.DELETING
+        db.commit()
+    except IntegrityError as e:
+        db.rollback()
+        raise e
+
+    # attempt to delete evaluation with DELETING status
+    # should do nothing as another worker is handling it.
+    core.delete_evaluations(db=db, evaluation_ids=[evaluation.id])
+
+    # check quantities
+    assert db.scalar(func.count(models.Evaluation.id)) == 1
+    assert db.scalar(func.count(models.Metric.id)) == 22
+    assert db.scalar(func.count(models.ConfusionMatrix.id)) == 2
+
+    # set status to done
+    try:
+        evaluation.status = enums.EvaluationStatus.DONE
+        db.commit()
+    except IntegrityError as e:
+        db.rollback()
+        raise e
+
+    # attempt to delete evaluation with DONE status
+    core.delete_evaluations(db=db, evaluation_ids=[evaluation.id])
+
+    # check quantities
+    assert db.scalar(func.count(models.Evaluation.id)) == 0
+    assert db.scalar(func.count(models.Metric.id)) == 0
+    assert db.scalar(func.count(models.ConfusionMatrix.id)) == 0
+
+    # create clf evaluation (again)
+    resp = crud.create_or_get_evaluations(
+        db=db,
+        job_request=job_request,
+    )
+    assert len(resp) == 1
+    evaluation = db.query(models.Evaluation).one_or_none()
+    assert evaluation
+
+    # set status to failed
+    try:
+        evaluation.status = enums.EvaluationStatus.FAILED
+        db.commit()
+    except IntegrityError as e:
+        db.rollback()
+        raise e
+
+    # check quantities
+    assert db.scalar(func.count(models.Evaluation.id)) == 1
+    assert db.scalar(func.count(models.Metric.id)) == 22
+    assert db.scalar(func.count(models.ConfusionMatrix.id)) == 2
+
+    # attempt to delete evaluation with FAILED status
+    core.delete_evaluations(db=db, evaluation_ids=[evaluation.id])
+
+    # check quantities
+    assert db.scalar(func.count(models.Evaluation.id)) == 0
+    assert db.scalar(func.count(models.Metric.id)) == 0
+    assert db.scalar(func.count(models.ConfusionMatrix.id)) == 0
18 changes: 18 additions & 0 deletions api/tests/functional-tests/crud/test_create_delete.py
@@ -1353,6 +1353,15 @@ def method_to_test(
meta={},
)

+    # test evaluation deletion
+    assert db.scalar(func.count(models.Evaluation.id)) == 2
+    assert db.scalar(func.count(models.Metric.id)) == 28
+    assert db.scalar(func.count(models.ConfusionMatrix.id)) == 0
+    crud.delete(db=db, dataset_name=dataset_name)
+    assert db.scalar(func.count(models.Evaluation.id)) == 0
+    assert db.scalar(func.count(models.Metric.id)) == 0
+    assert db.scalar(func.count(models.ConfusionMatrix.id)) == 0


def test_create_clf_metrics(
db: Session,
@@ -1492,3 +1501,12 @@ def test_create_clf_metrics(
)
).all()
assert len(confusion_matrices) == 2

+    # test evaluation deletion
+    assert db.scalar(func.count(models.Evaluation.id)) == 1
+    assert db.scalar(func.count(models.Metric.id)) == 22
+    assert db.scalar(func.count(models.ConfusionMatrix.id)) == 2
+    crud.delete(db=db, dataset_name=dataset_name)
+    assert db.scalar(func.count(models.Evaluation.id)) == 0
+    assert db.scalar(func.count(models.Metric.id)) == 0
+    assert db.scalar(func.count(models.ConfusionMatrix.id)) == 0
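The new assertions above, and those in test_evaluation.py, all rely on the same SQLAlchemy row-counting pattern, db.scalar(func.count(<model>.id)). A small sketch of that pattern in isolation; the count_rows helper is ours for illustration and assumes an open Session against the same ORM models:

from sqlalchemy import func
from sqlalchemy.orm import Session

from valor_api.backend import models


def count_rows(db: Session, model_class) -> int:
    # Same pattern as the assertions above: COUNT over the primary-key column.
    return db.scalar(func.count(model_class.id))


# e.g. after crud.delete(db=db, dataset_name=dataset_name):
#     assert count_rows(db, models.Evaluation) == 0
#     assert count_rows(db, models.Metric) == 0
#     assert count_rows(db, models.ConfusionMatrix) == 0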
2 changes: 2 additions & 0 deletions api/valor_api/backend/core/__init__.py
@@ -26,6 +26,7 @@
from .datum import (
create_datum,
create_datums,
+    delete_datums,
fetch_datum,
get_paginated_datums,
)
@@ -84,6 +85,7 @@
"get_annotations",
"create_dataset",
"delete_dataset",
"delete_datums",
"fetch_dataset",
"get_dataset",
"get_paginated_datasets",
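The core/__init__.py hunk above only adds delete_datums to the module's exports; its signature is not included in this excerpt. A minimal sketch of the new import path, with the commented-out call and its arguments purely hypothetical:

from valor_api.backend import core

# delete_datums is now importable from valor_api.backend.core; its real
# signature lives in backend/core/datum.py and is not shown in this diff,
# so the call below only illustrates the export, not the actual interface.
# core.delete_datums(db=db, dataset=dataset_row)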