Deletion Performance Optimization (#659)
czaloom authored Jul 9, 2024
1 parent e2152b1 commit 5605b48
Showing 11 changed files with 392 additions and 154 deletions.
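Based on the new test below, core.delete_evaluations handles deletion by evaluation status: PENDING and RUNNING evaluations raise exceptions.EvaluationRunningError, a DELETING evaluation is skipped (another worker is assumed to own it), and DONE or FAILED evaluations are removed together with their Metric and ConfusionMatrix rows. A minimal caller sketch of that behaviour; the wrapper function and its name are illustrative and not part of this commit:

from sqlalchemy.orm import Session

from valor_api import exceptions
from valor_api.backend import core


def try_delete_evaluation(db: Session, evaluation_id: int) -> bool:
    """Illustrative helper: request deletion of a single evaluation.

    Returns False when the evaluation is still PENDING or RUNNING; per the
    test below, DELETING evaluations are left alone, and DONE/FAILED
    evaluations are deleted along with their metrics and confusion matrices.
    """
    try:
        core.delete_evaluations(db=db, evaluation_ids=[evaluation_id])
        return True
    except exceptions.EvaluationRunningError:
        return False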
258 changes: 198 additions & 60 deletions api/tests/functional-tests/backend/core/test_evaluation.py
@@ -2,17 +2,88 @@

import pytest
from pydantic import ValidationError
+from sqlalchemy import func
+from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import Session

-from valor_api import enums, exceptions, schemas
+from valor_api import crud, enums, exceptions, schemas
from valor_api.backend import core, models
from valor_api.backend.core.evaluation import (
_fetch_evaluation_from_subrequest,
-    _fetch_evaluations_and_mark_for_deletion,
validate_request,
)


+@pytest.fixture
+def gt_clfs_create(
+    dataset_name: str,
+    img1: schemas.Datum,
+    img2: schemas.Datum,
+) -> list[schemas.GroundTruth]:
+    return [
+        schemas.GroundTruth(
+            dataset_name=dataset_name,
+            datum=img1,
+            annotations=[
+                schemas.Annotation(
+                    labels=[
+                        schemas.Label(key="k1", value="v1"),
+                        schemas.Label(key="k2", value="v2"),
+                    ],
+                ),
+            ],
+        ),
+        schemas.GroundTruth(
+            dataset_name=dataset_name,
+            datum=img2,
+            annotations=[
+                schemas.Annotation(
+                    labels=[schemas.Label(key="k2", value="v3")],
+                ),
+            ],
+        ),
+    ]
+
+
+@pytest.fixture
+def pred_clfs_create(
+    dataset_name: str,
+    model_name: str,
+    img1: schemas.Datum,
+    img2: schemas.Datum,
+) -> list[schemas.Prediction]:
+    return [
+        schemas.Prediction(
+            dataset_name=dataset_name,
+            model_name=model_name,
+            datum=img1,
+            annotations=[
+                schemas.Annotation(
+                    labels=[
+                        schemas.Label(key="k1", value="v1", score=0.2),
+                        schemas.Label(key="k1", value="v2", score=0.8),
+                        schemas.Label(key="k2", value="v4", score=1.0),
+                    ],
+                ),
+            ],
+        ),
+        schemas.Prediction(
+            dataset_name=dataset_name,
+            model_name=model_name,
+            datum=img2,
+            annotations=[
+                schemas.Annotation(
+                    labels=[
+                        schemas.Label(key="k2", value="v2", score=0.8),
+                        schemas.Label(key="k2", value="v3", score=0.1),
+                        schemas.Label(key="k2", value="v0", score=0.1),
+                    ],
+                ),
+            ],
+        ),
+    ]


@pytest.fixture
def finalized_dataset(db: Session, created_dataset: str) -> str:
core.set_dataset_status(
Expand Down Expand Up @@ -887,65 +958,132 @@ def test_count_active_evaluations(
)


-def test__fetch_evaluations_and_mark_for_deletion(
-    db: Session, finalized_dataset: str, finalized_model: str
+def test_delete_evaluations(
+    db: Session,
+    dataset_name: str,
+    model_name: str,
+    gt_clfs_create: list[schemas.GroundTruth],
+    pred_clfs_create: list[schemas.Prediction],
):
-    # create two evaluations
-    for metrics_to_return in [
-        [
-            enums.MetricType.Precision,
-            enums.MetricType.Recall,
-            enums.MetricType.F1,
-            enums.MetricType.Accuracy,
-            enums.MetricType.ROCAUC,
-        ],
-        [
-            enums.MetricType.Precision,
-            enums.MetricType.Recall,
-            enums.MetricType.F1,
-            enums.MetricType.Accuracy,
-            enums.MetricType.ROCAUC,
-            enums.MetricType.PrecisionRecallCurve,
-        ],
-    ]:
-        core.create_or_get_evaluations(
-            db,
-            schemas.EvaluationRequest(
-                dataset_names=[finalized_dataset],
-                model_names=[finalized_model],
-                parameters=schemas.EvaluationParameters(
-                    task_type=enums.TaskType.CLASSIFICATION,
-                    metrics_to_return=metrics_to_return,
-                ),
-            ),
-        )
-
-    # sanity check no evals are in deleting state
-    evals = db.query(models.Evaluation).all()
-    assert len(evals) == 2
-    assert all([e.status != enums.EvaluationStatus.DELETING for e in evals])
-    assert all(e.meta == {} for e in evals)
+    crud.create_dataset(
+        db=db,
+        dataset=schemas.Dataset(name=dataset_name),
+    )
+    for gt in gt_clfs_create:
+        gt.dataset_name = dataset_name
+        crud.create_groundtruths(db=db, groundtruths=[gt])
+    crud.finalize(db=db, dataset_name=dataset_name)
+
+    crud.create_model(db=db, model=schemas.Model(name=model_name))
+    for pd in pred_clfs_create:
+        pd.dataset_name = dataset_name
+        pd.model_name = model_name
+        crud.create_predictions(db=db, predictions=[pd])
+    crud.finalize(db=db, model_name=model_name, dataset_name=dataset_name)
+
+    job_request = schemas.EvaluationRequest(
+        dataset_names=[dataset_name],
+        model_names=[model_name],
+        parameters=schemas.EvaluationParameters(
+            task_type=enums.TaskType.CLASSIFICATION,
+        ),
+    )

-    eval_ids = [e.id for e in evals]
-    # fetch and update all evaluations, check they're in deleting status
-    evals = _fetch_evaluations_and_mark_for_deletion(
-        db, evaluation_ids=[eval_ids[0]]
+    # create clf evaluation
+    resp = crud.create_or_get_evaluations(
+        db=db,
+        job_request=job_request,
)
-    assert len(evals) == 1
-    assert evals[0].status == enums.EvaluationStatus.DELETING
+    assert len(resp) == 1
+    evaluation = db.query(models.Evaluation).one_or_none()
+    assert evaluation

-    # check the other evaluation is not in deleting status
-    assert (
-        db.query(models.Evaluation.status)
-        .where(models.Evaluation.id == eval_ids[1])
-        .scalar()
-        != enums.EvaluationStatus.DELETING
-    )
-    # now call _fetch_evaluations_and_mark_for_deletion with dataset name so expression (ignoring status) will match all evaluations
-    # but check only the second one was updated
-    evals = _fetch_evaluations_and_mark_for_deletion(
-        db, dataset_names=[finalized_dataset]
-    )
-    assert len(evals) == 1
-    assert evals[0].status == enums.EvaluationStatus.DELETING
-    assert evals[0].id == eval_ids[1]
+    for status in [
+        enums.EvaluationStatus.PENDING,
+        enums.EvaluationStatus.RUNNING,
+    ]:
+
+        # set status
+        try:
+            evaluation.status = status
+            db.commit()
+        except IntegrityError as e:
+            db.rollback()
+            raise e
+
+        # check quantities
+        assert db.scalar(func.count(models.Evaluation.id)) == 1
+        assert db.scalar(func.count(models.Metric.id)) == 22
+        assert db.scalar(func.count(models.ConfusionMatrix.id)) == 2
+
+        # attempt to delete evaluation while PENDING / RUNNING
+        with pytest.raises(exceptions.EvaluationRunningError):
+            core.delete_evaluations(db=db, evaluation_ids=[evaluation.id])
+
+        # check quantities
+        assert db.scalar(func.count(models.Evaluation.id)) == 1
+        assert db.scalar(func.count(models.Metric.id)) == 22
+        assert db.scalar(func.count(models.ConfusionMatrix.id)) == 2
+
+    # set status to deleting
+    try:
+        evaluation.status = enums.EvaluationStatus.DELETING
+        db.commit()
+    except IntegrityError as e:
+        db.rollback()
+        raise e
+
+    # attempt to delete evaluation with DELETING status
+    # should do nothing as another worker is handling it.
+    core.delete_evaluations(db=db, evaluation_ids=[evaluation.id])
+
+    # check quantities
+    assert db.scalar(func.count(models.Evaluation.id)) == 1
+    assert db.scalar(func.count(models.Metric.id)) == 22
+    assert db.scalar(func.count(models.ConfusionMatrix.id)) == 2
+
+    # set status to done
+    try:
+        evaluation.status = enums.EvaluationStatus.DONE
+        db.commit()
+    except IntegrityError as e:
+        db.rollback()
+        raise e
+
+    # attempt to delete evaluation with DONE status
+    core.delete_evaluations(db=db, evaluation_ids=[evaluation.id])
+
+    # check quantities
+    assert db.scalar(func.count(models.Evaluation.id)) == 0
+    assert db.scalar(func.count(models.Metric.id)) == 0
+    assert db.scalar(func.count(models.ConfusionMatrix.id)) == 0
+
+    # create clf evaluation (again)
+    resp = crud.create_or_get_evaluations(
+        db=db,
+        job_request=job_request,
+    )
+    assert len(resp) == 1
+    evaluation = db.query(models.Evaluation).one_or_none()
+    assert evaluation
+
+    # set status to failed
+    try:
+        evaluation.status = enums.EvaluationStatus.FAILED
+        db.commit()
+    except IntegrityError as e:
+        db.rollback()
+        raise e
+
+    # check quantities
+    assert db.scalar(func.count(models.Evaluation.id)) == 1
+    assert db.scalar(func.count(models.Metric.id)) == 22
+    assert db.scalar(func.count(models.ConfusionMatrix.id)) == 2
+
+    # attempt to delete evaluation with FAILED status
+    core.delete_evaluations(db=db, evaluation_ids=[evaluation.id])
+
+    # check quantities
+    assert db.scalar(func.count(models.Evaluation.id)) == 0
+    assert db.scalar(func.count(models.Metric.id)) == 0
+    assert db.scalar(func.count(models.ConfusionMatrix.id)) == 0
18 changes: 18 additions & 0 deletions api/tests/functional-tests/crud/test_create_delete.py
@@ -1353,6 +1353,15 @@ def method_to_test(
meta={},
)

+    # test evaluation deletion
+    assert db.scalar(func.count(models.Evaluation.id)) == 2
+    assert db.scalar(func.count(models.Metric.id)) == 28
+    assert db.scalar(func.count(models.ConfusionMatrix.id)) == 0
+    crud.delete(db=db, dataset_name=dataset_name)
+    assert db.scalar(func.count(models.Evaluation.id)) == 0
+    assert db.scalar(func.count(models.Metric.id)) == 0
+    assert db.scalar(func.count(models.ConfusionMatrix.id)) == 0


def test_create_clf_metrics(
db: Session,
@@ -1492,3 +1501,12 @@ def test_create_clf_metrics(
)
).all()
assert len(confusion_matrices) == 2

+    # test evaluation deletion
+    assert db.scalar(func.count(models.Evaluation.id)) == 1
+    assert db.scalar(func.count(models.Metric.id)) == 22
+    assert db.scalar(func.count(models.ConfusionMatrix.id)) == 2
+    crud.delete(db=db, dataset_name=dataset_name)
+    assert db.scalar(func.count(models.Evaluation.id)) == 0
+    assert db.scalar(func.count(models.Metric.id)) == 0
+    assert db.scalar(func.count(models.ConfusionMatrix.id)) == 0
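The new assertions above, and those in test_evaluation.py, all rely on the same SQLAlchemy row-counting pattern, db.scalar(func.count(<model>.id)). A small sketch of that pattern in isolation; the count_rows helper is ours for illustration and assumes an open Session against the same ORM models:

from sqlalchemy import func
from sqlalchemy.orm import Session

from valor_api.backend import models


def count_rows(db: Session, model_class) -> int:
    # Same pattern as the assertions above: COUNT over the primary-key column.
    return db.scalar(func.count(model_class.id))


# e.g. after crud.delete(db=db, dataset_name=dataset_name):
#     assert count_rows(db, models.Evaluation) == 0
#     assert count_rows(db, models.Metric) == 0
#     assert count_rows(db, models.ConfusionMatrix) == 0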
2 changes: 2 additions & 0 deletions api/valor_api/backend/core/__init__.py
@@ -26,6 +26,7 @@
from .datum import (
create_datum,
create_datums,
+    delete_datums,
fetch_datum,
get_paginated_datums,
)
@@ -84,6 +85,7 @@
"get_annotations",
"create_dataset",
"delete_dataset",
"delete_datums",
"fetch_dataset",
"get_dataset",
"get_paginated_datasets",
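The core/__init__.py hunk above only adds delete_datums to the module's exports; its signature is not included in this excerpt. A minimal sketch of the new import path, with the commented-out call and its arguments purely hypothetical:

from valor_api.backend import core

# delete_datums is now importable from valor_api.backend.core; its real
# signature lives in backend/core/datum.py and is not shown in this diff,
# so the call below only illustrates the export, not the actual interface.
# core.delete_datums(db=db, dataset=dataset_row)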