Deletion Performance Optimization #659

Merged
merged 17 commits on Jul 9, 2024
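The diff below reworks the evaluation-deletion tests and exposes a `delete_datums` helper from `core`. As a rough usage sketch of the behaviour the new tests assert (not code from this PR; the helper name `cleanup` is hypothetical), `core.delete_evaluations` refuses to delete evaluations that are still PENDING or RUNNING, treats DELETING as a no-op owned by another worker, and otherwise removes the evaluation together with its metrics and confusion matrices; `crud.delete` on a dataset cascades the same cleanup.

    from sqlalchemy import func
    from sqlalchemy.orm import Session

    from valor_api import crud, exceptions
    from valor_api.backend import core, models


    def cleanup(db: Session, dataset_name: str, evaluation_ids: list[int]) -> None:
        """Sketch of the deletion entry points exercised by the tests below."""
        try:
            # Raises EvaluationRunningError while any target evaluation is still
            # PENDING or RUNNING; silently skips evaluations already in DELETING.
            core.delete_evaluations(db=db, evaluation_ids=evaluation_ids)
        except exceptions.EvaluationRunningError:
            pass

        # Deleting the dataset removes its remaining evaluations together with
        # their Metric and ConfusionMatrix rows.
        crud.delete(db=db, dataset_name=dataset_name)

        assert db.scalar(func.count(models.Evaluation.id)) == 0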
258 changes: 198 additions & 60 deletions api/tests/functional-tests/backend/core/test_evaluation.py
@@ -2,17 +2,88 @@

import pytest
from pydantic import ValidationError
from sqlalchemy import func
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import Session

from valor_api import enums, exceptions, schemas
from valor_api import crud, enums, exceptions, schemas
from valor_api.backend import core, models
from valor_api.backend.core.evaluation import (
_fetch_evaluation_from_subrequest,
_fetch_evaluations_and_mark_for_deletion,
validate_request,
)


@pytest.fixture
def gt_clfs_create(
dataset_name: str,
img1: schemas.Datum,
img2: schemas.Datum,
) -> list[schemas.GroundTruth]:
return [
schemas.GroundTruth(
dataset_name=dataset_name,
datum=img1,
annotations=[
schemas.Annotation(
labels=[
schemas.Label(key="k1", value="v1"),
schemas.Label(key="k2", value="v2"),
],
),
],
),
schemas.GroundTruth(
dataset_name=dataset_name,
datum=img2,
annotations=[
schemas.Annotation(
labels=[schemas.Label(key="k2", value="v3")],
),
],
),
]


@pytest.fixture
def pred_clfs_create(
dataset_name: str,
model_name: str,
img1: schemas.Datum,
img2: schemas.Datum,
) -> list[schemas.Prediction]:
return [
schemas.Prediction(
dataset_name=dataset_name,
model_name=model_name,
datum=img1,
annotations=[
schemas.Annotation(
labels=[
schemas.Label(key="k1", value="v1", score=0.2),
schemas.Label(key="k1", value="v2", score=0.8),
schemas.Label(key="k2", value="v4", score=1.0),
],
),
],
),
schemas.Prediction(
dataset_name=dataset_name,
model_name=model_name,
datum=img2,
annotations=[
schemas.Annotation(
labels=[
schemas.Label(key="k2", value="v2", score=0.8),
schemas.Label(key="k2", value="v3", score=0.1),
schemas.Label(key="k2", value="v0", score=0.1),
],
),
],
),
]


@pytest.fixture
def finalized_dataset(db: Session, created_dataset: str) -> str:
core.set_dataset_status(
@@ -887,65 +958,132 @@ def test_count_active_evaluations(
)


def test__fetch_evaluations_and_mark_for_deletion(
db: Session, finalized_dataset: str, finalized_model: str
def test_delete_evaluations(
db: Session,
dataset_name: str,
model_name: str,
gt_clfs_create: list[schemas.GroundTruth],
pred_clfs_create: list[schemas.Prediction],
):
# create two evaluations
for metrics_to_return in [
[
enums.MetricType.Precision,
enums.MetricType.Recall,
enums.MetricType.F1,
enums.MetricType.Accuracy,
enums.MetricType.ROCAUC,
],
[
enums.MetricType.Precision,
enums.MetricType.Recall,
enums.MetricType.F1,
enums.MetricType.Accuracy,
enums.MetricType.ROCAUC,
enums.MetricType.PrecisionRecallCurve,
],
]:
core.create_or_get_evaluations(
db,
schemas.EvaluationRequest(
dataset_names=[finalized_dataset],
model_names=[finalized_model],
parameters=schemas.EvaluationParameters(
task_type=enums.TaskType.CLASSIFICATION,
metrics_to_return=metrics_to_return,
),
),
)

# sanity check no evals are in deleting state
evals = db.query(models.Evaluation).all()
assert len(evals) == 2
assert all([e.status != enums.EvaluationStatus.DELETING for e in evals])
assert all(e.meta == {} for e in evals)
crud.create_dataset(
db=db,
dataset=schemas.Dataset(name=dataset_name),
)
for gt in gt_clfs_create:
gt.dataset_name = dataset_name
crud.create_groundtruths(db=db, groundtruths=[gt])
crud.finalize(db=db, dataset_name=dataset_name)

crud.create_model(db=db, model=schemas.Model(name=model_name))
for pd in pred_clfs_create:
pd.dataset_name = dataset_name
pd.model_name = model_name
crud.create_predictions(db=db, predictions=[pd])
crud.finalize(db=db, model_name=model_name, dataset_name=dataset_name)

job_request = schemas.EvaluationRequest(
dataset_names=[dataset_name],
model_names=[model_name],
parameters=schemas.EvaluationParameters(
task_type=enums.TaskType.CLASSIFICATION,
),
)

eval_ids = [e.id for e in evals]
# fetch and update all evaluations, check they're in deleting status
evals = _fetch_evaluations_and_mark_for_deletion(
db, evaluation_ids=[eval_ids[0]]
# create clf evaluation
resp = crud.create_or_get_evaluations(
db=db,
job_request=job_request,
)
assert len(evals) == 1
assert evals[0].status == enums.EvaluationStatus.DELETING
assert len(resp) == 1
evaluation = db.query(models.Evaluation).one_or_none()
assert evaluation

# check the other evaluation is not in deleting status
assert (
db.query(models.Evaluation.status)
.where(models.Evaluation.id == eval_ids[1])
.scalar()
!= enums.EvaluationStatus.DELETING
)
    # now call _fetch_evaluations_and_mark_for_deletion with the dataset name so the expression (ignoring status) matches all evaluations,
    # but check that only the second one was updated
evals = _fetch_evaluations_and_mark_for_deletion(
db, dataset_names=[finalized_dataset]
)
assert len(evals) == 1
assert evals[0].status == enums.EvaluationStatus.DELETING
assert evals[0].id == eval_ids[1]
for status in [
enums.EvaluationStatus.PENDING,
enums.EvaluationStatus.RUNNING,
]:

# set status
try:
evaluation.status = status
db.commit()
except IntegrityError as e:
db.rollback()
raise e

# check quantities
assert db.scalar(func.count(models.Evaluation.id)) == 1
assert db.scalar(func.count(models.Metric.id)) == 22
assert db.scalar(func.count(models.ConfusionMatrix.id)) == 2

# attempt to delete evaluation with PENDING status
with pytest.raises(exceptions.EvaluationRunningError):
core.delete_evaluations(db=db, evaluation_ids=[evaluation.id])

# check quantities
assert db.scalar(func.count(models.Evaluation.id)) == 1
assert db.scalar(func.count(models.Metric.id)) == 22
assert db.scalar(func.count(models.ConfusionMatrix.id)) == 2

# set status to deleting
try:
evaluation.status = enums.EvaluationStatus.DELETING
db.commit()
except IntegrityError as e:
db.rollback()
raise e

# attempt to delete evaluation with DELETING status
# should do nothing as another worker is handling it.
core.delete_evaluations(db=db, evaluation_ids=[evaluation.id])

# check quantities
assert db.scalar(func.count(models.Evaluation.id)) == 1
assert db.scalar(func.count(models.Metric.id)) == 22
assert db.scalar(func.count(models.ConfusionMatrix.id)) == 2

# set status to done
try:
evaluation.status = enums.EvaluationStatus.DONE
db.commit()
except IntegrityError as e:
db.rollback()
raise e

# attempt to delete evaluation with DONE status
core.delete_evaluations(db=db, evaluation_ids=[evaluation.id])

# check quantities
assert db.scalar(func.count(models.Evaluation.id)) == 0
assert db.scalar(func.count(models.Metric.id)) == 0
assert db.scalar(func.count(models.ConfusionMatrix.id)) == 0

# create clf evaluation (again)
resp = crud.create_or_get_evaluations(
db=db,
job_request=job_request,
)
assert len(resp) == 1
evaluation = db.query(models.Evaluation).one_or_none()
assert evaluation

# set status to failed
try:
evaluation.status = enums.EvaluationStatus.FAILED
db.commit()
except IntegrityError as e:
db.rollback()
raise e

# check quantities
assert db.scalar(func.count(models.Evaluation.id)) == 1
assert db.scalar(func.count(models.Metric.id)) == 22
assert db.scalar(func.count(models.ConfusionMatrix.id)) == 2

    # attempt to delete evaluation with FAILED status
core.delete_evaluations(db=db, evaluation_ids=[evaluation.id])

# check quantities
assert db.scalar(func.count(models.Evaluation.id)) == 0
assert db.scalar(func.count(models.Metric.id)) == 0
assert db.scalar(func.count(models.ConfusionMatrix.id)) == 0
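The import of `_fetch_evaluations_and_mark_for_deletion` above points at the mark-then-bulk-delete flow that the test drives through each status. The sketch below shows that pattern only in outline; the `evaluation_id` foreign keys on `Metric` and `ConfusionMatrix` and the helper name `_bulk_delete_evaluations` are assumptions, not taken from this diff.

    from sqlalchemy import delete
    from sqlalchemy.orm import Session

    from valor_api import enums
    from valor_api.backend import models


    def _bulk_delete_evaluations(db: Session, evaluation_ids: list[int]) -> None:
        # Mark the evaluations as DELETING first so concurrent workers skip them.
        (
            db.query(models.Evaluation)
            .filter(models.Evaluation.id.in_(evaluation_ids))
            .update(
                {models.Evaluation.status: enums.EvaluationStatus.DELETING},
                synchronize_session=False,
            )
        )
        db.commit()

        # Remove dependent rows with set-based DELETEs rather than per-row ORM
        # deletes, which is where the bulk of the performance win would come from.
        db.execute(
            delete(models.Metric).where(
                models.Metric.evaluation_id.in_(evaluation_ids)
            )
        )
        db.execute(
            delete(models.ConfusionMatrix).where(
                models.ConfusionMatrix.evaluation_id.in_(evaluation_ids)
            )
        )
        db.execute(
            delete(models.Evaluation).where(
                models.Evaluation.id.in_(evaluation_ids)
            )
        )
        db.commit()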
18 changes: 18 additions & 0 deletions api/tests/functional-tests/crud/test_create_delete.py
@@ -1353,6 +1353,15 @@ def method_to_test(
meta={},
)

# test evaluation deletion
assert db.scalar(func.count(models.Evaluation.id)) == 2
assert db.scalar(func.count(models.Metric.id)) == 28
assert db.scalar(func.count(models.ConfusionMatrix.id)) == 0
crud.delete(db=db, dataset_name=dataset_name)
assert db.scalar(func.count(models.Evaluation.id)) == 0
assert db.scalar(func.count(models.Metric.id)) == 0
assert db.scalar(func.count(models.ConfusionMatrix.id)) == 0


def test_create_clf_metrics(
db: Session,
@@ -1492,3 +1501,12 @@ def test_create_clf_metrics(
)
).all()
assert len(confusion_matrices) == 2

# test evaluation deletion
assert db.scalar(func.count(models.Evaluation.id)) == 1
assert db.scalar(func.count(models.Metric.id)) == 22
assert db.scalar(func.count(models.ConfusionMatrix.id)) == 2
crud.delete(db=db, dataset_name=dataset_name)
assert db.scalar(func.count(models.Evaluation.id)) == 0
assert db.scalar(func.count(models.Metric.id)) == 0
assert db.scalar(func.count(models.ConfusionMatrix.id)) == 0
2 changes: 2 additions & 0 deletions api/valor_api/backend/core/__init__.py
@@ -26,6 +26,7 @@
from .datum import (
create_datum,
create_datums,
delete_datums,
fetch_datum,
get_paginated_datums,
)
@@ -84,6 +85,7 @@
"get_annotations",
"create_dataset",
"delete_dataset",
"delete_datums",
"fetch_dataset",
"get_dataset",
"get_paginated_datasets",