Add test_evaluate_detection_fp to valor_core (#718)
ntlind authored Aug 23, 2024
1 parent df9770f commit c542ce4
Showing 1 changed file with 156 additions and 0 deletions.
core/tests/functional-tests/test_detection.py (156 additions, 0 deletions)
@@ -4427,3 +4427,159 @@ def test_two_groundtruths_one_datum(
assert result_dict["meta"]["labels"] == 2
assert result_dict["meta"]["annotations"] == 5
assert result_dict["meta"]["duration"] <= 5


def test_evaluate_detection_pr_fp(img1, img2):
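"""Verify that a prediction which does not overlap its ground truth is counted as a
false positive by both the PrecisionRecallCurve and DetailedPrecisionRecallCurve
metrics."""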
gts = [
schemas.GroundTruth(
datum=img1,
annotations=[
schemas.Annotation(
is_instance=True,
labels=[schemas.Label(key="k1", value="v1")],
bounding_box=schemas.Box.from_extrema(
xmin=0, xmax=5, ymin=0, ymax=5
),
)
],
),
schemas.GroundTruth(
datum=img2,
annotations=[
schemas.Annotation(
is_instance=True,
labels=[schemas.Label(key="k1", value="v1")],
bounding_box=schemas.Box.from_extrema(
xmin=0, xmax=5, ymin=0, ymax=5
),
)
],
),
]
preds = [
schemas.Prediction(
datum=img1,
annotations=[
schemas.Annotation(
is_instance=True,
labels=[schemas.Label(key="k1", value="v1", score=0.8)],
bounding_box=schemas.Box.from_extrema(
xmin=0, xmax=5, ymin=0, ymax=5
),
)
],
),
schemas.Prediction(
datum=img2,
annotations=[
schemas.Annotation(
is_instance=True,
labels=[schemas.Label(key="k1", value="v1", score=0.8)],
bounding_box=schemas.Box.from_extrema(
xmin=10, xmax=20, ymin=10, ymax=20
),
)
],
),
]

eval_job = evaluate_detection(
groundtruths=gts,
predictions=preds,
metrics_to_return=[
enums.MetricType.PrecisionRecallCurve,
],
)

metrics = eval_job.metrics
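# metrics[0] holds the PrecisionRecallCurve for label key "k1"; its "value" dict is
# keyed by label value and then by score threshold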
assert metrics[0]["value"]["v1"][0.5] == {
"fn": 1, # img2
"fp": 1, # img2
"tn": None,
"tp": 1, # img1
"recall": 0.5,
"accuracy": None,
"f1_score": 0.5,
"precision": 0.5,
}
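# a sanity check on the counts above: at the 0.5 score threshold both predictions
# (score 0.8) survive; img1's box matches its ground truth exactly (tp=1), while
# img2's box (10-20) doesn't overlap its ground truth (0-5), producing one fp and
# leaving that ground truth unmatched (fn=1), so precision = recall = f1 = 1 / 2 = 0.5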

# the score threshold is now higher than the prediction scores, so both predictions drop out and we're left with only 2 fns (one for each image)
assert metrics[0]["value"]["v1"][0.85] == {
"tp": 0,
"fp": 0,
"fn": 2,
"tn": None,
"precision": 0.0,
"recall": 0.0,
"accuracy": None,
"f1_score": 0.0,
}

# test DetailedPRCurve version
eval_job = evaluate_detection(
groundtruths=gts,
predictions=preds,
metrics_to_return=[
enums.MetricType.DetailedPrecisionRecallCurve,
],
)

metrics = eval_job.metrics
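# the detailed curve nests its counts one level deeper: each score threshold exposes a
# "total" per outcome and, for fp and fn, "observations" broken down by cause
# (hallucinations, misclassifications, no_predictions)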

score_threshold = 0.5
assert metrics[0]["value"]["v1"][score_threshold]["tp"]["total"] == 1
assert "tn" not in metrics[0]["value"]["v1"][score_threshold]
assert (
metrics[0]["value"]["v1"][score_threshold]["fp"]["observations"][
"hallucinations"
]["count"]
== 1
)
assert (
metrics[0]["value"]["v1"][score_threshold]["fp"]["observations"][
"misclassifications"
]["count"]
== 0
)
assert (
metrics[0]["value"]["v1"][score_threshold]["fn"]["observations"][
"no_predictions"
]["count"]
== 1
)
assert metrics[0]["value"]["v1"][score_threshold]["tp"]["total"] == 1
assert (
metrics[0]["value"]["v1"][score_threshold]["fn"]["observations"][
"misclassifications"
]["count"]
== 0
)
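# interpreting the breakdown above: img2's non-overlapping prediction is reported as a
# "hallucination" (a predicted box that doesn't sufficiently overlap any ground truth)
# rather than a misclassification, and img2's unmatched ground truth falls under
# "no_predictions"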

# the score threshold is now higher than the prediction scores, so both predictions drop out and we're left with only 2 fns (one for each image)
score_threshold = 0.85
assert metrics[0]["value"]["v1"][score_threshold]["tp"]["total"] == 0
assert "tn" not in metrics[0]["value"]["v1"][score_threshold]
assert (
metrics[0]["value"]["v1"][score_threshold]["fp"]["observations"][
"hallucinations"
]["count"]
== 0
)
assert (
metrics[0]["value"]["v1"][score_threshold]["fp"]["observations"][
"misclassifications"
]["count"]
== 0
)
assert (
metrics[0]["value"]["v1"][score_threshold]["fn"]["observations"][
"no_predictions"
]["count"]
== 2
)
assert (
metrics[0]["value"]["v1"][score_threshold]["fn"]["observations"][
"misclassifications"
]["count"]
== 0
)
