Add benchmarking workflow to evaluate classification and OD runtimes #638

Merged
merged 30 commits into main from add_benchmark_utilities on Jul 3, 2024

Changes from 21 commits
Commits (30)
1c80aca
add classification benchmarks
ntlind Jun 27, 2024
6816ebb
typo
ntlind Jun 27, 2024
7ec99f2
try changing paths
ntlind Jun 27, 2024
d344350
try new workflow
ntlind Jun 27, 2024
e81a903
fix results.json
ntlind Jun 27, 2024
299a9e2
change quotes
ntlind Jun 27, 2024
d602ebd
fix typo
ntlind Jun 27, 2024
4d506a8
preserve formatting
ntlind Jun 27, 2024
d0db76b
update benchmark query
ntlind Jun 27, 2024
5791c93
start adding od script
ntlind Jun 28, 2024
1d2db2b
get detection test working
ntlind Jun 28, 2024
ea6487a
add od benchmarks
ntlind Jul 1, 2024
3f6cccf
turn down limit
ntlind Jul 1, 2024
f7a1b43
reduce limit again
ntlind Jul 1, 2024
9868598
return number of annotations
ntlind Jul 1, 2024
6f1cd70
up limit
ntlind Jul 1, 2024
fc63108
reduce limit
ntlind Jul 1, 2024
f8075e4
capture number of labels and annotations
ntlind Jul 1, 2024
bc73fb4
use info in meta_
ntlind Jul 1, 2024
fd6a58b
remove aws role
ntlind Jul 1, 2024
094732c
Merge branch 'main' into add_benchmark_utilities
ntlind Jul 3, 2024
1efbfad
Merge branch 'main' into add_benchmark_utilities
ntlind Jul 3, 2024
fc9c51c
add timeout, move file location to cloud
ntlind Jul 3, 2024
d93e32a
add limits
ntlind Jul 3, 2024
df41476
try new limits
ntlind Jul 3, 2024
0f493d7
finalize od limits
ntlind Jul 3, 2024
05b6e7d
Merge branch 'main' into add_benchmark_utilities
ntlind Jul 3, 2024
c13e115
reduce limit
ntlind Jul 3, 2024
7be70c3
Merge branch 'main' into add_benchmark_utilities
ntlind Jul 3, 2024
b8ef066
Merge branch 'main' into add_benchmark_utilities
ntlind Jul 3, 2024
46 changes: 46 additions & 0 deletions .github/workflows/benchmark-evaluations.yml
@@ -0,0 +1,46 @@
name: Run benchmarks on pre-existing data

on:
  push:
    branches: "**"

permissions:
  id-token: write
  contents: read

jobs:
  run-benchmarks:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - name: build postgres
        run: |
          docker build ./database -t pgvalor
      - name: setup back end test env
        run: docker compose -p valor -f docker-compose.yml -f docker-compose.cicd-override.yml --env-file ./api/.env.testing up --build -d
      - uses: actions/setup-python@v4
        with:
          python-version: "3.10"
      - name: install api
        run: pip install -e ".[test]"
        working-directory: ./api
      - name: install client
        run: pip install -e ".[test]"
        working-directory: ./client
      - name: run classification benchmarks
        run: python benchmark_script.py
        working-directory: ./integration_tests/benchmarks/classification
      - name: print classification results
        run: |
          export BENCHMARK_RESULTS=$(python -c "import os;import json;print(json.dumps(json.load(open('results.json', 'r')), indent=4));")
          echo "$BENCHMARK_RESULTS"
        working-directory: ./integration_tests/benchmarks/classification
      - name: run object detection benchmarks
        run: python benchmark_script.py
        working-directory: ./integration_tests/benchmarks/object-detection
      - name: print object detection results
        run: |
          export BENCHMARK_RESULTS=$(python -c "import os;import json;print(json.dumps(json.load(open('results.json', 'r')), indent=4));")
          echo "$BENCHMARK_RESULTS"
        working-directory: ./integration_tests/benchmarks/object-detection
      - run: make stop-env
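
For reference, the inline python -c command used by the two result-printing steps above amounts to the following short sketch: it reads results.json from the step's working directory and pretty-prints it into the job log.

import json

# Load the benchmark output written by benchmark_script.py and
# pretty-print it so it appears in the workflow log.
with open("results.json", "r") as f:
    results = json.load(f)

print(json.dumps(results, indent=4))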
81 changes: 0 additions & 81 deletions .github/workflows/stress-test.yml

This file was deleted.

1 change: 1 addition & 0 deletions integration_tests/benchmarks/.gitignore
@@ -0,0 +1 @@
results.json
188 changes: 188 additions & 0 deletions integration_tests/benchmarks/classification/benchmark_script.py
@@ -0,0 +1,188 @@
import json
import os
from datetime import datetime
from time import time

from valor import (
    Annotation,
    Client,
    Dataset,
    Datum,
    GroundTruth,
    Label,
    Model,
    Prediction,
    connect,
)

connect("http://0.0.0.0:8000")
client = Client()


def write_results_to_file(write_path: str, result_dict: dict):
    """Write results to results.json"""
    current_datetime = datetime.now().strftime("%d/%m/%Y %H:%M:%S")

    if os.path.isfile(write_path):
        with open(write_path, "r") as file:
            file.seek(0)
            data = json.load(file)
    else:
        data = {}

    data[current_datetime] = result_dict

    with open(write_path, "w+") as file:
        json.dump(data, file, indent=4)


def ingest_groundtruths_and_predictions(
    dset: Dataset, model: Model, raw: dict, pair_limit: int
):
    """Ingest the data into Valor."""

    groundtruths = []
    predictions = []
    slice_ = (
        raw["groundtruth_prediction_pairs"][:pair_limit]
        if pair_limit != -1
        else raw["groundtruth_prediction_pairs"]
    )
    for groundtruth, prediction in slice_:
        groundtruths.append(
            GroundTruth(
                datum=Datum(
                    uid=groundtruth["value"]["datum"]["uid"],
                    metadata={"width": 224, "height": 224},
                ),
                annotations=[
                    Annotation(
                        labels=[
                            Label(
                                key=label["key"],
                                value=label["value"],
                                score=label["score"],
                            )
                            for label in annotation["labels"]
                        ],
                    )
                    for annotation in groundtruth["value"]["annotations"]
                ],
            )
        )

        predictions.append(
            Prediction(
                datum=Datum(
                    uid=prediction["value"]["datum"]["uid"],
                    metadata={"width": 224, "height": 224},
                ),
                annotations=[
                    Annotation(
                        labels=[
                            Label(
                                key=label["key"],
                                value=label["value"],
                                score=label["score"],
                            )
                            for label in annotation["labels"]
                        ],
                    )
                    for annotation in prediction["value"]["annotations"]
                ],
            )
        )

    for gt in groundtruths:
        dset.add_groundtruth(gt)

    for pred in predictions:
        model.add_prediction(dset, pred)

    dset.finalize()
    model.finalize_inferences(dataset=dset)


def run_base_evaluation(dset: Dataset, model: Model):
    """Run a base evaluation (with no PR curves)."""
    evaluation = model.evaluate_classification(dset)
    evaluation.wait_for_completion()
    return evaluation


def run_pr_curve_evaluation(dset: Dataset, model: Model):
    """Run a base evaluation with PrecisionRecallCurve included."""
    evaluation = model.evaluate_classification(
        dset,
        metrics_to_return=[
            "Accuracy",
            "Precision",
            "Recall",
            "F1",
            "ROCAUC",
            "PrecisionRecallCurve",
        ],
    )
    evaluation.wait_for_completion()
    return evaluation


def run_detailed_pr_curve_evaluation(dset: Dataset, model: Model):
    """Run a base evaluation with PrecisionRecallCurve and DetailedPrecisionRecallCurve included."""

    evaluation = model.evaluate_classification(
        dset,
        metrics_to_return=[
            "Accuracy",
            "Precision",
            "Recall",
            "F1",
            "ROCAUC",
            "PrecisionRecallCurve",
            "DetailedPrecisionRecallCurve",
        ],
    )
    evaluation.wait_for_completion()
    return evaluation


def run_benchmarking_analysis(
    limits_to_test: list[int] = [100, 500],
    results_file: str = "results.json",
    data_file: str = "data.json",
):
    """Time various function calls and export the results."""
    current_directory = os.path.dirname(os.path.realpath(__file__))
    write_path = f"{current_directory}/{results_file}"
    read_path = f"{current_directory}/{data_file}"
    for limit in limits_to_test:
        dset = Dataset.create(name="bird-identification")
        model = Model.create(name="some_model")

        with open(read_path) as f:
            raw_data = json.load(f)

        start_time = time()

        ingest_groundtruths_and_predictions(
            dset=dset, model=model, raw=raw_data, pair_limit=limit
        )
        ingest_time = time() - start_time

        eval_ = run_base_evaluation(dset=dset, model=model)

        results = {
            "number_of_datums": limit,
            "number_of_unique_labels": eval_.meta["labels"],
            "number_of_annotations": eval_.meta["annotations"],
            "ingest_runtime": f"{(ingest_time):.1f} seconds",
            "eval_runtime": f"{(eval_.meta['duration']):.1f} seconds",
        }
        write_results_to_file(write_path=write_path, result_dict=results)

        client.delete_dataset(dset.name, timeout=30)
        client.delete_model(model.name, timeout=30)


if __name__ == "__main__":
    run_benchmarking_analysis()
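
As a rough usage sketch (not part of this diff): assuming the Valor stack from the workflow above is running and the snippet is executed from integration_tests/benchmarks/classification, the analysis can be pointed at a smaller pair limit and a separate output file. The limit of 10 and the local_results.json filename below are hypothetical values chosen for illustration.

import json

from benchmark_script import run_benchmarking_analysis

# Hypothetical local run: benchmark a small slice of data.json, writing to a
# separate results file so the CI results.json is left untouched.
run_benchmarking_analysis(
    limits_to_test=[10],
    results_file="local_results.json",
)

# write_results_to_file keys each run by a "%d/%m/%Y %H:%M:%S" timestamp.
with open("local_results.json") as f:
    for timestamp, run in json.load(f).items():
        print(timestamp, run["ingest_runtime"], run["eval_runtime"])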