Update API Benchmarks (#729)
czaloom authored Aug 29, 2024
1 parent 7252c45 commit 7a5d411
Showing 1 changed file with 163 additions and 92 deletions.
255 changes: 163 additions & 92 deletions
integration_tests/benchmarks/object-detection/benchmark_script.py
@@ -1,6 +1,5 @@
import json
import os
import re
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
@@ -17,10 +16,13 @@
client = Client()


def time_it(fn, *args, **kwargs):
start = time()
fn(*args, **kwargs)
return time() - start
def time_it(fn):
def wrapper(*args, **kwargs):
start = time()
results = fn(*args, **kwargs)
return (time() - start, results)

return wrapper
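
time_it is now a decorator that returns an (elapsed_seconds, result) tuple instead of a helper that discarded the wrapped function's return value, so every call site unpacks two values. A minimal sketch of the new call pattern, using a hypothetical sleep_and_add function:

from time import sleep

@time_it
def sleep_and_add(a: int, b: int) -> int:
    sleep(0.1)
    return a + b

elapsed, total = sleep_and_add(1, 2)  # elapsed is roughly 0.1; total == 3

# Plain callables can also be wrapped inline, which is how the script
# times dataset.finalize further down:
#   elapsed, _ = time_it(dataset.finalize)()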


def download_data_if_not_exists(
@@ -51,9 +53,18 @@ def download_data_if_not_exists(
else:
print(f"{file_name} already exists locally.")

# sort file by datum uid
with open(file_path, "r") as f:
lines = [x for x in f]
with open(file_path, "w") as f:
for line in sorted(
lines, key=lambda x: int(json.loads(x)["datum"]["uid"])
):
f.write(line)
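
The new post-download step keeps the local file sorted by numeric datum uid, so repeated runs ingest lines in a deterministic order. A small illustration of the sort key on hypothetical lines:

import json

lines = ['{"datum": {"uid": "10"}}\n', '{"datum": {"uid": "2"}}\n']
# int(...) sorts numerically, so "2" precedes "10"; a plain string sort
# would put "10" first.
ordered = sorted(lines, key=lambda x: int(json.loads(x)["datum"]["uid"]))
assert [json.loads(x)["datum"]["uid"] for x in ordered] == ["2", "10"]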


def write_results_to_file(write_path: Path, results: list[dict]):
"""Write results to results.json"""
"""Write results to manager_results.json"""
current_datetime = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
if os.path.isfile(write_path):
with open(write_path, "r") as file:
@@ -68,6 +79,7 @@ def write_results_to_file(write_path: Path, results: list[dict]):
json.dump(data, file, indent=4)
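
The helper accumulates runs rather than overwriting: when the file exists it is read back, the new results are merged in under the current timestamp, and everything is rewritten with indent=4. A sketch of that read-modify-write pattern, assuming a top-level dict keyed by timestamp (the exact schema lives in the lines this hunk elides):

import json
import os

def append_run(path: str, timestamp: str, results: list[dict]) -> None:
    data = {}
    if os.path.isfile(path):
        with open(path, "r") as f:
            data = json.load(f)  # assumption: the file holds one dict
    data[timestamp] = results
    with open(path, "w") as f:
        json.dump(data, f, indent=4)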


@time_it
def ingest_groundtruths(
dataset: Dataset,
path: Path,
@@ -85,7 +97,7 @@ def ingest_groundtruths(
count += 1
if count >= limit and limit > 0:
break
elif len(chunks) < chunk_size:
elif len(chunks) < chunk_size or chunk_size == -1:
continue

dataset.add_groundtruths(chunks, timeout=timeout)
@@ -94,32 +106,26 @@ def ingest_groundtruths(
dataset.add_groundtruths(chunks, timeout=timeout)
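
The batching rule here (shared by ingest_predictions below) flushes a chunk once it reaches chunk_size, where chunk_size == -1 means never flush early (a single upload at the end) and limit <= 0 means take every line. A standalone sketch of that control flow, with plain lists standing in for Valor objects:

def chunked(items: list, chunk_size: int, limit: int) -> list[list]:
    # Mirrors the loop above: chunk_size == -1 disables mid-file
    # flushes; a positive limit caps how many items are consumed.
    batches, chunk, count = [], [], 0
    for item in items:
        chunk.append(item)
        count += 1
        if count >= limit and limit > 0:
            break
        elif len(chunk) < chunk_size or chunk_size == -1:
            continue
        batches.append(chunk)
        chunk = []
    if chunk:
        batches.append(chunk)
    return batches

assert chunked(list(range(5)), chunk_size=2, limit=0) == [[0, 1], [2, 3], [4]]
assert chunked(list(range(5)), chunk_size=-1, limit=0) == [[0, 1, 2, 3, 4]]
assert chunked(list(range(5)), chunk_size=2, limit=3) == [[0, 1], [2]]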


@time_it
def ingest_predictions(
dataset: Dataset,
model: Model,
datum_uids: list[str],
path: Path,
limit: int,
chunk_size: int,
timeout: int | None,
):
pattern = re.compile(r'"uid":\s*"(\d+)"')
with open(path, "r") as f:
count = 0
chunks = []
for line in f:
match = pattern.search(line)
if not match:
continue
elif match.group(1) not in datum_uids:
continue
pd_dict = json.loads(line)
pd = Prediction.decode_value(pd_dict)
chunks.append(pd)
count += 1
if count >= limit and limit > 0:
break
elif len(chunks) < chunk_size:
elif len(chunks) < chunk_size or chunk_size == -1:
continue

model.add_predictions(dataset, chunks, timeout=timeout)
@@ -191,54 +197,105 @@ def run_detailed_pr_curve_evaluation(


@dataclass
class DataBenchmark:
dtype: str
ingestion: float
finalization: float
deletion: float

def result(self) -> dict[str, float | str]:
return {
"dtype": self.dtype,
"ingestion": round(self.ingestion, 2),
"finalization": round(self.finalization, 2),
"deletion": round(self.deletion, 2),
}


@dataclass
class EvaluationBenchmark:
class Benchmark:
limit: int
gt_stats: DataBenchmark
pd_stats: DataBenchmark
n_datums: int
n_annotations: int
n_labels: int
gt_type: AnnotationType
pd_type: AnnotationType
chunk_size: int
gt_ingest: float
gt_finalization: float
gt_deletion: float
pd_ingest: float
pd_finalization: float
pd_deletion: float
eval_base: float
eval_base_pr: float
eval_base_pr_detail: float

def result(self) -> dict[str, float | str | dict[str, str | float]]:
def result(self) -> dict:
return {
"limit": self.limit,
"groundtruths": self.gt_stats.result(),
"predictions": self.pd_stats.result(),
"evaluation": {
"number_of_datums": self.n_datums,
"number_of_annotations": self.n_annotations,
"number_of_labels": self.n_labels,
"base": round(self.eval_base, 2),
"base+pr": round(self.eval_base_pr, 2),
"base+pr+detailed": round(self.eval_base_pr_detail, 2),
"chunk_size": self.chunk_size,
"n_datums": self.n_datums,
"n_annotations": self.n_annotations,
"n_labels": self.n_labels,
"dtype": {
"groundtruth": self.gt_type.value,
"prediction": self.pd_type.value,
},
"base": {
"ingestion": {
"dataset": f"{round(self.gt_ingest, 2)} seconds",
"model": f"{round(self.pd_ingest, 2)} seconds",
},
"finalization": {
"dataset": f"{round(self.gt_finalization, 2)} seconds",
"model": f"{round(self.pd_finalization, 2)} seconds",
},
"evaluation": {
"preprocessing": "0.0 seconds",
"computation": f"{round(self.eval_base, 2)} seconds",
"total": f"{round(self.eval_base, 2)} seconds",
},
"deletion": {
"dataset": f"{round(self.gt_deletion, 2)} seconds",
"model": f"{round(self.pd_deletion, 2)} seconds",
},
},
"base+pr": {
"ingestion": {
"dataset": f"{round(self.gt_ingest, 2)} seconds",
"model": f"{round(self.pd_ingest, 2)} seconds",
},
"finalization": {
"dataset": f"{round(self.gt_finalization, 2)} seconds",
"model": f"{round(self.pd_finalization, 2)} seconds",
},
"evaluation": {
"preprocessing": "0.0 seconds",
"computation": f"{round(self.eval_base_pr, 2)} seconds",
"total": f"{round(self.eval_base_pr, 2)} seconds",
},
"deletion": {
"dataset": f"{round(self.gt_deletion, 2)} seconds",
"model": f"{round(self.pd_deletion, 2)} seconds",
},
}
if self.eval_base_pr > -1
else {},
"base+pr+detailed": {
"ingestion": {
"dataset": f"{round(self.gt_ingest, 2)} seconds",
"model": f"{round(self.pd_ingest, 2)} seconds",
},
"finalization": {
"dataset": f"{round(self.gt_finalization, 2)} seconds",
"model": f"{round(self.pd_finalization, 2)} seconds",
},
"evaluation": {
"preprocessing": "0.0 seconds",
"computation": f"{round(self.eval_base_pr_detail, 2)} seconds",
"total": f"{round(self.eval_base_pr_detail, 2)} seconds",
},
"deletion": {
"dataset": f"{round(self.gt_deletion, 2)} seconds",
"model": f"{round(self.pd_deletion, 2)} seconds",
},
}
if self.eval_base_pr_detail > -1
else {},
}
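
For orientation, here is a trimmed example of the dict a single Benchmark.result() call produces (every value is illustrative, not measured; the "base+pr" and "base+pr+detailed" sections collapse to {} when their evaluations were skipped):

example_entry = {
    "limit": 5000,
    "chunk_size": 250,
    "n_datums": 5000,
    "n_annotations": 40000,
    "n_labels": 80,
    "dtype": {"groundtruth": "box", "prediction": "box"},
    "base": {
        "ingestion": {"dataset": "12.3 seconds", "model": "15.7 seconds"},
        "finalization": {"dataset": "0.5 seconds", "model": "0.4 seconds"},
        "evaluation": {
            "preprocessing": "0.0 seconds",
            "computation": "8.2 seconds",
            "total": "8.2 seconds",
        },
        "deletion": {"dataset": "1.1 seconds", "model": "0.9 seconds"},
    },
    "base+pr": {},           # {} when compute_pr was disabled
    "base+pr+detailed": {},  # {} when compute_detailed was disabled
}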


def run_benchmarking_analysis(
limits_to_test: list[int],
combinations: list[tuple[AnnotationType, AnnotationType]] | None = None,
results_file: str = "results.json",
ingestion_chunk_timeout: int = 30,
chunk_size: int = -1,
ingestion_timeout: int = 30,
evaluation_timeout: int = 30,
compute_pr: bool = True,
compute_detailed: bool = True,
@@ -305,49 +362,66 @@ def run_benchmarking_analysis(
client.delete_model("yolo")
raise e

# gt ingestion
gt_ingest_time = time_it(
ingest_groundtruths,
# === Ingestion ===
gt_ingest_time, _ = ingest_groundtruths(
dataset=dataset,
path=current_directory / Path(gt_filename),
limit=limit,
chunk_size=1000,
timeout=ingestion_chunk_timeout,
)

# gt finalization
gt_finalization_time = time_it(dataset.finalize)

# pd ingestion
datum_uids = [datum.uid for datum in dataset.get_datums()]
pd_ingest_time = time_it(
ingest_predictions,
chunk_size=chunk_size,
timeout=ingestion_timeout,
) # type: ignore - time_it wrapper
gt_finalization_time, _ = time_it(dataset.finalize)()
pd_ingest_time, _ = ingest_predictions(
dataset=dataset,
model=model,
datum_uids=datum_uids,
path=current_directory / Path(pd_filename),
limit=limit,
chunk_size=1000,
timeout=ingestion_chunk_timeout,
chunk_size=chunk_size,
timeout=ingestion_timeout,
) # type: ignore - time_it wrapper
pd_finalization_time, _ = time_it(model.finalize_inferences)(
dataset
)

# model finalization
pd_finalization_time = time_it(model.finalize_inferences, dataset)

# run evaluations
eval_pr = None
eval_detail = None
eval_base = run_base_evaluation(
# === Base Evaluation ===
base_results = run_base_evaluation(
dset=dataset, model=model, timeout=evaluation_timeout
)
assert base_results.meta
n_datums = base_results.meta["datums"]
n_annotations = base_results.meta["annotations"]
n_labels = base_results.meta["labels"]
base = base_results.meta["duration"]
if base > evaluation_timeout and evaluation_timeout != -1:
raise TimeoutError(
f"Base evaluation timed out with {n_datums} datums."
)

# === PR Evaluation ===
pr = -1
if compute_pr:
eval_pr = run_pr_curve_evaluation(
pr_results = run_pr_curve_evaluation(
dset=dataset, model=model, timeout=evaluation_timeout
)
assert pr_results.meta
pr = pr_results.meta["duration"]
if pr > evaluation_timeout and evaluation_timeout != -1:
raise TimeoutError(
f"PR evaluation timed out with {n_datums} datums."
)

# === Detailed Evaluation ===
detailed = -1
if compute_detailed:
eval_detail = run_detailed_pr_curve_evaluation(
detailed_results = run_detailed_pr_curve_evaluation(
dset=dataset, model=model, timeout=evaluation_timeout
)
assert detailed_results.meta
detailed = detailed_results.meta["duration"]
if detailed > evaluation_timeout and evaluation_timeout != -1:
raise TimeoutError(
f"Detailed evaluation timed out with {n_datums} datums."
)
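
The three guards above repeat one pattern: compare meta["duration"] against evaluation_timeout, where -1 disables the check. A helper that would express it once (a sketch, not part of this diff):

def check_duration(name: str, duration: float, timeout: int, n_datums: int) -> None:
    # timeout == -1 means no limit, matching the script's convention.
    if timeout != -1 and duration > timeout:
        raise TimeoutError(
            f"{name} evaluation timed out with {n_datums} datums."
        )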

# delete model
start = time()
@@ -360,28 +434,23 @@ def run_benchmarking_analysis(
gt_deletion_time = time() - start

results.append(
EvaluationBenchmark(
Benchmark(
limit=limit,
gt_stats=DataBenchmark(
dtype=gt_type,
ingestion=gt_ingest_time,
finalization=gt_finalization_time,
deletion=gt_deletion_time,
),
pd_stats=DataBenchmark(
dtype=pd_type,
ingestion=pd_ingest_time,
finalization=pd_finalization_time,
deletion=pd_deletion_time,
),
n_datums=eval_base.meta["datums"],
n_annotations=eval_base.meta["annotations"],
n_labels=eval_base.meta["labels"],
eval_base=eval_base.meta["duration"],
eval_base_pr=eval_pr.meta["duration"] if eval_pr else -1,
eval_base_pr_detail=(
eval_detail.meta["duration"] if eval_detail else -1
),
n_datums=n_datums,
n_annotations=n_annotations,
n_labels=n_labels,
gt_type=gt_type,
pd_type=pd_type,
chunk_size=chunk_size,
gt_ingest=gt_ingest_time,
gt_finalization=gt_finalization_time,
gt_deletion=gt_deletion_time,
pd_ingest=pd_ingest_time,
pd_finalization=pd_finalization_time,
pd_deletion=pd_deletion_time,
eval_base=base,
eval_base_pr=pr,
eval_base_pr_detail=detailed,
).result()
)

@@ -395,6 +464,7 @@ def run_benchmarking_analysis(
combinations=[
(AnnotationType.BOX, AnnotationType.BOX),
],
chunk_size=250,
limits_to_test=[5000, 5000],
)

@@ -403,6 +473,7 @@ def run_benchmarking_analysis(
combinations=[
(AnnotationType.POLYGON, AnnotationType.POLYGON),
],
chunk_size=250,
limits_to_test=[5000, 5000],
)

