diff --git a/.github/workflows/lite-benchmark-evaluations.yml b/.github/workflows/lite-benchmark-evaluations.yml index 8afe7ed21..82d80b7bf 100644 --- a/.github/workflows/lite-benchmark-evaluations.yml +++ b/.github/workflows/lite-benchmark-evaluations.yml @@ -35,4 +35,3 @@ jobs: export BENCHMARK_RESULTS=$(python -c "import os;import json;print(json.dumps(json.load(open('objdet_results.json', 'r')), indent=4));") echo "$BENCHMARK_RESULTS" working-directory: ./lite/benchmarks/ - - run: make stop-env diff --git a/.github/workflows/lite-synthetic-benchmarks.yml b/.github/workflows/lite-synthetic-benchmarks.yml new file mode 100644 index 000000000..daaefe5dc --- /dev/null +++ b/.github/workflows/lite-synthetic-benchmarks.yml @@ -0,0 +1,24 @@ +name: "[valor-lite] synthetic benchmarks" + +on: + push: + branches: "**" + +permissions: + id-token: write + contents: read + +jobs: + run-benchmarks: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: "3.10" + - name: install lite + run: pip install -e . + working-directory: ./lite + - name: benchmark semantic segmentation + run: python benchmark_semantic_segmentation.py + working-directory: ./lite/benchmarks/synthetic/ diff --git a/lite/benchmarks/synthetic/benchmark_semantic_segmentation.py b/lite/benchmarks/synthetic/benchmark_semantic_segmentation.py new file mode 100644 index 000000000..737da165c --- /dev/null +++ b/lite/benchmarks/synthetic/benchmark_semantic_segmentation.py @@ -0,0 +1,94 @@ +from valor_lite.profiling import Benchmark, BenchmarkError +from valor_lite.semantic_segmentation.benchmark import ( + benchmark_add_data, + benchmark_evaluate, + benchmark_finalize, +) + + +def benchmark( + bitmask_shape: tuple[int, int], + number_of_unique_labels: int, + number_of_images: int, + *_, + memory_limit: float = 4.0, + time_limit: float = 10.0, + repeat: int = 1, + verbose: bool = False, +): + """ + Runs a single benchmark. + + Parameters + ---------- + bitmask_shape : tuple[int, int] + The size (h, w) of the bitmask array. + number_of_unique_labels : int + The number of unique labels used in the synthetic example. + number_of_images : int + The number of distinct datums that are created. + memory_limit : float + The maximum amount of system memory allowed in gigabytes (GB). + time_limit : float + The maximum amount of time permitted before killing the benchmark. + repeat : int + The number of times to run a benchmark to produce an average runtime. + verbose : bool, default=False + Toggles terminal output of benchmark results. 
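+
+    Raises
+    ------
+    BenchmarkError
+        If any stage of the benchmark fails or exceeds the configured limits.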
+ """ + + b = Benchmark( + time_limit=time_limit, + memory_limit=int(memory_limit * (1024**3)), + repeat=repeat, + verbose=verbose, + ) + + _, failed, details = b.run( + benchmark=benchmark_add_data, + n_labels=[number_of_unique_labels], + shape=[bitmask_shape], + ) + if failed: + raise BenchmarkError( + benchmark=details["benchmark"], + error_type=failed[0]["error"], + error_message=failed[0]["msg"], + ) + + _, failed, details = b.run( + benchmark=benchmark_finalize, + n_datums=[number_of_images], + n_labels=[number_of_unique_labels], + ) + if failed: + raise BenchmarkError( + benchmark=details["benchmark"], + error_type=failed[0]["error"], + error_message=failed[0]["msg"], + ) + + _, failed, details = b.run( + benchmark=benchmark_evaluate, + n_datums=[number_of_images], + n_labels=[number_of_unique_labels], + ) + if failed: + raise BenchmarkError( + benchmark=details["benchmark"], + error_type=failed[0]["error"], + error_message=failed[0]["msg"], + ) + + +if __name__ == "__main__": + + benchmark( + bitmask_shape=(4000, 4000), + number_of_images=1000, + number_of_unique_labels=10, + memory_limit=4.0, + time_limit=10.0, + repeat=1, + verbose=True, + ) diff --git a/lite/examples/benchmarking.ipynb b/lite/examples/benchmarking.ipynb new file mode 100644 index 000000000..7774a759e --- /dev/null +++ b/lite/examples/benchmarking.ipynb @@ -0,0 +1,279 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from valor_lite.profiling import Benchmark\n", + "\n", + "b = Benchmark(\n", + " time_limit=5.0, # 5s\n", + " memory_limit=8 * (1024 ** 3), # 8 GB\n", + " repeat=1,\n", + " verbose=True,\n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Semantic Segmentation" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from valor_lite.semantic_segmentation.benchmark import (\n", + " benchmark_add_data as semseg_add_data,\n", + " benchmark_finalize as semseg_finalize,\n", + " benchmark_evaluate as semseg_evaluate,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "n_datums = [\n", + " 10000,\n", + " 1000,\n", + " 100,\n", + " 10,\n", + " 1,\n", + "]\n", + "\n", + "n_labels = [\n", + " 1000,\n", + " 100,\n", + " 10,\n", + " 3,\n", + "]\n", + "\n", + "shapes = [\n", + " (10000, 10000),\n", + " (2500, 2500),\n", + " (1000, 1000),\n", + " (100, 100),\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 69%|██████▉ | 11/16 [00:46<00:21, 4.26s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=====================================================================\n", + "Details\n", + "{\n", + " \"benchmark\": \"benchmark_add_data\",\n", + " \"limits\": {\n", + " \"memory_limit\": \"8.0 GB\",\n", + " \"time_limit\": \"5.0 seconds\",\n", + " \"repeat\": 1\n", + " },\n", + " \"passed\": 8,\n", + " \"failed\": 8,\n", + " \"total\": 16\n", + "}\n", + "\n", + "Passed\n", + " complexity | runtime | n_labels | shape \n", + "---------------------------------------------------------------------\n", + " 300000000 | 1.5151 | 3 | (10000, 10000) \n", + " 62500000 | 0.5952 | 10 | (2500, 2500) \n", + " 10000000 | 0.0911 | 10 | (1000, 1000) \n", + " 1000000 | 0.0582 | 100 | (100, 100) \n", + "\n", + "Failed\n", + " complexity | error 
| n_labels | shape | msg \n", + "---------------------------------------------------------------------------------------\n", + " 100000000000 | MemoryError | 1000 | (10000, 10000) | Unable to allocate 186. GiB for an array with shape (1001, 20000, 10000) and data type bool\n", + " 10000000000 | MemoryError | 100 | (10000, 10000) | Unable to allocate 18.8 GiB for an array with shape (101, 20000, 10000) and data type bool\n", + " 6250000000 | MemoryError | 1000 | (2500, 2500) | Unable to allocate 11.7 GiB for an array with shape (1001, 5000, 2500) and data type bool\n", + " 1000000000 | MemoryError | 10 | (10000, 10000) | Unable to allocate 9.31 GiB for an array with shape (10, 10, 100000000) and data type bool\n", + " 1000000000 | MemoryError | 1000 | (1000, 1000) | Unable to allocate 931. GiB for an array with shape (1000, 1000, 1000000) and data type bool\n", + " 625000000 | MemoryError | 100 | (2500, 2500) | Unable to allocate 58.2 GiB for an array with shape (100, 100, 6250000) and data type bool\n", + " 100000000 | MemoryError | 100 | (1000, 1000) | Unable to allocate 9.31 GiB for an array with shape (100, 100, 1000000) and data type bool\n", + " 10000000 | MemoryError | 1000 | (100, 100) | Unable to allocate 9.31 GiB for an array with shape (1000, 1000, 10000) and data type bool\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "_ = b.run(\n", + " benchmark=semseg_add_data,\n", + " n_labels=n_labels,\n", + " shape=shapes,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 20%|██ | 4/20 [02:35<10:22, 38.92s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=====================================================================\n", + "Details\n", + "{\n", + " \"benchmark\": \"benchmark_finalize\",\n", + " \"limits\": {\n", + " \"memory_limit\": \"8.0 GB\",\n", + " \"time_limit\": \"5.0 seconds\",\n", + " \"repeat\": 1\n", + " },\n", + " \"passed\": 18,\n", + " \"failed\": 2,\n", + " \"total\": 20\n", + "}\n", + "\n", + "Passed\n", + " complexity | runtime | n_datums | n_labels \n", + "---------------------------------------------------------------------\n", + " 1000000 | 1.1142 | 10000 | 100 \n", + " 100000 | 0.1748 | 100 | 1000 \n", + " 100000 | 0.1086 | 1000 | 100 \n", + "\n", + "Failed\n", + " complexity | error | n_datums | n_labels | msg \n", + "---------------------------------------------------------------------------------------\n", + " 10000000 | MemoryError | 10000 | 1000 | Unable to allocate 7.63 MiB for an array with shape (1000, 1000) and data type int64\n", + " 1000000 | MemoryError | 1000 | 1000 | \n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "_ = b.run(\n", + " benchmark=semseg_finalize,\n", + " n_datums=n_datums,\n", + " n_labels=n_labels,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 20%|██ | 4/20 [02:25<09:40, 36.28s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=====================================================================\n", + "Details\n", + "{\n", + " \"benchmark\": \"benchmark_evaluate\",\n", + " \"limits\": {\n", + " \"memory_limit\": \"8.0 GB\",\n", + " \"time_limit\": \"5.0 seconds\",\n", + " \"repeat\": 1\n", + " },\n", + " 
\"passed\": 18,\n", + " \"failed\": 2,\n", + " \"total\": 20\n", + "}\n", + "\n", + "Passed\n", + " complexity | runtime | n_datums | n_labels \n", + "---------------------------------------------------------------------\n", + " 1000000 | 0.0537 | 10000 | 100 \n", + " 100000 | 0.0815 | 100 | 1000 \n", + " 100000 | 0.0137 | 1000 | 100 \n", + "\n", + "Failed\n", + " complexity | error | n_datums | n_labels | msg \n", + "---------------------------------------------------------------------------------------\n", + " 10000000 | MemoryError | 10000 | 1000 | Unable to allocate 23.8 MiB for an array with shape (1000, 1000, 25) and data type bool\n", + " 1000000 | MemoryError | 1000 | 1000 | Unable to allocate 3.73 GiB for an array with shape (1000, 1001, 1001) and data type int32\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "_ = b.run(\n", + " benchmark=semseg_evaluate,\n", + " n_datums=n_datums,\n", + " n_labels=n_labels,\n", + ")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".env-valor", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.15" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/lite/tests/semantic_segmentation/test_annotation.py b/lite/tests/semantic_segmentation/test_annotation.py index 999dd5240..89b0ba7a4 100644 --- a/lite/tests/semantic_segmentation/test_annotation.py +++ b/lite/tests/semantic_segmentation/test_annotation.py @@ -1,6 +1,10 @@ import numpy as np import pytest -from valor_lite.semantic_segmentation import Bitmask, Segmentation +from valor_lite.semantic_segmentation import ( + Bitmask, + Segmentation, + generate_segmentation, +) def test_bitmask(): @@ -78,3 +82,55 @@ def test_segmentation(): predictions=[], ) assert "missing predictions" in str(e) + + +def test_generate_segmentation(): + + # N labels > 1 + segmentation = generate_segmentation( + datum_uid="uid1", + number_of_unique_labels=3, + mask_height=2, + mask_width=3, + ) + + assert segmentation.uid == "uid1" + assert segmentation.shape == (2, 3) + assert segmentation.size == 6 + + assert len(segmentation.groundtruths) == 3 + assert all(gt.mask.dtype == np.bool_ for gt in segmentation.groundtruths) + assert all(gt.mask.shape == (2, 3) for gt in segmentation.groundtruths) + + assert len(segmentation.predictions) == 3 + assert all(pd.mask.dtype == np.bool_ for pd in segmentation.predictions) + assert all(pd.mask.shape == (2, 3) for pd in segmentation.predictions) + + # N labels = 1 + segmentation = generate_segmentation( + datum_uid="uid1", + number_of_unique_labels=1, + mask_height=2, + mask_width=3, + ) + + assert segmentation.uid == "uid1" + assert segmentation.shape == (2, 3) + assert segmentation.size == 6 + + assert len(segmentation.groundtruths) == 1 + assert all(gt.mask.dtype == np.bool_ for gt in segmentation.groundtruths) + assert all(gt.mask.shape == (2, 3) for gt in segmentation.groundtruths) + + assert len(segmentation.predictions) == 1 + assert all(pd.mask.dtype == np.bool_ for pd in segmentation.predictions) + assert all(pd.mask.shape == (2, 3) for pd in segmentation.predictions) + + # N labels = 0 + with pytest.raises(ValueError): + generate_segmentation( + datum_uid="uid1", + number_of_unique_labels=0, + mask_height=2, + 
mask_width=3, + ) diff --git a/lite/valor_lite/object_detection/manager.py b/lite/valor_lite/object_detection/manager.py index bd7663107..bfaacf5ab 100644 --- a/lite/valor_lite/object_detection/manager.py +++ b/lite/valor_lite/object_detection/manager.py @@ -334,6 +334,10 @@ def evaluate( return metrics +def defaultdict_int(): + return defaultdict(int) + + class DataLoader: """ Object Detection DataLoader @@ -342,8 +346,8 @@ class DataLoader: def __init__(self): self._evaluator = Evaluator() self.pairs: list[NDArray[np.float64]] = list() - self.groundtruth_count = defaultdict(lambda: defaultdict(int)) - self.prediction_count = defaultdict(lambda: defaultdict(int)) + self.groundtruth_count = defaultdict(defaultdict_int) + self.prediction_count = defaultdict(defaultdict_int) def _add_datum(self, uid: str) -> int: """ diff --git a/lite/valor_lite/profiling.py b/lite/valor_lite/profiling.py new file mode 100644 index 000000000..be275ce3f --- /dev/null +++ b/lite/valor_lite/profiling.py @@ -0,0 +1,374 @@ +import json +import math +import multiprocessing as mp +import resource +import time +from collections import deque +from multiprocessing import Queue +from typing import Any + +from tqdm import tqdm + + +class BenchmarkError(Exception): + def __init__( + self, benchmark: str, error_type: str, error_message: str + ) -> None: + super().__init__( + f"'{benchmark}' raised '{error_type}' with the following message: {error_message}" + ) + + +def _timeit_subprocess(*args, __fn, __queue: Queue, **kwargs): + """ + Multiprocessing subprocess that reports either runtime or errors. + + This is handled within a subprocess to protect the benchmark against OOM errors. + """ + try: + timer_start = time.perf_counter() + __fn(*args, **kwargs) + timer_end = time.perf_counter() + __queue.put(timer_end - timer_start) + except Exception as e: + __queue.put(e) + + +def create_runtime_profiler( + time_limit: float | None, + repeat: int = 1, +): + """ + Creates a runtime profiler as a decorating function. + + The profiler reports runtime of the wrapped function from a subprocess to protect against OOM errors. + + Parameters + ---------- + time_limit : float, optional + An optional time limit to constrain the benchmark. + repeat : int, default=1 + The number of times to repeat the benchmark to produce an average runtime. + """ + ctx = mp.get_context("spawn") + + def decorator(fn): + def wrapper(*args, **kwargs): + # Record average runtime over repeated runs. + elapsed = 0 + for _ in range(repeat): + q = ctx.Queue() + p = ctx.Process( + target=_timeit_subprocess, + args=args, + kwargs={"__fn": fn, "__queue": q, **kwargs}, + ) + p.start() + p.join(timeout=time_limit) + + # Check if computation finishes within the timeout + if p.is_alive(): + p.terminate() + p.join() + q.close() + q.join_thread() + raise TimeoutError( + f"Function '{fn.__name__}' did not complete within {time_limit} seconds." 
+ ) + + # Retrieve the result + result = q.get(timeout=1) + if isinstance(result, Exception): + raise result + elif isinstance(result, float): + elapsed += result + else: + raise TypeError(type(result).__name__) + + return elapsed / repeat + + return wrapper + + return decorator + + +def pretty_print_results(results: tuple): + valid, invalid, permutations = results + + print( + "=====================================================================" + ) + print("Details") + print(json.dumps(permutations, indent=4)) + + if len(valid) > 0: + print() + print("Passed") + keys = ["complexity", "runtime", *valid[0]["details"].keys()] + header = " | ".join(f"{header:^15}" for header in keys) + print(header) + print("-" * len(header)) + for entry in valid: + values = [ + entry["complexity"], + round(entry["runtime"], 4), + *entry["details"].values(), + ] + row = " | ".join(f"{str(value):^15}" for value in values) + print(row) + + if len(invalid) > 0: + print() + print("Failed") + keys = ["complexity", "error", *invalid[0]["details"].keys(), "msg"] + header = " | ".join(f"{header:^15}" for header in keys) + print(header) + print("-" * len(header)) + for entry in invalid: + values = [ + entry["complexity"], + entry["error"], + *entry["details"].values(), + entry["msg"], + ] + row = " | ".join(f"{str(value):^15}" for value in values) + print(row) + + +def _calculate_complexity(params: list[int | tuple[int]]) -> int: + """ + Basic metric of benchmark complexity. + """ + flattened_params = [ + math.prod(p) if isinstance(p, tuple) else p for p in params + ] + return math.prod(flattened_params) + + +class Benchmark: + def __init__( + self, + time_limit: float | None, + memory_limit: int | None, + *_, + repeat: int | None = 1, + verbose: bool = False, + ): + self.time_limit = time_limit + self.memory_limit = memory_limit + self.repeat = repeat + self.verbose = verbose + + def get_limits( + self, + *_, + readable: bool = True, + memory_unit: str = "GB", + time_unit: str = "seconds", + ) -> dict[str, str | int | float | None]: + """ + Returns a dictionary of benchmark limits. + + Parameters + ---------- + readable : bool, default=True + Toggles whether the output should be human readable. + memory_unit : str, default="GB" + Toggles what unit to display the memory limit with when 'readable=True'. + time_unit : str, default="seconds" + Toggles what unit to display the time limit with when 'readable=True'. + + Returns + ------- + dict[str, str | int | float | None] + The benchmark limits. + """ + + memory_value = self.memory_limit + if readable and memory_value is not None: + match memory_unit: + case "TB": + memory_value /= 1024**4 + case "GB": + memory_value /= 1024**3 + case "MB": + memory_value /= 1024**2 + case "KB": + memory_value /= 1024 + case "B": + pass + case _: + valid_set = {"TB", "GB", "MB", "KB", "B"} + raise ValueError( + f"Expected memory unit to be in the set {valid_set}, received '{memory_unit}'." + ) + memory_value = f"{memory_value} {memory_unit}" + + time_value = self.time_limit + if readable and time_value is not None: + match time_unit: + case "minutes": + time_value /= 60 + case "seconds": + pass + case "milliseconds": + time_value *= 1000 + case _: + valid_set = {"minutes", "seconds", "milliseconds"} + raise ValueError( + f"Expected time unit to be in the set {valid_set}, received '{time_unit}'." 
+ ) + time_value = f"{time_value} {time_unit}" + + return { + "memory_limit": memory_value, + "time_limit": time_value, + "repeat": self.repeat, + } + + @property + def memory_limit(self) -> int | None: + """ + The memory limit in bytes (B). + """ + return self._memory_limit + + @memory_limit.setter + def memory_limit(self, limit: int | None): + """ + Stores the memory limit and restricts resources. + """ + self._memory_limit = limit + if limit is not None: + _, hard = resource.getrlimit(resource.RLIMIT_AS) + resource.setrlimit(resource.RLIMIT_AS, (limit, hard)) + + def run( + self, + benchmark, + **kwargs: list[Any], + ): + """ + Runs a benchmark with ranges of parameters. + + Parameters + ---------- + benchmark : Callable + The benchmark function. + **kwargs : list[Any] + Keyword arguments passing lists of parameters to benchmark. The values should be sorted in + decreasing complexity. For example, if the number of labels is a parameter then a higher + number of unique labels would be considered "more" complex. + + Example + ------- + >>> b = Benchmark( + ... time_limit=10.0, + ... memory_limit=8 * (1024**3), + ... repeat=1, + ... verbose=False, + ... ) + >>> results = b.run( + ... benchmark=semseg_add_data, + ... n_labels=[ + ... 100, + ... 10, + ... ], + ... shape=[ + ... (1000, 1000), + ... (100, 100), + ... ], + ... ) + """ + + nvars = len(kwargs) + keys = tuple(kwargs.keys()) + vars = tuple(kwargs[key] for key in keys) + + initial_indices = tuple(0 for _ in range(nvars)) + max_indices = tuple(len(v) for v in vars) + permutations = math.prod(max_indices) + + # Initialize queue with the starting index (0, ...) + queue = deque() + queue.append(initial_indices) + + # Keep track of explored combinations to avoid duplicates + explored = set() + explored.add(initial_indices) + + # Store valid combinations that finish within the time limit + valid_combinations = [] + invalid_combinations = [] + + pbar = tqdm(total=math.prod(max_indices), disable=(not self.verbose)) + prev_count = 0 + while queue: + + current_indices = queue.popleft() + parameters = { + k: v[current_indices[idx]] + for idx, (k, v) in enumerate(zip(keys, vars)) + } + complexity = _calculate_complexity(list(parameters.values())) + + details: dict = {k: str(v) for k, v in parameters.items()} + + # update terminal with status + count = len(valid_combinations) + len(invalid_combinations) + pbar.update(count - prev_count) + prev_count = count + + try: + runtime = benchmark( + time_limit=self.time_limit, + repeat=self.repeat, + **parameters, + ) + valid_combinations.append( + { + "complexity": complexity, + "runtime": runtime, + "details": details, + } + ) + continue + except Exception as e: + invalid_combinations.append( + { + "complexity": complexity, + "error": type(e).__name__, + "msg": str(e), + "details": details, + } + ) + + for idx in range(nvars): + new_indices = list(current_indices) + if new_indices[idx] + 1 < max_indices[idx]: + new_indices[idx] += 1 + new_indices_tuple = tuple(new_indices) + if new_indices_tuple not in explored: + queue.append(new_indices_tuple) + explored.add(new_indices_tuple) + + valid_combinations.sort(key=lambda x: -x["complexity"]) + invalid_combinations.sort(key=lambda x: -x["complexity"]) + + # clear terminal and display results + results = ( + valid_combinations, + invalid_combinations, + { + "benchmark": benchmark.__name__, + "limits": self.get_limits(readable=True), + "passed": permutations - len(invalid_combinations), + "failed": len(invalid_combinations), + "total": permutations, + }, + ) 
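+
+        # NOTE: "passed" in the summary above counts every permutation that
+        # did not explicitly fail, including less complex permutations that
+        # the search never needed to run.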
+ pbar.close() + if self.verbose: + pretty_print_results(results) + + return results diff --git a/lite/valor_lite/semantic_segmentation/__init__.py b/lite/valor_lite/semantic_segmentation/__init__.py index dfa0e2380..51bd54d02 100644 --- a/lite/valor_lite/semantic_segmentation/__init__.py +++ b/lite/valor_lite/semantic_segmentation/__init__.py @@ -1,4 +1,4 @@ -from .annotation import Bitmask, Segmentation +from .annotation import Bitmask, Segmentation, generate_segmentation from .manager import DataLoader, Evaluator from .metric import Metric, MetricType @@ -9,4 +9,5 @@ "Bitmask", "Metric", "MetricType", + "generate_segmentation", ] diff --git a/lite/valor_lite/semantic_segmentation/annotation.py b/lite/valor_lite/semantic_segmentation/annotation.py index acd99f8f7..7e96fe926 100644 --- a/lite/valor_lite/semantic_segmentation/annotation.py +++ b/lite/valor_lite/semantic_segmentation/annotation.py @@ -29,7 +29,7 @@ class Bitmask: def __post_init__(self): if self.mask.dtype != np.bool_: raise ValueError( - f"Bitmask recieved mask with dtype `{self.mask.dtype}`." + f"Bitmask recieved mask with dtype '{self.mask.dtype}'." ) @@ -94,3 +94,86 @@ def __post_init__(self): self.shape = groundtruth_shape.pop() self.size = int(np.prod(np.array(self.shape))) + + +def generate_segmentation( + datum_uid: str, + number_of_unique_labels: int, + mask_height: int, + mask_width: int, +) -> Segmentation: + """ + Generates a semantic segmentation annotation. + + Parameters + ---------- + datum_uid : str + The datum UID for the generated segmentation. + number_of_unique_labels : int + The number of unique labels. + mask_height : int + The height of the mask in pixels. + mask_width : int + The width of the mask in pixels. + + Returns + ------- + Segmentation + A generated semantic segmenatation annotation. + """ + + if number_of_unique_labels > 1: + common_proba = 0.4 / (number_of_unique_labels - 1) + min_proba = min(common_proba, 0.1) + labels = [str(i) for i in range(number_of_unique_labels)] + [None] + proba = ( + [0.5] + + [common_proba for _ in range(number_of_unique_labels - 1)] + + [0.1] + ) + elif number_of_unique_labels == 1: + labels = ["0", None] + proba = [0.9, 0.1] + min_proba = 0.1 + else: + raise ValueError( + "The number of unique labels should be greater than zero." + ) + + probabilities = np.array(proba, dtype=np.float64) + weights = (probabilities / min_proba).astype(np.int32) + + indices = np.random.choice( + np.arange(len(weights)), + size=(mask_height * 2, mask_width), + p=probabilities, + ) + + N = len(labels) + + masks = np.arange(N)[:, None, None] == indices + + gts = [] + pds = [] + for lidx in range(N): + label = labels[lidx] + if label is None: + continue + gts.append( + Bitmask( + mask=masks[lidx, :mask_height, :], + label=label, + ) + ) + pds.append( + Bitmask( + mask=masks[lidx, mask_height:, :], + label=label, + ) + ) + + return Segmentation( + uid=datum_uid, + groundtruths=gts, + predictions=pds, + ) diff --git a/lite/valor_lite/semantic_segmentation/benchmark.py b/lite/valor_lite/semantic_segmentation/benchmark.py new file mode 100644 index 000000000..b4950eac1 --- /dev/null +++ b/lite/valor_lite/semantic_segmentation/benchmark.py @@ -0,0 +1,151 @@ +from valor_lite.profiling import create_runtime_profiler +from valor_lite.semantic_segmentation import DataLoader, generate_segmentation + + +def benchmark_add_data( + n_labels: int, + shape: tuple[int, int], + time_limit: float | None, + repeat: int = 1, +) -> float: + """ + Benchmarks 'Dataloader.add_data' for semantic segmentation. 
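+
+    Synthetic data generation happens outside the profiled call; only the
+    'add_data' call itself is timed, in a spawned subprocess.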
+ + Parameters + ---------- + n_labels : int + The number of unique labels to generate. + shape : tuple[int, int] + The size (h,w) of the mask to generate. + time_limit : float, optional + An optional time limit to constrain the benchmark. + repeat : int + The number of times to run the benchmark to produce a runtime average. + + Returns + ------- + float + The average runtime. + """ + + profile = create_runtime_profiler( + time_limit=time_limit, + repeat=repeat, + ) + + elapsed = 0 + for _ in range(repeat): + data = generate_segmentation( + datum_uid="uid", + number_of_unique_labels=n_labels, + mask_height=shape[0], + mask_width=shape[1], + ) + loader = DataLoader() + elapsed += profile(loader.add_data)([data]) + return elapsed / repeat + + +def benchmark_finalize( + n_datums: int, + n_labels: int, + time_limit: float | None, + repeat: int = 1, +): + """ + Benchmarks 'Dataloader.finalize' for semantic segmentation. + + Parameters + ---------- + n_datums : int + The number of datums to generate. + n_labels : int + The number of unique labels to generate. + time_limit : float, optional + An optional time limit to constrain the benchmark. + repeat : int + The number of times to run the benchmark to produce a runtime average. + + Returns + ------- + float + The average runtime. + """ + + profile = create_runtime_profiler( + time_limit=time_limit, + repeat=repeat, + ) + + elapsed = 0 + for _ in range(repeat): + + data = [ + generate_segmentation( + datum_uid=str(i), + number_of_unique_labels=n_labels, + mask_height=5, + mask_width=5, + ) + for i in range(10) + ] + loader = DataLoader() + for datum_idx in range(n_datums): + segmentation = data[datum_idx % 10] + segmentation.uid = str(datum_idx) + loader.add_data([segmentation]) + elapsed += profile(loader.finalize)() + return elapsed / repeat + + +def benchmark_evaluate( + n_datums: int, + n_labels: int, + time_limit: float | None, + repeat: int = 1, +): + """ + Benchmarks 'Evaluator.evaluate' for semantic segmentation. + + Parameters + ---------- + n_datums : int + The number of datums to generate. + n_labels : int + The number of unique labels to generate. + time_limit : float, optional + An optional time limit to constrain the benchmark. + repeat : int + The number of times to run the benchmark to produce a runtime average. + + Returns + ------- + float + The average runtime. + """ + + profile = create_runtime_profiler( + time_limit=time_limit, + repeat=repeat, + ) + + elapsed = 0 + for _ in range(repeat): + + data = [ + generate_segmentation( + datum_uid=str(i), + number_of_unique_labels=n_labels, + mask_height=5, + mask_width=5, + ) + for i in range(10) + ] + loader = DataLoader() + for datum_idx in range(n_datums): + segmentation = data[datum_idx % 10] + segmentation.uid = str(datum_idx) + loader.add_data([segmentation]) + evaluator = loader.finalize() + elapsed += profile(evaluator.evaluate)() + return elapsed / repeat diff --git a/lite/valor_lite/semantic_segmentation/manager.py b/lite/valor_lite/semantic_segmentation/manager.py index 8506b4e9b..50ddd283f 100644 --- a/lite/valor_lite/semantic_segmentation/manager.py +++ b/lite/valor_lite/semantic_segmentation/manager.py @@ -243,6 +243,10 @@ def evaluate( return self.compute_precision_recall_iou(filter_=filter_) +def defaultdict_int(): + return defaultdict(int) + + class DataLoader: """ Segmentation DataLoader. 
@@ -250,8 +254,8 @@ class DataLoader: def __init__(self): self._evaluator = Evaluator() - self.groundtruth_count = defaultdict(lambda: defaultdict(int)) - self.prediction_count = defaultdict(lambda: defaultdict(int)) + self.groundtruth_count = defaultdict(defaultdict_int) + self.prediction_count = defaultdict(defaultdict_int) self.matrices = list() self.pixel_count = list()
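For reference, the pieces introduced above compose as follows. This is a minimal sketch using only the APIs added in this diff (generate_segmentation, DataLoader, Benchmark, and the semantic segmentation benchmark helpers); the parameter values are illustrative, not recommendations.

from valor_lite.profiling import Benchmark
from valor_lite.semantic_segmentation import DataLoader, generate_segmentation
from valor_lite.semantic_segmentation.benchmark import benchmark_add_data

# Generate one synthetic datum with 3 labels on a 100x100 bitmask and run a
# regular evaluation over it.
segmentation = generate_segmentation(
    datum_uid="uid0",
    number_of_unique_labels=3,
    mask_height=100,
    mask_width=100,
)
loader = DataLoader()
loader.add_data([segmentation])
evaluator = loader.finalize()
metrics = evaluator.evaluate()

# Profile the ingestion stage over a small parameter sweep; 'run' returns the
# passing permutations, the failing permutations, and a summary dictionary.
b = Benchmark(
    time_limit=5.0,              # seconds per profiled call
    memory_limit=4 * (1024**3),  # bytes; enforced via RLIMIT_AS
    repeat=1,
    verbose=True,
)
passed, failed, details = b.run(
    benchmark=benchmark_add_data,
    n_labels=[10, 3],
    shape=[(1000, 1000), (100, 100)],
)

Note that setting memory_limit restricts the process address space via resource.setrlimit, so oversized permutations surface as MemoryError entries in the failed list rather than aborting the sweep.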