From c64392876aa8ca256af48813f2b72b4e885fbfbe Mon Sep 17 00:00:00 2001
From: Benjamin Wilson <benjaminrwilson@users.noreply.github.com>
Date: Sat, 5 Jun 2021 08:45:29 -0400
Subject: [PATCH] Detection evaluation optimizations.

---
 argoverse/evaluation/detection/eval.py  | 79 ++++++++++++-------------
 argoverse/evaluation/detection/utils.py | 30 +++++++---
 setup.py                                |  1 +
 tests/test_eval_detection.py            |  5 +-
 4 files changed, 64 insertions(+), 51 deletions(-)

diff --git a/argoverse/evaluation/detection/eval.py b/argoverse/evaluation/detection/eval.py
index 0c20a9f4..50c1bc8c 100644
--- a/argoverse/evaluation/detection/eval.py
+++ b/argoverse/evaluation/detection/eval.py
@@ -60,50 +60,48 @@
 """
 import argparse
 import logging
-import os
+import multiprocessing as mp
 from collections import defaultdict
-from multiprocessing import Pool
+from dataclasses import dataclass
 from pathlib import Path
-from typing import DefaultDict, List
+from typing import DefaultDict, Dict, List, Optional
 
 import numpy as np
 import pandas as pd
+from tqdm.contrib.concurrent import process_map
 
 from argoverse.evaluation.detection.constants import N_TP_ERRORS, SIGNIFICANT_DIGITS, STATISTIC_NAMES
-from argoverse.evaluation.detection.utils import DetectionCfg, accumulate, calc_ap, plot
+from argoverse.evaluation.detection.utils import AccumulateJob, DetectionCfg, accumulate, calc_ap, plot
 from argoverse.map_representation.map_api import ArgoverseMap
 
 logger = logging.getLogger(__name__)
 
 
+@dataclass
 class DetectionEvaluator:
-    """Instantiates a DetectionEvaluator object for evaluation."""
-
-    def __init__(
-        self,
-        dt_root_fpath: Path,
-        gt_root_fpath: Path,
-        figs_fpath: Path,
-        cfg: DetectionCfg = DetectionCfg(),
-        num_procs: int = -1,
-    ) -> None:
-        """
-        Args:
-            dt_fpath_root: Path to the folder which contains the detections.
-            gt_fpath_root: Path to the folder which contains the split of logs.
-            figs_fpath: Path to the folder which will contain the output figures.
-            cfg: Detection configuration settings.
-            num_procs: Number of processes among which to subdivide work.
-                Specifying -1 will use one process per available core
-        """
-        self.dt_root_fpath = dt_root_fpath
-        self.gt_root_fpath = gt_root_fpath
-        self.figs_fpath = figs_fpath
-        self.cfg = cfg
-        self.num_procs = os.cpu_count() if num_procs == -1 else num_procs
-        self.avm = (
-            ArgoverseMap(self.cfg.map_root) if self.cfg.eval_only_roi_instances else None
-        )  # map is only required if using Region of Interest (ROI) information to filter objects
+    """A DetectionEvaluator object for evaluation.
+
+    Args:
+        dt_root_fpath: Path to the folder which contains the detections.
+        gt_root_fpath: Path to the folder which contains the split of logs.
+        figs_fpath: Path to the folder which will contain the output figures.
+        cfg: Detection configuration settings.
+        num_procs: Number of processes among which to subdivide work.
+            Specifying -1 will use one process per available core.
+    """
+
+    dt_root_fpath: Path
+    gt_root_fpath: Path
+    figs_fpath: Path
+    cfg: DetectionCfg = DetectionCfg()
+    num_procs: int = -1
+
+    avm: Optional[ArgoverseMap] = None  # resolved per instance in __post_init__, never at class-definition time
+
+    def __post_init__(self) -> None:
+        self.num_procs = mp.cpu_count() if self.num_procs == -1 else self.num_procs
+        if self.avm is None and self.cfg.eval_only_roi_instances:  # map is only required for ROI filtering
+            self.avm = ArgoverseMap(self.cfg.map_root)
 
     def evaluate(self) -> pd.DataFrame:
         """Evaluate detection output and return metrics. The multiprocessing
@@ -119,16 +117,15 @@ def evaluate(self) -> pd.DataFrame:
         gt_fpaths = list(self.gt_root_fpath.glob("*/per_sweep_annotations_amodal/*.json"))
 
         assert len(dt_fpaths) == len(gt_fpaths)
-        data: DefaultDict[str, np.ndarray] = defaultdict(list)
+        data: DefaultDict[str, List[np.ndarray]] = defaultdict(list)
         cls_to_ninst: DefaultDict[str, int] = defaultdict(int)
 
+        jobs = [AccumulateJob(self.dt_root_fpath, gt_fpath, self.cfg, self.avm) for gt_fpath in gt_fpaths]
         if self.num_procs == 1:
-            accum = [accumulate(self.dt_root_fpath, gt_fpath, self.cfg, self.avm) for gt_fpath in gt_fpaths]
-
+            accum = [accumulate(job) for job in jobs]
         else:
-            args = [(self.dt_root_fpath, gt_fpath, self.cfg, self.avm) for gt_fpath in gt_fpaths]
-            with Pool(self.num_procs) as p:
-                accum = p.starmap(accumulate, args)
+            chunksize = max(1, len(jobs) // self.num_procs)
+            accum = process_map(accumulate, jobs, max_workers=self.num_procs, chunksize=chunksize)
 
         for frame_stats, frame_cls_to_inst in accum:
             for cls_name, cls_stats in frame_stats.items():
@@ -136,12 +133,12 @@ def evaluate(self) -> pd.DataFrame:
             for cls_name, num_inst in frame_cls_to_inst.items():
                 cls_to_ninst[cls_name] += num_inst
 
-        data = defaultdict(np.ndarray, {k: np.vstack(v) for k, v in data.items()})
+        aggregated_data = {k: np.vstack(v) for k, v in data.items()}
 
         init_data = {dt_cls: self.cfg.summary_default_vals for dt_cls in self.cfg.dt_classes}
         summary = pd.DataFrame.from_dict(init_data, orient="index", columns=STATISTIC_NAMES)
         summary_update = pd.DataFrame.from_dict(
-            self.summarize(data, cls_to_ninst), orient="index", columns=STATISTIC_NAMES
+            self.summarize(aggregated_data, cls_to_ninst), orient="index", columns=STATISTIC_NAMES
         )
 
         summary.update(summary_update)
@@ -152,7 +149,7 @@ def evaluate(self) -> pd.DataFrame:
         return summary
 
     def summarize(
-        self, data: DefaultDict[str, np.ndarray], cls_to_ninst: DefaultDict[str, int]
+        self, data: Dict[str, np.ndarray], cls_to_ninst: DefaultDict[str, int]
     ) -> DefaultDict[str, List[float]]:
         """Calculate and print the detection metrics.
 
@@ -205,8 +202,6 @@ def summarize(
             cds = ap * tp_scores.mean()
 
             summary[cls_name] = [ap, *tp_metrics, cds]
-
-        logger.info(f"summary = {summary}")
         return summary
 
 
diff --git a/argoverse/evaluation/detection/utils.py b/argoverse/evaluation/detection/utils.py
index 07326277..53f5daad 100644
--- a/argoverse/evaluation/detection/utils.py
+++ b/argoverse/evaluation/detection/utils.py
@@ -13,6 +13,7 @@
 import logging
 import os
 from collections import defaultdict
+from dataclasses import dataclass
 from enum import Enum, auto
 from pathlib import Path
 from typing import DefaultDict, List, NamedTuple, Optional, Tuple, Union
@@ -99,15 +100,28 @@ class DetectionCfg(NamedTuple):
     map_root: _PathLike = Path(__file__).parent.parent.parent.parent / "map_files"  # argoverse-api/map_files
 
 
-def accumulate(
-    dt_root_fpath: Path, gt_fpath: Path, cfg: DetectionCfg, avm: Optional[ArgoverseMap]
-) -> Tuple[DefaultDict[str, np.ndarray], DefaultDict[str, int]]:
-    """Accumulate the true/false positives (boolean flags) and true positive errors for each class.
+@dataclass
+class AccumulateJob:
+    """Dataclass args for running `accumulate`.
 
     Args:
         dt_root_fpath: Detections root folder file path.
         gt_fpath: Ground truth file path.
         cfg: Detection configuration.
+        avm: Argoverse map object, or None when ROI information is not used in evaluation.
+    """
+
+    dt_root_fpath: Path
+    gt_fpath: Path
+    cfg: DetectionCfg
+    avm: Optional[ArgoverseMap]
+
+
+def accumulate(job: AccumulateJob) -> Tuple[DefaultDict[str, np.ndarray], DefaultDict[str, int]]:
+    """Accumulate the true/false positives (boolean flags) and true positive errors for each class.
+
+    Args:
+        job: Accumulate job holding the detections root, ground truth file path, configuration, and map.
 
     Returns:
         cls_to_accum: Class to accumulated statistics dictionary of shape |C| -> (N, K + S) where C
@@ -116,8 +130,10 @@ def accumulate(
         cls_to_ninst: Mapping of shape |C| -> (1,) the class names to the number of instances in the ground
             truth dataset.
     """
+    dt_root_fpath, gt_fpath, cfg, avm = job.dt_root_fpath, job.gt_fpath, job.cfg, job.avm
+
     log_id = gt_fpath.parents[1].stem
-    logger.info(f"log_id = {log_id}")
+    logger.debug(f"log_id = {log_id}")
     ts = int(gt_fpath.stem.split("_")[-1])
 
     dt_fpath = dt_root_fpath / f"{log_id}/per_sweep_annotations_amodal/" f"tracked_object_labels_{ts}.json"
@@ -154,8 +170,8 @@ def accumulate(
         )
         gt_filtered = remove_duplicate_instances(gt_filtered, cfg)
 
-        logger.info(f"{dt_filtered.shape[0]} detections")
-        logger.info(f"{gt_filtered.shape[0]} ground truth")
+        logger.debug(f"{dt_filtered.shape[0]} detections")
+        logger.debug(f"{gt_filtered.shape[0]} ground truth")
         if dt_filtered.shape[0] > 0:
             ranked_dts, ranked_scores = rank(dt_filtered)
 
diff --git a/setup.py b/setup.py
index a74b814c..f120e041 100755
--- a/setup.py
+++ b/setup.py
@@ -67,6 +67,7 @@
         "scipy>=1.4.0",
         "shapely",
         "sklearn",
+        "tqdm",
         "typing_extensions",
     ],
     # for older pip version, use with --process-dependency-links
diff --git a/tests/test_eval_detection.py b/tests/test_eval_detection.py
index 35b7c660..e5fc07c3 100644
--- a/tests/test_eval_detection.py
+++ b/tests/test_eval_detection.py
@@ -19,6 +19,7 @@
 from argoverse.data_loading.pose_loader import get_city_SE3_egovehicle_at_sensor_t
 from argoverse.evaluation.detection.eval import DetectionEvaluator
 from argoverse.evaluation.detection.utils import (
+    AccumulateJob,
     AffFnType,
     DetectionCfg,
     DistFnType,
@@ -200,13 +201,13 @@ def test_wrap_angle() -> None:
 def test_accumulate() -> None:
     """Verify that the accumulate function matches known output for a self-comparison."""
     cfg = DetectionCfg(eval_only_roi_instances=False)
-    # compare a set of labels to itself
-    cls_to_accum, cls_to_ninst = accumulate(
+    job = AccumulateJob(
         TEST_DATA_LOC / "detections",
         TEST_DATA_LOC / "detections/1/per_sweep_annotations_amodal/tracked_object_labels_0.json",
         cfg,
         avm=None,  # ArgoverseMap instance not required when not using ROI info in evaluation
     )
+    cls_to_accum, cls_to_ninst = accumulate(job)
     # ensure the detections match at all thresholds, have 0 TP errors, and have AP = 1
     expected_ATE = 0.0
     expected_ASE = 0.0