openvinotoolkit · yunchu · Nov 20, 2023 · Nov 21, 2023
@@ -9,6 +9,7 @@ All notable changes to this project will be documented in this file.
 - Update ModelAPI configuration(<https://github.com/openvinotoolkit/training_extensions/pull/2564>)
 - Add Anomaly modelAPI changes (<https://github.com/openvinotoolkit/training_extensions/pull/2563>)
 - Update Image numpy access (<https://github.com/openvinotoolkit/training_extensions/pull/2586>)
+- Make max_num_detections configurable (<https://github.com/openvinotoolkit/training_extensions/pull/2647>)
 
 ### Bug fixes
 

@@ -8,6 +8,7 @@
 from typing import Callable, Dict, List
 
 import numpy as np
+from torch.cuda import is_available as cuda_available
 
 from otx.algorithms.common.adapters.torch.utils import BsSearchAlgo
 from otx.algorithms.common.utils.logger import get_logger
@@ -53,6 +54,10 @@ def adapt_batch_size(train_func: Callable, cfg, datasets: List, validate: bool =
         not_increase (bool) : Whether adapting batch size to larger value than default value or not.
     """
 
+    if not cuda_available():
+        logger.warning("Skip Auto-adaptive batch size: CUDA should be available, but it isn't.")
+        return
+
     def train_func_single_iter(batch_size):
         copied_cfg = deepcopy(cfg)
         _set_batch_size(copied_cfg, batch_size)

@@ -1,18 +1,7 @@
 """Base Configuration of OTX Common Algorithms."""
 
-# Copyright (C) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions
-# and limitations under the License.
+# Copyright (C) 2022-2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
 
 from sys import maxsize
 
@@ -227,6 +216,16 @@ class BasePostprocessing(ParameterGroup):
             affects_outcome_of=ModelLifecycle.INFERENCE,
         )
 
+        max_num_detections = configurable_integer(
+            header="Maximum number of detections per image",
+            description="Extra detection outputs will be discarded in non-maximum suppression process. "
+            "Defaults to 0, which means per-model default value.",
+            default_value=0,
+            min_value=0,
+            max_value=10000,
+            affects_outcome_of=ModelLifecycle.INFERENCE,
+        )
+
         use_ellipse_shapes = configurable_boolean(
             default_value=False,
             header="Use ellipse shapes",

@@ -64,13 +64,14 @@ def configure(
         ir_options=None,
         data_classes=None,
         model_classes=None,
+        max_num_detections=0,
     ):
         """Create MMCV-consumable config from given inputs."""
         logger.info(f"configure!: training={training}")
 
         self.configure_base(cfg, data_cfg, data_classes, model_classes)
         self.configure_device(cfg, training)
-        self.configure_model(cfg, ir_options)
+        self.configure_model(cfg, ir_options, max_num_detections)
         self.configure_ckpt(cfg, model_ckpt)
         self.configure_data(cfg, training, data_cfg)
         self.configure_regularization(cfg, training)
@@ -113,7 +114,7 @@ def configure_base(self, cfg, data_cfg, data_classes, model_classes):
             new_classes = np.setdiff1d(data_classes, model_classes).tolist()
             train_data_cfg["new_classes"] = new_classes
 
-    def configure_model(self, cfg, ir_options):  # noqa: C901
+    def configure_model(self, cfg, ir_options, max_num_detections=0):  # noqa: C901
         """Patch config's model.
 
         Change model type to super type
@@ -149,6 +150,23 @@ def is_mmov_model(key, value):
                 {"model_path": ir_model_path, "weight_path": ir_weight_path, "init_weight": ir_weight_init},
             )
 
+        # Test config
+        if max_num_detections > 0:
+            logger.info(f"Model max_num_detections: {max_num_detections}")
+            test_cfg = cfg.model.test_cfg
+            test_cfg.max_per_img = max_num_detections
+            test_cfg.nms_pre = max_num_detections * 10
+            # Special cases for 2-stage detectors (e.g. MaskRCNN)
+            if hasattr(test_cfg, "rpn"):
+                test_cfg.rpn.nms_pre = max_num_detections * 20
+                test_cfg.rpn.max_per_img = max_num_detections * 10
+            if hasattr(test_cfg, "rcnn"):
+                test_cfg.rcnn.max_per_img = max_num_detections
+            train_cfg = cfg.model.train_cfg
+            if hasattr(train_cfg, "rpn_proposal"):
+                train_cfg.rpn_proposal.nms_pre = max_num_detections * 20
+                train_cfg.rpn_proposal.max_per_img = max_num_detections * 10
+
     def configure_data(self, cfg, training, data_cfg):  # noqa: C901
         """Patch cfg.data.
 

@@ -1,18 +1,7 @@
 """Task of OTX Detection using mmdetection training backend."""
 
 # Copyright (C) 2023 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions
-# and limitations under the License.
+# SPDX-License-Identifier: Apache-2.0
 
 import glob
 import io
@@ -206,6 +195,7 @@ def configure(self, training=True, subset="train", ir_options=None, train_datase
             ir_options,
             data_classes,
             model_classes,
+            self.max_num_detections,
         )
         if should_cluster_anchors(self._recipe_cfg):
             if train_dataset is not None:
@@ -513,6 +503,12 @@ def _export_model(
         assert len(self._precision) == 1
         export_options["precision"] = str(self._precision[0])
         export_options["type"] = str(export_format)
+        if self.max_num_detections > 0:
+            logger.info(f"Export max_num_detections: {self.max_num_detections}")
+            post_proc_cfg = export_options["deploy_cfg"]["codebase_config"]["post_processing"]
+            post_proc_cfg["max_output_boxes_per_class"] = self.max_num_detections
+            post_proc_cfg["keep_top_k"] = self.max_num_detections
+            post_proc_cfg["pre_top_k"] = self.max_num_detections * 10
 
         export_options["deploy_cfg"]["dump_features"] = dump_features
         if dump_features:

@@ -1,18 +1,7 @@
 """Openvino Task of Detection."""
 
-# Copyright (C) 2021 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions
-# and limitations under the License.
+# Copyright (C) 2021-2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
 
 import copy
 import io

@@ -1,18 +1,7 @@
 """Configuration file of OTX Detection."""
 
-# Copyright (C) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions
-# and limitations under the License.
+# Copyright (C) 2022-2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
 
 from attr import attrs
 

@@ -258,6 +258,25 @@ postprocessing:
     value: 0.01
     visible_in_ui: true
     warning: null
+  max_num_detections:
+    affects_outcome_of: INFERENCE
+    default_value: 0
+    description:
+      Extra detection outputs will be discarded in non-maximum suppression process.
+      Defaults to 0, which means per-model default values.
+    editable: true
+    header: Maximum number of detections per image
+    max_value: 10000
+    min_value: 0
+    type: INTEGER
+    ui_rules:
+      action: DISABLE_EDITING
+      operator: AND
+      rules: []
+      type: UI_RULES
+    value: 0
+    visible_in_ui: true
+    warning: null
   use_ellipse_shapes:
     affects_outcome_of: INFERENCE
     default_value: false

@@ -258,6 +258,25 @@ postprocessing:
     value: 0.01
     visible_in_ui: true
     warning: null
+  max_num_detections:
+    affects_outcome_of: INFERENCE
+    default_value: 0
+    description:
+      Extra detection outputs will be discarded in non-maximum suppression process.
+      Defaults to 0, which means per-model default values.
+    editable: true
+    header: Maximum number of detections per image
+    max_value: 10000
+    min_value: 0
+    type: INTEGER
+    ui_rules:
+      action: DISABLE_EDITING
+      operator: AND
+      rules: []
+      type: UI_RULES
+    value: 0
+    visible_in_ui: true
+    warning: null
   use_ellipse_shapes:
     affects_outcome_of: INFERENCE
     default_value: false

@@ -115,9 +115,7 @@
             nms=dict(type="nms", iou_threshold=0.7),
             min_bbox_size=0,
         ),
-        rcnn=dict(
-            score_thr=0.05, nms=dict(type="nms", iou_threshold=0.5, max_num=100), max_per_img=100, mask_thr_binary=0.5
-        ),
+        rcnn=dict(score_thr=0.05, nms=dict(type="nms", iou_threshold=0.5), max_per_img=100, mask_thr_binary=0.5),
     ),
 )
 

@@ -1,18 +1,7 @@
 """Model configuration of Resnet50-MaskRCNN model for Instance-Seg Task."""
 
-# Copyright (C) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions
-# and limitations under the License.
+# Copyright (C) 2022-2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
 
 # pylint: disable=invalid-name
 
@@ -149,7 +138,7 @@
         ),
         rcnn=dict(
             score_thr=0.05,
-            nms=dict(type="nms", iou_threshold=0.5, max_num=100),
+            nms=dict(type="nms", iou_threshold=0.5),
             max_per_img=100,
             mask_thr_binary=0.5,
         ),

@@ -277,6 +277,25 @@ postprocessing:
     warning: null
   type: PARAMETER_GROUP
   visible_in_ui: true
+  max_num_detections:
+    affects_outcome_of: INFERENCE
+    default_value: 0
+    description:
+      Extra detection outputs will be discarded in non-maximum suppression process.
+      Defaults to 0, which means per-model default values.
+    editable: true
+    header: Maximum number of detections per image
+    max_value: 10000
+    min_value: 0
+    type: INTEGER
+    ui_rules:
+      action: DISABLE_EDITING
+      operator: AND
+      rules: []
+      type: UI_RULES
+    value: 0
+    visible_in_ui: true
+    warning: null
 algo_backend:
   description: parameters for algo backend
   header: Algo backend parameters

@@ -139,7 +139,7 @@
         ),
         rcnn=dict(
             score_thr=0.05,
-            nms=dict(type="nms", iou_threshold=0.5, max_num=100),
+            nms=dict(type="nms", iou_threshold=0.5),
             max_per_img=100,
             mask_thr_binary=0.5,
         ),

@@ -1,18 +1,7 @@
 """Task of OTX Detection."""
 
 # Copyright (C) 2023 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions
-# and limitations under the License.
+# SPDX-License-Identifier: Apache-2.0
 
 import io
 import os
@@ -83,11 +72,13 @@ def __init__(self, task_environment: TaskEnvironment, output_path: Optional[str]
         )
         self._anchors: Dict[str, int] = {}
 
+        self.confidence_threshold = 0.0
+        self.max_num_detections = 0
         if hasattr(self._hyperparams, "postprocessing"):
             if hasattr(self._hyperparams.postprocessing, "confidence_threshold"):
                 self.confidence_threshold = self._hyperparams.postprocessing.confidence_threshold
-        else:
-            self.confidence_threshold = 0.0
+            if hasattr(self._hyperparams.postprocessing, "max_num_detections"):
+                self.max_num_detections = self._hyperparams.postprocessing.max_num_detections
 
         if task_environment.model is not None:
             self._load_model()
@@ -112,6 +103,11 @@ def _load_postprocessing(self, model_data):
             hparams.use_ellipse_shapes = loaded_postprocessing["use_ellipse_shapes"]["value"]
         else:
             hparams.use_ellipse_shapes = False
+        if "max_num_detections" in loaded_postprocessing:
+            trained_max_num_detections = loaded_postprocessing["max_num_detections"]["value"]
+            # Prefer a new hparam value intentionally set by the user (>0) over the trained value
+            if self.max_num_detections == 0:
+                self.max_num_detections = trained_max_num_detections
 
     def _load_tiling_parameters(self, model_data):
         """Load tiling parameters from PyTorch model.

@@ -36,7 +36,15 @@
     "--val-data-roots": "tests/assets/car_tree_bug",
     "--test-data-roots": "tests/assets/car_tree_bug",
     "--input": "tests/assets/car_tree_bug/images/train",
-    "train_params": ["params", "--learning_parameters.num_iters", "1", "--learning_parameters.batch_size", "4"],
+    "train_params": [
+        "params",
+        "--learning_parameters.num_iters",
+        "1",
+        "--learning_parameters.batch_size",
+        "4",
+        "--postprocessing.max_num_detections",
+        "200",
+    ],
 }
 
 args_semisl = {