Added raising an exception when empty calibration dataset is provided (…

…#2230) ### Changes Added raising an exception when an empty calibration dataset is provided. Now, if an empty calibration dataset is passed to `nncf.quantize()`, the following exception will be thrown: ``` Calibration dataset must not be empty. Please provide calibration dataset with at least one sample. ``` Also added a check for a non-positive `subset_size` provided to `nncf.quantize()`. Otherwise, it would fail with the same "statistics were not collected" error. ### Reason for changes Without an explicit exception it will error out later with a message like: ``` File "/home/nsavel/workspace/openvino_notebooks/nncf/nncf/quantization/algorithms/min_max/algorithm.py", line 673, in apply raise RuntimeError(f"Statistics were not collected for the node {target_node_name}") RuntimeError: Statistics were not collected for the node /model.2/m.2/Add ``` This is confusing and does not clearly reflect what is actually wrong. There have been some reports, e.g. from the OTX side, where an error like this was encountered due to an empty dataset. But at first it wasn't clear what the issue actually was, and a bug in NNCF was suspected. I personally also encounter this sometimes while experimenting, and it prompts me to look for issues in NNCF when in fact an empty calibration dataset was simply provided by mistake. ### Tests Added a test for an empty dataset to `tests/common/test_statistics_aggregator.py`. Added a test for a non-positive `subset_size` to `tests/openvino/native/quantization/test_quantize_api.py` (openvino only). --------- Co-authored-by: Alexander Suslov <[email protected]>
openvinotoolkit · Nov 2, 2023 · f2cb7ae · f2cb7ae
1 parent cb781eb
commit f2cb7ae
Show file tree

Hide file tree

Showing 4 changed files with 77 additions and 2 deletions.
diff --git a/nncf/common/tensor_statistics/aggregator.py b/nncf/common/tensor_statistics/aggregator.py
@@ -59,6 +59,7 @@ def collect_statistics(self, model: TModel, graph: NNCFGraph) -> None:
             if self.stat_subset_size is not None
             else None
         )
+        empty_statistics = True
         for input_data in track(
             islice(self.dataset.get_inference_data(), self.stat_subset_size),
             total=total,
@@ -67,6 +68,11 @@ def collect_statistics(self, model: TModel, graph: NNCFGraph) -> None:
             outputs = engine.infer(input_data)
             processed_outputs = self._process_outputs(outputs)
             self._register_statistics(processed_outputs, merged_statistics)
+            empty_statistics = False
+        if empty_statistics:
+            raise RuntimeError(
+                "Calibration dataset must not be empty. Please provide calibration dataset with at least one sample."
+            )
 
     def register_statistic_points(self, statistic_points: StatisticPointsContainer) -> None:
         """

diff --git a/nncf/quantization/quantize_model.py b/nncf/quantization/quantize_model.py
@@ -65,8 +65,8 @@ def quantize(
         into account while compressing in order to obtain the best performance
         for this type of device.
     :type  target_device: nncf.TargetDevice
-    :param subset_size: Size of a subset to calculate activations
-        statistics used for quantization.
+    :param subset_size: Size of a subset to calculate activations statistics used for quantization.
+        Must be positive.
     :param fast_bias_correction: Setting this option to `False` enables a different
         bias correction method which is more accurate, in general, and takes
         more time but requires less memory.
@@ -81,6 +81,10 @@ def quantize(
     :return: The quantized model.
     :rtype: TModel
     """
+
+    if subset_size < 1:
+        raise ValueError("Subset size must be positive.")
+
     backend = get_backend(model)
     if backend == BackendType.OPENVINO:
         from nncf.openvino.quantization.quantize_model import quantize_impl

diff --git a/tests/common/test_statistics_aggregator.py b/tests/common/test_statistics_aggregator.py
@@ -894,3 +894,31 @@ def test_register_statistics(self, dataset_samples, statistic_point_params):
             else:
                 ref_subset_size = subset_size
         assert statistics_aggregator.stat_subset_size == ref_subset_size
+
+    def test_collect_with_empty_dataset(self, dataset_samples):
+        """Collecting statistics over an empty dataset must raise a descriptive RuntimeError."""
+        model = self.get_backend_model(dataset_samples)
+        dataset_samples = []
+        dataset = self.get_dataset(dataset_samples)
+        graph = NNCFGraphFactory.create(model)
+
+        inplace_statistics = False
+        quantizer_config = QuantizerConfig(mode=QuantizationMode.ASYMMETRIC, per_channel=False)
+        target_point = self.get_target_point(TargetType.POST_LAYER_OPERATION)
+        algorithm_name = "TestAlgo"
+        statistic_point = self.create_statistics_point(
+            model,
+            quantizer_config,
+            target_point,
+            len(dataset_samples),
+            algorithm_name,
+            inplace_statistics,
+            RangeEstimatorParametersSet.MEAN_MINMAX,
+        )
+        statistics_points = StatisticPointsContainer()
+        statistics_points.add_statistic_point(statistic_point)
+        statistics_aggregator = self.get_statistics_aggregator(dataset)
+        statistics_aggregator.register_statistic_points(statistics_points)
+        with pytest.raises(RuntimeError) as e:
+            statistics_aggregator.collect_statistics(model, graph)
+        assert "Calibration dataset must not be empty" in str(e.value)
diff --git a/tests/openvino/native/quantization/test_quantize_api.py b/tests/openvino/native/quantization/test_quantize_api.py
@@ -0,0 +1,37 @@
+# Copyright (c) 2023 Intel Corporation
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#      http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import pytest
+from openvino.runtime import Model
+from openvino.runtime import Shape
+from openvino.runtime import Type
+from openvino.runtime import op
+from openvino.runtime import opset8
+
+import nncf
+from nncf import Dataset
+from tests.shared.datasets import MockDataset
+
+INPUT_SHAPE = [2, 1, 1, 1]
+
+
+def get_mock_model() -> Model:
+    """Build a minimal single-input model: an f32 parameter of INPUT_SHAPE fed into a softmax."""
+    input_node = op.Parameter(Type.f32, Shape(INPUT_SHAPE))
+    output_node = opset8.softmax(input_node, 1)  # softmax over axis 1
+    return Model(output_node, [input_node], "mock")
+
+
+def test_non_positive_subset_size():
+    """`nncf.quantize()` must reject a non-positive `subset_size` with a ValueError."""
+    model_to_test = get_mock_model()
+    with pytest.raises(ValueError) as e:
+        nncf.quantize(model_to_test, Dataset(MockDataset(INPUT_SHAPE)), subset_size=0)
+    assert "Subset size must be positive." in str(e.value)