Skip to content

Commit

Permalink
Added raising an exception when empty calibration dataset is provided (
Browse files Browse the repository at this point in the history
…#2230)

### Changes

Added raising an exception when an empty calibration dataset is
provided.

Now, if an empty calibration dataset is passed to `nncf.quantize()`, the
following exception is raised:
```
Calibration dataset must not be empty. Please provide calibration dataset with at least one sample.
```

Also added a check for a non-positive `subset_size` passed to
`nncf.quantize()`. Without this check, quantization would fail with the
same "statistics were not collected" error.

### Reason for changes

Without an explicit exception it will error out later with a message
like:
```
File "/home/nsavel/workspace/openvino_notebooks/nncf/nncf/quantization/algorithms/min_max/algorithm.py", line 673, in apply
    raise RuntimeError(f"Statistics were not collected for the node {target_node_name}")
RuntimeError: Statistics were not collected for the node /model.2/m.2/Add
```

This is confusing and does not clearly reflect what is actually wrong.
There have been some reports, e.g. from the OTX side, of an error like
this being encountered due to an empty dataset. At first it wasn't clear
what the issue actually was, and a bug in NNCF was suspected.

I personally also encounter this sometimes while experimenting, and it
prompts me to look for issues in NNCF, when in fact an empty
calibration dataset was simply provided by mistake.

### Tests

Added a test for an empty dataset to `tests/common/test_statistics_aggregator.py`.
Added a test for a non-positive `subset_size` to
`tests/openvino/native/quantization/test_quantize_api.py` (OpenVINO
only).

---------

Co-authored-by: Alexander Suslov <[email protected]>
  • Loading branch information
nikita-savelyevv and alexsu52 authored Nov 2, 2023
1 parent cb781eb commit f2cb7ae
Show file tree
Hide file tree
Showing 4 changed files with 77 additions and 2 deletions.
6 changes: 6 additions & 0 deletions nncf/common/tensor_statistics/aggregator.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ def collect_statistics(self, model: TModel, graph: NNCFGraph) -> None:
if self.stat_subset_size is not None
else None
)
empty_statistics = True
for input_data in track(
islice(self.dataset.get_inference_data(), self.stat_subset_size),
total=total,
Expand All @@ -67,6 +68,11 @@ def collect_statistics(self, model: TModel, graph: NNCFGraph) -> None:
outputs = engine.infer(input_data)
processed_outputs = self._process_outputs(outputs)
self._register_statistics(processed_outputs, merged_statistics)
empty_statistics = False
if empty_statistics:
raise RuntimeError(
"Calibration dataset must not be empty. Please provide calibration dataset with at least one sample."
)

def register_statistic_points(self, statistic_points: StatisticPointsContainer) -> None:
"""
Expand Down
8 changes: 6 additions & 2 deletions nncf/quantization/quantize_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,8 @@ def quantize(
into account while compressing in order to obtain the best performance
for this type of device.
:type target_device: nncf.TargetDevice
:param subset_size: Size of a subset to calculate activations
statistics used for quantization.
:param subset_size: Size of a subset to calculate activations statistics used for quantization.
Must be positive.
:param fast_bias_correction: Setting this option to `False` enables a different
bias correction method which is more accurate, in general, and takes
more time but requires less memory.
Expand All @@ -81,6 +81,10 @@ def quantize(
:return: The quantized model.
:rtype: TModel
"""

if subset_size < 1:
raise ValueError("Subset size must be positive.")

backend = get_backend(model)
if backend == BackendType.OPENVINO:
from nncf.openvino.quantization.quantize_model import quantize_impl
Expand Down
28 changes: 28 additions & 0 deletions tests/common/test_statistics_aggregator.py
Original file line number Diff line number Diff line change
Expand Up @@ -894,3 +894,31 @@ def test_register_statistics(self, dataset_samples, statistic_point_params):
else:
ref_subset_size = subset_size
assert statistics_aggregator.stat_subset_size == ref_subset_size

def test_collect_with_empty_dataset(self, dataset_samples):
    """
    Checks that collecting statistics over an empty calibration dataset raises
    a RuntimeError with an explicit "must not be empty" message.

    :param dataset_samples: Fixture samples; used only to build the backend model.
    """
    model = self.get_backend_model(dataset_samples)
    # The dataset under test is deliberately empty; the fixture samples were
    # only needed to construct the model above.
    dataset_samples = []
    dataset = self.get_dataset(dataset_samples)
    graph = NNCFGraphFactory.create(model)

    inplace_statistics = False
    quantizer_config = QuantizerConfig(mode=QuantizationMode.ASYMMETRIC, per_channel=False)
    target_point = self.get_target_point(TargetType.POST_LAYER_OPERATION)
    algorithm_name = "TestAlgo"
    statistic_point = self.create_statistics_point(
        model,
        quantizer_config,
        target_point,
        len(dataset_samples),  # 0, because the sample list was emptied above
        algorithm_name,
        inplace_statistics,
        RangeEstimatorParametersSet.MEAN_MINMAX,
    )
    statistics_points = StatisticPointsContainer()
    statistics_points.add_statistic_point(statistic_point)

    statistics_aggregator = self.get_statistics_aggregator(dataset)
    statistics_aggregator.register_statistic_points(statistics_points)
    with pytest.raises(RuntimeError) as exc_info:
        statistics_aggregator.collect_statistics(model, graph)
    # The message check must live outside the `with` block (code after the raising
    # call inside it never runs), and the raised exception is exposed as `.value`;
    # pytest's ExceptionInfo has no `info` attribute.
    assert "Calibration dataset must not be empty" in str(exc_info.value)
37 changes: 37 additions & 0 deletions tests/openvino/native/quantization/test_quantize_api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Copyright (c) 2023 Intel Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import pytest
from openvino.runtime import Model
from openvino.runtime import Shape
from openvino.runtime import Type
from openvino.runtime import op
from openvino.runtime import opset8

import nncf
from nncf import Dataset
from tests.shared.datasets import MockDataset

INPUT_SHAPE = [2, 1, 1, 1]


def get_mock_model() -> Model:
    """
    Builds a minimal OpenVINO model: a single f32 parameter of shape INPUT_SHAPE
    fed into a softmax over axis 1.

    :return: The constructed model, named "mock".
    """
    model_input = op.Parameter(Type.f32, Shape(INPUT_SHAPE))
    model_output = opset8.softmax(model_input, 1)
    return Model(model_output, [model_input], "mock")


def test_non_positive_subset_size():
    """
    Checks that `nncf.quantize()` rejects a non-positive `subset_size` with a
    ValueError carrying an explicit message.
    """
    model_to_test = get_mock_model()

    with pytest.raises(ValueError) as exc_info:
        nncf.quantize(model_to_test, Dataset(MockDataset(INPUT_SHAPE)), subset_size=0)
    # Assert after the `with` block so the check actually runs, and read the
    # exception via `.value`; pytest's ExceptionInfo has no `info` attribute.
    assert "Subset size must be positive." in str(exc_info.value)

0 comments on commit f2cb7ae

Please sign in to comment.