Switch PyTorch PTQ to common implementation (#2227)
### Changes

- Switch `nncf.quantize` to the common post-training implementation for the PT backend
- Remove the OLD_TORCH backend from `test_quantize_conformance.py`
- Add support for dicts as the output of the calibration dataset (a usage sketch follows below)
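
Below is a minimal, hedged sketch of what the dict support enables. The toy model, data, and transform function are illustrative and not part of this commit; the `nncf.Dataset` and `nncf.quantize` calls follow the public NNCF API.

```python
import nncf
import torch
from torch import nn


class ToyModel(nn.Module):
    """Toy stand-in for a model whose forward takes keyword arguments."""

    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(8, 8)

    def forward(self, x, scale):
        return self.linear(x) * scale


def transform_fn(data_item):
    # Returning a dict from the calibration dataset makes the PT engine
    # call model(**inputs) instead of model(inputs).
    return {"x": data_item[0], "scale": data_item[1]}


data = [(torch.randn(1, 8), torch.ones(1)) for _ in range(10)]
calibration_dataset = nncf.Dataset(data, transform_fn)
quantized_model = nncf.quantize(ToyModel(), calibration_dataset)
```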

### Related tickets

119910

---------

Co-authored-by: Daniil Lyakhov <[email protected]>
AlexanderDokuchaev and daniil-lyakhov authored Nov 2, 2023
1 parent cdd71bb commit f8bdbc7
Showing 11 changed files with 99 additions and 497 deletions.
10 changes: 0 additions & 10 deletions nncf/experimental/torch/quantization/__init__.py

This file was deleted.

124 changes: 0 additions & 124 deletions nncf/experimental/torch/quantization/quantize_model.py

This file was deleted.

6 changes: 3 additions & 3 deletions nncf/quantization/quantize_model.py
@@ -59,7 +59,7 @@ def quantize(
         - `performance`: Symmetric quantization of weights and activations.
         - `mixed`: Symmetric quantization of weights and asymmetric quantization of activations.
         Default value is None. In this case, `mixed` preset is used for `transformer`
-        model type otherwise `performace`.
+        model type otherwise `performance`.
     :type preset: nncf.QuantizationPreset
     :param target_device: A target device the specificity of which will be taken
         into account while compressing in order to obtain the best performance
@@ -189,7 +189,7 @@ def quantize_with_accuracy_control(
         - `performance`: Symmetric quantization of weights and activations.
         - `mixed`: Symmetric quantization of weights and asymmetric quantization of activations.
         Default value is None. In this case, `mixed` preset is used for `transformer`
-        model type otherwise `performace`.
+        model type otherwise `performance`.
     :type preset: nncf.QuantizationPreset
     :param target_device: A target device the specificity of which will be taken
         into account while compressing in order to obtain the best performance
@@ -326,7 +326,7 @@ def quantize_with_tune_hyperparams(
         - `performance`: Symmetric quantization of weights and activations.
         - `mixed`: Symmetric quantization of weights and asymmetric quantization of activations.
         Default value is None. In this case, `mixed` preset is used for `transformer`
-        model type otherwise `performace`.
+        model type otherwise `performance`.
     :param target_device: A target device the specificity of which will be taken
         into account while compressing in order to obtain the best performance
         for this type of device.
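
For reference, a hedged sketch of how the `preset` argument described in these docstrings is typically passed. The `model` and `calibration_dataset` objects are assumed to exist (for example, as in the sketch above):

```python
import nncf

# `mixed` preset: symmetric weights, asymmetric activations. Per the docstring,
# this is what nncf.quantize falls back to when preset is None and the
# model_type is TRANSFORMER; `performance` is the fallback otherwise.
quantized_model = nncf.quantize(
    model,
    calibration_dataset,
    preset=nncf.QuantizationPreset.MIXED,
    model_type=nncf.ModelType.TRANSFORMER,
)
```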
15 changes: 10 additions & 5 deletions nncf/torch/engine.py
@@ -9,7 +9,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Any, Dict, Union
+from typing import Any, Dict, Tuple, Union
 
 import torch
 from torch import nn
@@ -32,12 +32,17 @@ def __init__(self, model: nn.Module):
         self._model = model
         self._model.eval()
 
-    def infer(self, input_data: Union[torch.Tensor, Dict[str, torch.Tensor]]) -> Union[torch.Tensor, Dict[str, Any]]:
+    def infer(
+        self, input_data: Union[torch.Tensor, Tuple[torch.Tensor], Dict[str, torch.Tensor]]
+    ) -> Union[torch.Tensor, Dict[str, Any]]:
         """
         Runs Torch model on the provided input.
-        :param input_data: inputs for the model
-        :return output_data: model outputs
+        :param input_data: Inputs for the model.
+        :return: Model outputs.
         """
 
+        if isinstance(input_data, dict):
+            return self._model(**input_data)
+        if isinstance(input_data, tuple):
+            return self._model(*input_data)
         return self._model(input_data)
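
A short, hedged sketch of the new dispatch in action. The engine class name `PTEngine` is taken from nncf/torch/engine.py; the toy models are illustrative:

```python
import torch
from torch import nn

from nncf.torch.engine import PTEngine


class KwargsModel(nn.Module):
    """Toy model whose forward takes keyword arguments."""

    def forward(self, x, mask):
        return x * mask


engine = PTEngine(KwargsModel())

# Dict input is unpacked as keyword arguments: model(**input_data)
out = engine.infer({"x": torch.randn(2, 4), "mask": torch.ones(2, 4)})

# Tuple input is unpacked as positional arguments: model(*input_data)
out = engine.infer((torch.randn(2, 4), torch.ones(2, 4)))


class SingleInputModel(nn.Module):
    """Toy model taking a single tensor."""

    def forward(self, x):
        return x * 2


# A plain tensor is passed through unchanged: model(input_data)
out = PTEngine(SingleInputModel()).infer(torch.randn(2, 4))
```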