From 1a8a5565f447e6aa12ff237be3db2f4a6111d242 Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Fri, 6 Dec 2024 17:27:03 +0100 Subject: [PATCH] [Docs][TorchFX] NNCF quantization/compression --- .../openvino-workflow/torch-compile.rst | 94 +++++++++++++++---- 1 file changed, 75 insertions(+), 19 deletions(-) diff --git a/docs/articles_en/openvino-workflow/torch-compile.rst b/docs/articles_en/openvino-workflow/torch-compile.rst index e5bc0ca901a5aa..8c6016bfd4742f 100644 --- a/docs/articles_en/openvino-workflow/torch-compile.rst +++ b/docs/articles_en/openvino-workflow/torch-compile.rst @@ -310,10 +310,84 @@ officially. However, it can be accessed by running the following instructions: if sys.version_info >= (3, 11): `raise RuntimeError("Python 3.11+ not yet supported for torch.compile") +TorchServe Integration ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +TorchServe is a performant, flexible, and easy to use tool for serving PyTorch models in production. For more information on the details of TorchServe, +you can refer to `TorchServe github repository. `__. With OpenVINO ``torch.compile`` integration into TorchServe you can serve +PyTorch models in production and accelerate them with OpenVINO on various Intel hardware. Detailed instructions on how to use OpenVINO with TorchServe are +available in `TorchServe examples. `__ and in a `use case app `__. + +Support for Automatic1111 Stable Diffusion WebUI ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +Automatic1111 Stable Diffusion WebUI is an open-source repository that hosts a browser-based interface for the Stable Diffusion +based image generation. It allows users to create realistic and creative images from text prompts. +Stable Diffusion WebUI is supported on Intel CPUs, Intel integrated GPUs, and Intel discrete GPUs by leveraging OpenVINO +``torch.compile`` capability. Detailed instructions are available in +`Stable Diffusion WebUI repository. `__ + + +Model Quantization and Weights Compression +############################################# + +Model quantization and weights compression are effective methods for accelerating model inference and reducing memory consumption, with minimal impact on model accuracy. The `torch.compile` OpenVINO backend supports two key model optimization APIs: + +1. Neural Network Compression Framework (`NNCF `__). NNCF offers advanced algorithms for post-training quantization and weights compression in the OpenVINO toolkit. + +2. PyTorch 2 export quantization. A general-purpose API designed for quantizing models captured by ``torch.export``. + +NNCF is the recommended approach for model quantization and weights compression. NNCF specifically optimizes models for the OpenVINO backend, providing optimal results in terms of inference speed and accuracy. + + +NNCF Model Optimization Support (Preview) ++++++++++++++++++++++++++++++++++++++++++++++ + +The Neural Network Compression Framework (`NNCF `__) implements advanced quantization and weights compression algorithms, which can be applied to ``torch.fx.GraphModule`` to speed up inference +and decrease memory consumption. + +Model quantization example: + +.. code-block:: python + + import nncf + import openvino.torch + import torch + + calibration_loader = torch.utils.data.DataLoader(...) + + def transform_fn(data_item): + images, _ = data_item + return images + + # Model quantization + quantized_model = nncf.quantize(model, calibration_dataset) + + quantized_model = torch.compile(quantized_model, backend="openvino") + +Model weights compression example: + +.. code-block:: python + + import nncf + import openvino.torch + import torch + + # Weights compression + compressed_model = nncf.compress_model(model) + + compressed_model = torch.compile(compressed_model, backend="openvino") + +NNCF unlocks the full potential of low-precision OpenVINO kernels due to the placement of quantizers designed specifically for the OpenVINO. +Advanced algorithms like ``SmoothQuant`` or ``BiasCorrection`` allow further metrics improvement while minimizing the outputs discrepancies between the original and compressed models. +For further details, please see the `documentation `__ +and a `tutorial `__. + Support for PyTorch 2 export quantization (Preview) +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -PyTorch 2 export quantization is supported by OpenVINO backend in ``torch.compile``. To be able +NNCF is the default way to compress models for the OpenVINO backend, however +PyTorch 2 export quantization is supported by OpenVINO backend in ``torch.compile`` as well. To be able to access this feature, follow the steps provided in `PyTorch 2 Export Post Training Quantization with X86 Backend through Inductor `__ and update the provided sample as explained below. @@ -347,24 +421,6 @@ and update the provided sample as explained below. optimized_model = torch.compile(converted_model, backend="openvino") -TorchServe Integration -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - -TorchServe is a performant, flexible, and easy to use tool for serving PyTorch models in production. For more information on the details of TorchServe, -you can refer to `TorchServe github repository. `__. With OpenVINO ``torch.compile`` integration into TorchServe you can serve -PyTorch models in production and accelerate them with OpenVINO on various Intel hardware. Detailed instructions on how to use OpenVINO with TorchServe are -available in `TorchServe examples. `__ - -Support for Automatic1111 Stable Diffusion WebUI -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - -Automatic1111 Stable Diffusion WebUI is an open-source repository that hosts a browser-based interface for the Stable Diffusion -based image generation. It allows users to create realistic and creative images from text prompts. -Stable Diffusion WebUI is supported on Intel CPUs, Intel integrated GPUs, and Intel discrete GPUs by leveraging OpenVINO -``torch.compile`` capability. Detailed instructions are available in -`Stable Diffusion WebUI repository. `__ - - Architecture #################