Merge remote-tracking branch 'openvinotoolkit/develop' into nm/bf16_support
KodiaqQ committed Jul 10, 2024
2 parents 8ffe8ae + 42ae1f8 commit 5405bc9
Showing 465 changed files with 89,228 additions and 137,867 deletions.
4 changes: 3 additions & 1 deletion .github/workflows/nightly.yml
@@ -11,4 +11,6 @@ jobs:
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- uses: AlexanderDokuchaev/md-dead-link-check@76ecefc7f64753bba30a36179f46d903e9f77669 # v0.8
- uses: AlexanderDokuchaev/md-dead-link-check@cc3ed55268899a1a6d5fd7068abbc4591eab1f74 # v0.9
with:
config: md_dead_link_check.toml
3 changes: 3 additions & 0 deletions .github/workflows/post_pr_merge.yml
@@ -16,6 +16,9 @@ on:
- develop
types:
- closed
paths-ignore:
- '**/*.md'
- 'docs/**/*'

jobs:
upload-coverage-common:
4 changes: 3 additions & 1 deletion .github/workflows/pre-commit-linters.yml
@@ -24,4 +24,6 @@ jobs:
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- uses: AlexanderDokuchaev/md-dead-link-check@76ecefc7f64753bba30a36179f46d903e9f77669 # v0.8
- uses: AlexanderDokuchaev/md-dead-link-check@cc3ed55268899a1a6d5fd7068abbc4591eab1f74 # v0.9
with:
config: md_dead_link_check.toml
2 changes: 1 addition & 1 deletion .github/workflows/precommit.yml
@@ -208,7 +208,7 @@ jobs:
lfs: true
- uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
with:
python-version: 3.8.18
python-version: 3.9.19
cache: pip
- name: Install NNCF and test requirements
run: make install-tensorflow-test
2 changes: 1 addition & 1 deletion .mypy.ini
@@ -1,5 +1,5 @@
[mypy]
files = nncf/common/sparsity, nncf/common/graph, nncf/common/accuracy_aware_training/
files = nncf/common/sparsity, nncf/common/graph, nncf/common/accuracy_aware_training/, nncf/common/utils/
follow_imports = silent
strict = True

4 changes: 2 additions & 2 deletions Makefile
@@ -64,7 +64,6 @@ install-openvino-test:
pip install -U pip
pip install -e .
pip install "git+https://github.com/openvinotoolkit/open_model_zoo.git@37f60eb#egg=accuracy_checker&subdirectory=tools/accuracy_checker"
pip install tensorflow==2.12.0 # Install tensorflow before to avoid conflict on install for typing-extensions
pip install -r tests/openvino/requirements.txt
pip install -r tests/cross_fw/install/requirements.txt
pip install -r tests/cross_fw/examples/requirements.txt
@@ -76,7 +75,8 @@ install-openvino-dev: install-openvino-test install-pre-commit
pip install -r examples/post_training_quantization/openvino/yolov8_quantize_with_accuracy_control/requirements.txt

test-openvino:
ONEDNN_MAX_CPU_ISA=AVX2 pytest ${COVERAGE_ARGS} ${NUM_WORKERS_ARG} -ra tests/openvino $(DATA_ARG) --junitxml ${JUNITXML_PATH}
ONEDNN_MAX_CPU_ISA=AVX2 pytest ${COVERAGE_ARGS} ${NUM_WORKERS_ARG} -ra tests/openvino $(DATA_ARG) \
--junitxml ${JUNITXML_PATH} --dist loadscope

test-install-openvino:
pytest tests/cross_fw/install -s \
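For context on the new `--dist loadscope` flag in `test-openvino`: it comes from the pytest-xdist plugin and keeps every test from one module (or class) on the same worker, so expensive module-scoped fixtures are built once per module rather than once per test. A minimal sketch of an equivalent invocation from Python (the test path is illustrative):

```python
# Sketch: pytest-xdist invocation equivalent to the Makefile target above.
# Requires `pip install pytest pytest-xdist`.
import pytest

# "-n auto" spawns one worker per CPU; "--dist loadscope" groups tests
# by module/class so each group stays on a single worker.
pytest.main(["-n", "auto", "--dist", "loadscope", "-ra", "tests/openvino"])
```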
2 changes: 1 addition & 1 deletion README.md
@@ -429,7 +429,7 @@ conda install -c conda-forge nncf
- Python\* 3.8 or later
- Supported frameworks:
- PyTorch\* >=2.2, <2.4
- TensorFlow\* >=2.8.4, <=2.12.1
- TensorFlow\* >=2.8.4, <=2.15.1
- ONNX\* ==1.16.0
- OpenVINO\* >=2022.3.0

3 changes: 2 additions & 1 deletion constraints.txt
@@ -10,7 +10,8 @@ onnx==1.16.0
onnxruntime==1.17.1

# TensorFlow
tensorflow==2.12.1
tensorflow==2.12.1; python_version < '3.9'
tensorflow==2.15.1; python_version >= '3.9'

# Tests and examples
pytest==8.0.2
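The `python_version` suffixes above are PEP 508 environment markers: pip evaluates each marker against the running interpreter and installs only the pin whose condition holds. A small sketch of that evaluation using the `packaging` library:

```python
# Sketch: how an environment marker like the ones above is evaluated.
import sys

from packaging.markers import Marker

marker = Marker("python_version >= '3.9'")
# True on Python 3.9+, where pip would therefore pick tensorflow==2.15.1.
assert marker.evaluate() == (sys.version_info >= (3, 9))
```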
1 change: 1 addition & 0 deletions docs/Algorithms.md
@@ -11,6 +11,7 @@
- Symmetric 8 bit compression mode
- Symmetric and asymmetric 4 bit compression mode
- NF4 compression mode
- E2M1 weights with E8M0 scales compression mode
- Mixed precision weights compression
- Grouped weights compression

4 changes: 3 additions & 1 deletion docs/Installation.md
@@ -43,7 +43,7 @@ as well as the supported versions of Python:

| NNCF | OpenVINO | PyTorch | ONNX | TensorFlow | Python |
|-----------|------------|----------|----------|------------|--------|
| `develop` | `2024.2.0` | `2.3.0` | `1.16.0` | `2.12.0` | `3.8` |
| `develop` | `2024.2.0` | `2.3.0` | `1.16.0` | `2.15.1` | `3.8`* |
| `2.11.0` | `2024.2.0` | `2.3.0` | `1.16.0` | `2.12.0` | `3.8` |
| `2.10.0` | `2024.1.0` | `2.2.1` | `1.16.0` | `2.12.0` | `3.8` |
| `2.9.0` | `2024.0.0` | `2.1.2` | `1.13.1` | `2.12.0` | `3.8` |
@@ -53,3 +53,5 @@ as well as the supported versions of Python:
| `2.6.0` | `2023.1.0` | `2.0.1` | `1.13.1` | `2.12.0` | `3.8` |
| `2.5.0` | `2023.0.0` | `1.13.1` | `1.13.1` | `2.11.1` | `3.8` |
| `2.4.0` | `2022.1.0` | `1.12.1` | `1.12.0` | `2.8.2` | `3.8` |

> (*) Python 3.9 or higher is required for TensorFlow 2.15.1
8 changes: 4 additions & 4 deletions docs/api/source/conf.py
@@ -139,10 +139,10 @@ def collect_api_entities() -> APIInfo:
"keras",
"tensorflow_addons",
# Need add backend implementation functions to avoid endless loops on registered functions by mock module,
"nncf.experimental.tensor.functions.numpy_numeric",
"nncf.experimental.tensor.functions.numpy_linalg",
"nncf.experimental.tensor.functions.torch_numeric",
"nncf.experimental.tensor.functions.torch_linalg",
"nncf.tensor.functions.numpy_numeric",
"nncf.tensor.functions.numpy_linalg",
"nncf.tensor.functions.torch_numeric",
"nncf.tensor.functions.torch_linalg",
]

with mock(mock_modules):
14 changes: 12 additions & 2 deletions docs/usage/post_training_compression/weights_compression/Usage.md
@@ -9,7 +9,7 @@ The Weights Compression algorithm is aimed at compressing the weights of the mod
#### Supported modes

By default, weights are compressed asymmetrically to 8-bit integer data type - "INT8_ASYM" mode.
OpenVINO backend also supports 3 modes of mixed precision weight quantization with a 4-bit data type as a primary precision - INT4_SYM, INT4_ASYM and NF4. The primary precision in case of INT4_SYM mode is signed 4-bit integer and weights are quantized to it [symmetrically](/docs/usage/training_time_compression/other_algorithms/LegacyQuantization.md#symmetric-quantization) without zero point. In case of INT4_ASYM mode - unsigned 4-bit integer and weight are quantized to it [asymmetrically](/docs/usage/training_time_compression/other_algorithms/LegacyQuantization.md#asymmetric-quantization) with a typical non-fixed zero point. In case of NF4 mode - [nf4](https://arxiv.org/pdf/2305.14314v1.pdf) data type without zero point.
OpenVINO backend also supports 4 modes of mixed precision weight quantization with a 4-bit data type as a primary precision - INT4_SYM, INT4_ASYM, NF4 and E2M1. The primary precision in case of INT4_SYM mode is signed 4-bit integer and weights are quantized to it [symmetrically](/docs/usage/training_time_compression/other_algorithms/LegacyQuantization.md#symmetric-quantization) without zero point. In case of INT4_ASYM mode - unsigned 4-bit integer and weights are quantized to it [asymmetrically](/docs/usage/training_time_compression/other_algorithms/LegacyQuantization.md#asymmetric-quantization) with a typical non-fixed zero point. In case of NF4 mode - [nf4](https://arxiv.org/pdf/2305.14314v1.pdf) data type without zero point. In case of E2M1 mode - [e2m1](https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf) data type without zero point and with an 8-bit [E8M0](https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf) scale.
All 4-bit modes support grouped quantization, where a small group of weights (e.g. 128) in the channel dimension share quantization parameters (scale).
All embeddings, convolutions and last linear layers are always compressed to the 8-bit integer data type. To quantize embeddings and last linear layers to 4-bit, use `all_layers=True`.
The percentage of the remaining layers compressed to 4-bit can be configured by the "ratio" parameter. E.g. ratio=0.9 means 90% of layers are compressed to the corresponding 4-bit data type and the rest to the 8-bit asymmetric integer data type.
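A minimal sketch of these parameters in use (assuming `model` is an already-loaded OpenVINO model; the values are illustrative):

```python
from nncf import compress_weights, CompressWeightsMode

# 4-bit symmetric primary precision: groups of 128 weights share one scale,
# 90% of eligible layers go to 4-bit, the rest stay 8-bit asymmetric integer.
compressed_model = compress_weights(
    model,
    mode=CompressWeightsMode.INT4_SYM,
    group_size=128,
    ratio=0.9,
)
```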
@@ -144,6 +144,15 @@ from nncf import compress_weights, CompressWeightsMode
compressed_model = compress_weights(model, mode=CompressWeightsMode.NF4)
```

- `E2M1` mode can be considered for improving accuracy, but models quantized to e2m1 are currently not expected to be faster than models
quantized to 8-bit asymmetric integer. Here is an example of how to compress weights to the e2m1 data type with group size = 32 (recommended).
Different `group_size` and `ratio` are also supported.

```python
from nncf import compress_weights, CompressWeightsMode
compressed_model = compress_weights(model, mode=CompressWeightsMode.E2M1, group_size=32, all_layers=True)
```

#### Evaluation results

Here is the perplexity and model size before and after weight compression for different language models on the [Lambada OpenAI dataset](https://github.com/openai/gpt-2/issues/131#issuecomment-497136199).
@@ -512,8 +521,9 @@ Here is the perplexity and accuracy with data-free and data-aware mixed-precisio
- The algorithm is supported for OpenVINO and PyTorch models.
- The compression applies in-place.
- The compressed model is not trainable.
- INT4_SYM, INT4_ASYM and NF4 modes, grouped quantization and mixed precision selection is available for OpenVINO backend only.
- INT4_SYM, INT4_ASYM, NF4 and E2M1 modes, grouped quantization and mixed precision selection are available for the OpenVINO backend only.
- NF4 support is experimental - models quantized to nf4 are not expected to be faster than models quantized to 8-bit integer.
- E2M1 support is experimental - models quantized to e2m1 are not expected to be faster than models quantized to 8-bit integer.

#### Additional resources

2 changes: 1 addition & 1 deletion examples/llm_compression/openvino/tiny_llama/main.py
@@ -67,7 +67,7 @@ def transform_fn(data, model, tokenizer):
)
model.save_pretrained(OUTPUT_DIR)

model = OVModelForCausalLM.from_pretrained(OUTPUT_DIR)
model = OVModelForCausalLM.from_pretrained(OUTPUT_DIR, ov_config={"DYNAMIC_QUANTIZATION_GROUP_SIZE": "0"})
input_ids = tokenizer("What is PyTorch?", return_tensors="pt").to(device=model.device)

start_t = time.time()
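The `ov_config` change above (and the grouped one in the next hunk) passes an OpenVINO runtime property: a `DYNAMIC_QUANTIZATION_GROUP_SIZE` of `"0"` disables dynamic quantization of activations, so measurements reflect the weight-compressed model itself. A standalone sketch (the output directory is hypothetical):

```python
# Sketch: reload a saved model with dynamic quantization disabled ("0" == off).
from optimum.intel import OVModelForCausalLM

model = OVModelForCausalLM.from_pretrained(
    "tiny_llama_int4",  # hypothetical OUTPUT_DIR
    ov_config={"DYNAMIC_QUANTIZATION_GROUP_SIZE": "0"},
)
```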
@@ -241,7 +241,12 @@ def gen_pkv(num_heads, head_dim, num_layers):

def main():
model_id = "TinyLlama/TinyLlama-1.1B-step-50K-105b" # <YOUR_MODEL_ID>
ov_config = {"PERFORMANCE_HINT": "LATENCY", "NUM_STREAMS": "1", "CACHE_DIR": ""}
ov_config = {
"PERFORMANCE_HINT": "LATENCY",
"NUM_STREAMS": "1",
"CACHE_DIR": "",
"DYNAMIC_QUANTIZATION_GROUP_SIZE": "0",
}
model = OVModelForCausalLM.from_pretrained(
model_id,
export=True,
@@ -48,7 +48,7 @@ def validate(path_to_model: Path, validation_loader: torch.utils.data.DataLoader
predictions = []
references = []

compiled_model = ov.compile_model(path_to_model)
compiled_model = ov.compile_model(path_to_model, device_name="CPU")
output = compiled_model.outputs[0]

for images, target in tqdm(validation_loader):
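Several example scripts in this commit pin `ov.compile_model` to `device_name="CPU"`; without it, OpenVINO auto-selects a device, which can make validation numbers vary between machines. A standalone sketch (the IR path and input shape are illustrative):

```python
# Sketch: compile explicitly for CPU and run a single inference.
import numpy as np
import openvino as ov

model = ov.Core().read_model("model.xml")  # hypothetical IR file
compiled_model = ov.compile_model(model, device_name="CPU")
output = compiled_model.outputs[0]
result = compiled_model(np.zeros((1, 3, 224, 224), dtype=np.float32))[output]
```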
@@ -45,7 +45,7 @@ def validate_ov_model(
validator.stats = []
validator.batch_i = 1
validator.confusion_matrix = ConfusionMatrix(nc=validator.nc)
compiled_model = ov.compile_model(ov_model)
compiled_model = ov.compile_model(ov_model, device_name="CPU")
num_outputs = len(compiled_model.outputs)
for batch_i, batch in enumerate(data_loader):
if num_samples is not None and batch_i == num_samples:
@@ -187,12 +187,12 @@ def transform_fn(data_item):
int8_fps = run_benchmark(int8_ir_path, shape=[1, 3, 256, 256], verbose=True)

print("[5/7] Validate OpenVINO FP32 model:")
compiled_model = ov.compile_model(ov_model)
compiled_model = ov.compile_model(ov_model, device_name="CPU")
fp32_top1, _ = validate(compiled_model, test_loader, validation_params)
print(f"Accuracy @ top1: {fp32_top1:.3f}")

print("[6/7] Validate OpenVINO INT8 model:")
quantized_compiled_model = ov.compile_model(ov_quantized_model)
quantized_compiled_model = ov.compile_model(ov_quantized_model, device_name="CPU")
int8_top1, _ = validate(quantized_compiled_model, test_loader, validation_params)
print(f"Accuracy @ top1: {int8_top1:.3f}")

@@ -43,7 +43,7 @@ def validate(model: ov.Model, val_loader: torch.utils.data.DataLoader) -> float:
predictions = []
references = []

compiled_model = ov.compile_model(model)
compiled_model = ov.compile_model(model, device_name="CPU")
output = compiled_model.outputs[0]

for images, target in tqdm(val_loader):
@@ -39,7 +39,7 @@ def validate(
validator.stats = []
validator.confusion_matrix = ConfusionMatrix(nc=validator.nc)
model.reshape({0: [1, 3, -1, -1]})
compiled_model = ov.compile_model(model)
compiled_model = ov.compile_model(model, device_name="CPU")
output_layer = compiled_model.output(0)
for batch_i, batch in enumerate(data_loader):
if num_samples is not None and batch_i == num_samples:
@@ -42,7 +42,7 @@ def validate(
validator.batch_i = 1
validator.confusion_matrix = ConfusionMatrix(nc=validator.nc)
model.reshape({0: [1, 3, -1, -1]})
compiled_model = ov.compile_model(model)
compiled_model = ov.compile_model(model, device_name="CPU")
num_outputs = len(model.outputs)
for batch_i, batch in enumerate(data_loader):
if num_samples is not None and batch_i == num_samples:
@@ -28,7 +28,7 @@


def validate(model: ov.Model, val_loader: tf.data.Dataset) -> tf.Tensor:
compiled_model = ov.compile_model(model)
compiled_model = ov.compile_model(model, device_name="CPU")
output = compiled_model.outputs[0]

metric = tf.keras.metrics.CategoricalAccuracy(name="acc@1")
@@ -1,4 +1,5 @@
tensorflow~=2.12.0
tensorflow~=2.12.0; python_version < '3.9'
tensorflow~=2.15.1; python_version >= '3.9'
tensorflow-datasets
tqdm
openvino==2024.2
@@ -50,7 +50,7 @@ def validate(model: ov.Model, val_loader: torch.utils.data.DataLoader) -> float:
predictions = []
references = []

compiled_model = ov.compile_model(model)
compiled_model = ov.compile_model(model, device_name="CPU")
output = compiled_model.outputs[0]

for images, target in track(val_loader, description="Validating"):
@@ -11,9 +11,12 @@

import tensorflow as tf
import tensorflow.keras.backend as K
from packaging import version

from examples.tensorflow.common.object_detection.architecture import nn_ops

tensorflow_version = version.parse(version.parse(tf.__version__).base_version)


class CSPDarknet53:
"""Class to build CSPDarknet53"""
@@ -25,12 +28,17 @@ def DarknetConv2D_BN_Mish(self, *args, **kwargs):
"""Darknet Convolution2D followed by SyncBatchNormalization and Mish."""
no_bias_kwargs = {"use_bias": False}
no_bias_kwargs.update(kwargs)

if tensorflow_version < version.parse("2.15"):
mish = tf.keras.layers.Activation(self.mish)
else:
mish = tf.keras.layers.Activation("mish")

return nn_ops.compose(
nn_ops.DarknetConv2D(*args, **no_bias_kwargs),
# TODO(nsavelyev) replace by BatchNormalization(synchronized=True) once support for TF < 2.12 is dropped
tf.keras.layers.experimental.SyncBatchNormalization(),
# TODO(nsavelyev) change to tf.keras.activations.mish after upgrade to TF 2.13
tf.keras.layers.Activation(self.mish),
mish,
)

def csp_resblock_body(self, x, num_filters, num_blocks, all_narrow=True):
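The version gate above exists because the string alias `"mish"` only resolves in the newer Keras releases bundled with TF 2.15+; older builds keep using the class's own `self.mish`. A standalone sketch of the same dispatch, with a hand-written mish (x · tanh(softplus(x))) standing in for `self.mish`, which is not shown in this hunk:

```python
# Sketch: choose a mish activation layer based on the installed TF version.
import tensorflow as tf
from packaging import version

tf_version = version.parse(version.parse(tf.__version__).base_version)

def manual_mish(x):
    # Standard mish definition; a stand-in for the repository's self.mish.
    return x * tf.math.tanh(tf.math.softplus(x))

if tf_version < version.parse("2.15"):
    mish = tf.keras.layers.Activation(manual_mish)
else:
    mish = tf.keras.layers.Activation("mish")
```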
3 changes: 2 additions & 1 deletion examples/tensorflow/requirements.txt
@@ -4,7 +4,8 @@ absl-py==1.0.0
tensorflow
tensorflow_datasets==4.2.0
tensorflow_hub
tensorflow_addons==0.20.0
tensorflow_addons==0.20.0; python_version < '3.9'
tensorflow_addons==0.23.0; python_version >= '3.9'
tensorflow-metadata==1.13.0
opencv-python
pycocotools==2.0.6
2 changes: 2 additions & 0 deletions md_dead_link_check.toml
@@ -0,0 +1,2 @@
[tool.md_dead_link_check]
exclude_files = ["ReleaseNotes.md"]
2 changes: 1 addition & 1 deletion nncf/common/accuracy_aware_training/training_loop.py
@@ -357,7 +357,7 @@ def remove_registry_prefix(algo_name: str) -> str:
)

return {
remove_registry_prefix(algo_name): controller_cls
remove_registry_prefix(algo_name): cast(CompressionAlgorithmController, controller_cls)
for algo_name, controller_cls in ADAPTIVE_COMPRESSION_CONTROLLERS.registry_dict.items()
}

2 changes: 1 addition & 1 deletion nncf/common/deprecation.py
@@ -17,7 +17,7 @@
from packaging import version


def warning_deprecated(msg):
def warning_deprecated(msg: str) -> None:
# Note: must use FutureWarning in order not to get suppressed by default
warnings.warn(msg, FutureWarning, stacklevel=2)

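The comment inside `warning_deprecated` is worth unpacking: under Python's default warning filters, `DeprecationWarning` is hidden everywhere except code run directly in `__main__`, while `FutureWarning` is always shown to end users. A short usage sketch (the message is hypothetical):

```python
import warnings

def warning_deprecated(msg: str) -> None:
    # FutureWarning is visible under default filters; DeprecationWarning is not.
    warnings.warn(msg, FutureWarning, stacklevel=2)

warning_deprecated("`old_quantize()` is deprecated; use `quantize()` instead.")
```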
16 changes: 10 additions & 6 deletions nncf/common/graph/patterns/manager.py
@@ -38,17 +38,19 @@ def _get_backend_hw_patterns_map(backend: BackendType) -> Dict[HWFusedPatternNam
if backend == BackendType.ONNX:
from nncf.onnx.hardware.fused_patterns import ONNX_HW_FUSED_PATTERNS

registry = ONNX_HW_FUSED_PATTERNS.registry_dict
registry = cast(Dict[HWFusedPatternNames, Callable[[], GraphPattern]], ONNX_HW_FUSED_PATTERNS.registry_dict)
return registry
if backend == BackendType.OPENVINO:
from nncf.openvino.hardware.fused_patterns import OPENVINO_HW_FUSED_PATTERNS

registry = OPENVINO_HW_FUSED_PATTERNS.registry_dict
registry = cast(
Dict[HWFusedPatternNames, Callable[[], GraphPattern]], OPENVINO_HW_FUSED_PATTERNS.registry_dict
)
return registry
if backend == BackendType.TORCH:
from nncf.torch.hardware.fused_patterns import PT_HW_FUSED_PATTERNS

registry = PT_HW_FUSED_PATTERNS.registry_dict
registry = cast(Dict[HWFusedPatternNames, Callable[[], GraphPattern]], PT_HW_FUSED_PATTERNS.registry_dict)
return registry
raise ValueError(f"Hardware-fused patterns not implemented for {backend} backend.")

@@ -66,17 +68,19 @@ def _get_backend_ignored_patterns_map(
if backend == BackendType.ONNX:
from nncf.onnx.quantization.ignored_patterns import ONNX_IGNORED_PATTERNS

registry = ONNX_IGNORED_PATTERNS.registry_dict
registry = cast(Dict[IgnoredPatternNames, Callable[[], GraphPattern]], ONNX_IGNORED_PATTERNS.registry_dict)
return registry
if backend == BackendType.OPENVINO:
from nncf.openvino.quantization.ignored_patterns import OPENVINO_IGNORED_PATTERNS

registry = OPENVINO_IGNORED_PATTERNS.registry_dict
registry = cast(
Dict[IgnoredPatternNames, Callable[[], GraphPattern]], OPENVINO_IGNORED_PATTERNS.registry_dict
)
return registry
if backend == BackendType.TORCH:
from nncf.torch.quantization.ignored_patterns import PT_IGNORED_PATTERNS

registry = PT_IGNORED_PATTERNS.registry_dict
registry = cast(Dict[IgnoredPatternNames, Callable[[], GraphPattern]], PT_IGNORED_PATTERNS.registry_dict)
return registry
raise ValueError(f"Ignored patterns not implemented for {backend} backend.")

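The `cast(...)` calls added throughout this file affect only static type checking: `typing.cast` returns its argument unchanged at runtime and simply tells mypy what type to assume for the untyped registry dicts. A minimal illustration:

```python
from typing import Callable, Dict, cast

registry_dict: dict = {"pattern_a": lambda: "pattern"}  # untyped registry
# Narrows the static type for mypy; no runtime conversion or check happens.
registry = cast(Dict[str, Callable[[], str]], registry_dict)
assert registry is registry_dict
```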
2 changes: 1 addition & 1 deletion nncf/common/tensor_statistics/aggregator.py
@@ -21,7 +21,7 @@
from nncf.common.tensor import NNCFTensor
from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer
from nncf.data.dataset import Dataset
from nncf.experimental.tensor.tensor import Tensor
from nncf.tensor import Tensor

TensorType = TypeVar("TensorType")
TModel = TypeVar("TModel")
4 changes: 2 additions & 2 deletions nncf/common/tensor_statistics/statistics.py
@@ -14,8 +14,8 @@
from collections import Counter
from typing import TypeVar

from nncf.experimental.tensor import Tensor
from nncf.experimental.tensor import functions as fns
from nncf.tensor import Tensor
from nncf.tensor import functions as fns

TensorType = TypeVar("TensorType")
