Merge remote-tracking branch 'remote/develop' into onnx_BC_fix

openvinotoolkit · Nov 14, 2024 · 9d15d25 · 9d15d25
2 parents f71f6ed + 90d15a6
commit 9d15d25
Show file tree

Hide file tree

Showing 91 changed files with 28,114 additions and 28,723 deletions.
diff --git a/.github/scripts/pytest_md_summary.py b/.github/scripts/pytest_md_summary.py
@@ -0,0 +1,83 @@
+# Copyright (c) 2024 Intel Corporation
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#      http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+This script generates a summary table in Markdown format from an XML report generated by pytest.
+
+Usage in GitHub workflow:
+    - name: Test Summary
+      if: ${{ !cancelled() }}
+      run: |
+        python .github/scripts/pytest_md_summary.py pytest-results.xml >> $GITHUB_STEP_SUMMARY
+"""
+
+import sys
+import xml.etree.ElementTree as ET
+
+# Child tag of <testcase> -> text of the status cell, checked in this order; a test case with none of them passed.
+# Raw strings keep the backslash of the LaTeX "\color" command without relying on the invalid "\c" escape sequence.
+OUTCOMES = (
+    ("failure", r"$${\color{red}Failed}$$"),
+    ("error", r"$${\color{red}Error}$$"),
+    ("skipped", r"$${\color{orange}Skipped}$$"),
+)
+PASSED_STATUS = r"$${\color{green}Ok}$$"
+
+
+def _table_cell(text):
+    """
+    Makes a text safe to be placed into a single Markdown table cell.
+
+    :param text: Raw attribute value taken from the report, may be None.
+    :return: The text with whitespace runs (including line breaks) collapsed to single spaces
+        and "|" escaped, so that it can neither end the table row nor open a new column.
+    """
+    return " ".join((text or "").split()).replace("|", "\\|")
+
+
+# Load the XML report generated by pytest
+if len(sys.argv) < 2:
+    sys.exit(f"Usage: python {sys.argv[0]} <junit-xml-report>")
+xml_file = sys.argv[1]
+
+try:
+    tree = ET.parse(xml_file)
+except FileNotFoundError:
+    sys.exit(f"Report file not found: {xml_file}")
+except ET.ParseError as err:
+    sys.exit(f"Cannot parse {xml_file}: {err}")
+
+root = tree.getroot()
+
+# Build the summary table in Markdown format
+table_lines = [
+    "| Test Name | Status | Time | Message |",
+    "|:----------|:------:|-----:|:--------|",
+]
+
+# Iterate over test cases
+for testcase in root.findall(".//testcase"):
+    test_name = _table_cell(testcase.get("name"))
+    time_duration = float(testcase.get("time", "0"))
+    status = PASSED_STATUS
+    message = ""
+    # The first outcome tag found defines the status, its "message" attribute fills the last column.
+    for tag, tag_status in OUTCOMES:
+        outcome = testcase.find(tag)
+        if outcome is not None:
+            status = tag_status
+            message = _table_cell(outcome.get("message", ""))
+            break
+
+    # Append each row to the table
+    table_lines.append(f"| {test_name} | {status} | {time_duration:.0f} | {message} |")
+
+print("\n".join(table_lines))
diff --git a/.github/workflows/examples.yml b/.github/workflows/examples.yml
@@ -13,6 +13,10 @@ on:
         description: 'Pytest arguments'
         default: ''
 
+concurrency:
+  group: test-examples-${{ github.workflow }}-${{ github.ref }}-${{ github.event.inputs.pytest_args || '' }}-${{github.event.inputs.pull_request_number || ''}}
+  cancel-in-progress: false
+
 jobs:
   examples-cpu:
     name: Test exmaples CPU [${{ matrix.group }}/4]
@@ -48,19 +52,19 @@ jobs:
         run: pip list
       - name: Run examples test scope
         run: |
-          python -m pytest -ras tests/cross_fw/examples \
-            --junit-xml=pytest-results-${{ matrix.group }}.xml \
+          set +e
+          python -m pytest -s -ra tests/cross_fw/examples \
+            --junit-xml=pytest-results.xml \
             --durations-path=tests/cross_fw/examples/.test_durations \
             --splitting-algorithm=least_duration \
             --splits 4 \
             --group ${{ matrix.group }} \
             ${{ github.event.inputs.pytest_args || '' }}
+          ret=$?
+          [ $ret -eq 5 ] && [ -n "${{ github.event.inputs.pytest_args || '' }}" ]  && exit 0 || exit $ret
         env:
           TQDM_DISABLE: 1
-      - name: Upload artifact
-        uses: actions/upload-artifact@v4
+      - name: Test Summary
         if: ${{ !cancelled() }}
-        with:
-          name: pytest-results-${{ matrix.group }}
-          path: pytest-results-${{ matrix.group }}.xml
-          overwrite: True
+        run: |
+            python .github/scripts/pytest_md_summary.py pytest-results.xml >> $GITHUB_STEP_SUMMARY
diff --git a/.gitignore b/.gitignore
@@ -129,6 +129,7 @@ examples/post_training_quantization/openvino/yolov8/yolov8n*
 examples/post_training_quantization/openvino/yolov8_quantize_with_accuracy_control/yolov8n*
 examples/**/runs/**
 examples/**/results/**
+examples/llm_compression/openvino/tiny_llama_find_hyperparams/statistics
 compressed_graph.dot
 original_graph.dot
 datasets/**

diff --git a/docs/usage/post_training_compression/weights_compression/Usage.md b/docs/usage/post_training_compression/weights_compression/Usage.md
@@ -22,7 +22,7 @@ The Weights Compression algorithm is aimed at compressing the weights of the mod
 ### Supported modes
 
 By default, weights are compressed asymmetrically to 8-bit integer data type - "INT8_ASYM" mode.
-OpenVINO backend also supports 4 modes of mixed precision weight quantization with a 4-bit data type as a primary precision - INT4_SYM, INT4_ASYM, NF4, E2M1. The primary precision in case of INT4_SYM mode is signed 4-bit integer and weights are quantized to it [symmetrically](/docs/usage/training_time_compression/other_algorithms/LegacyQuantization.md#symmetric-quantization) without zero point. In case of INT4_ASYM mode - unsigned 4-bit integer and weight are quantized to it [asymmetrically](/docs/usage/training_time_compression/other_algorithms/LegacyQuantization.md#asymmetric-quantization) with a typical non-fixed zero point. In case of NF4 mode - [nf4](https://arxiv.org/pdf/2305.14314v1.pdf) data type without zero point. In case of E2M1 mode - [e2m1](https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf) data type without zero point and has 8bit [E8M0](https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf) scale.
+The OpenVINO backend also supports 4 modes of mixed-precision weight quantization with a 4-bit data type as the primary precision - INT4_SYM, INT4_ASYM, NF4, E2M1. In INT4_SYM mode the primary precision is a signed 4-bit integer and weights are quantized to it [symmetrically](/docs/usage/training_time_compression/other_algorithms/LegacyQuantization.md#symmetric-quantization) without a zero point. In INT4_ASYM mode it is an unsigned 4-bit integer and weights are quantized to it [asymmetrically](/docs/usage/training_time_compression/other_algorithms/LegacyQuantization.md#asymmetric-quantization) with a typical non-fixed zero point. In NF4 mode it is the [nf4](https://arxiv.org/pdf/2305.14314v1.pdf) data type without a zero point. In E2M1 mode it is the [e2m1](https://arxiv.org/pdf/2310.10537) data type without a zero point and with an 8-bit [E8M0](https://arxiv.org/pdf/2310.10537) scale.
 All 4-bit modes have a grouped quantization support, when small group of weights (e.g. 128) in the channel dimension share quantization parameters (scale).
 All embeddings, convolutions and last linear layers are always compressed to a backup mode, which is "INT8_ASYM", by default. To quantize embeddings and last linear layers to 4-bit, use `all_layers=True`.
 Percent of the rest layers compressed to 4-bit can be configured by "ratio" parameter. E.g. ratio=0.9 means 90% of layers compressed to the corresponding 4-bit data type and the rest to a backup mode. OpenVINO backend supports 3 backup modes: INT8_SYM, INT8_ASYM, and NONE, which retains the original floating-point precision of the model weights. Backup mode is supported only for mixed-precision weight quantization.

diff --git a/examples/llm_compression/openvino/tiny_llama_find_hyperparams/main.py b/examples/llm_compression/openvino/tiny_llama_find_hyperparams/main.py
@@ -31,6 +31,7 @@
 
 ROOT = Path(__file__).parent.resolve()
 MODEL_PATH = ROOT / "compressed_model.xml"
+STATISTICS_PATH = ROOT / "statistics"
 
 COMPRESSION_MODE = nncf.parameters.CompressWeightsMode.INT4_SYM
 MAX_DROP = 0.2
@@ -64,7 +65,7 @@ def compress_model(
         group_size=group_size,
         awq=awq,
         sensitivity_metric=nncf.parameters.SensitivityMetric.MAX_ACTIVATION_VARIANCE,
-        advanced_parameters=AdvancedCompressionParameters(statistics_path="statistics"),
+        advanced_parameters=AdvancedCompressionParameters(statistics_path=STATISTICS_PATH),
     )
     return optimized_ov_model
 

diff --git a/nncf/experimental/common/tensor_statistics/collectors.py b/nncf/experimental/common/tensor_statistics/collectors.py
@@ -465,6 +465,21 @@ def _reduce_out_of_place(self, x: List[Tensor]) -> List[Tensor]:
         return [fns.mean(x, reduction_axes, keepdims=self._keepdims)]
 
 
+class MeanVarianceReducer(TensorReducerBase):  # common base of OVMeanVarianceReducer
+    def _reduce_out_of_place(self, x: List[TensorType]) -> List[TensorType]:
+        raise NotImplementedError()  # no out-of-place reduction is implemented in this class
+
+
+class MaxVarianceReducer(TensorReducerBase):  # common base of OVMaxVarianceReducer
+    def _reduce_out_of_place(self, x: List[TensorType]) -> List[TensorType]:
+        raise NotImplementedError()  # no out-of-place reduction is implemented in this class
+
+
+class MeanAbsMaxReducer(TensorReducerBase):  # common base of OVMeanAbsMaxReducer
+    def _reduce_out_of_place(self, x: List[TensorType]) -> List[TensorType]:
+        raise NotImplementedError()  # no out-of-place reduction is implemented in this class
+
+
 class QuantileReducerBase(TensorReducerBase):
     def __init__(
         self,

diff --git a/nncf/experimental/torch/fx/model_transformer.py b/nncf/experimental/torch/fx/model_transformer.py
@@ -84,6 +84,9 @@ def _traverse_graph(
                 continue
 
             visited.add(in_node.name)
+            # Any constant is a stop op during the traversing procedure.
+            if in_node.op == "get_attr":
+                continue
             input_nodes.extend(in_node.all_input_nodes)
             input_nodes.extend(list(in_node.users))
 

diff --git a/nncf/experimental/torch/fx/nncf_graph_builder.py b/nncf/experimental/torch/fx/nncf_graph_builder.py
@@ -187,7 +187,9 @@ def get_edge_params(
         if source_node.op in ("get_attr",):
             tensor_shape = tuple(get_tensor_constant_from_node(source_node, model).shape)
         elif "val" in source_node.meta:
-            if source_nncf_node.metatype is om.PTBatchNormMetatype:
+            if source_nncf_node.metatype is om.PTBatchNormMetatype and isinstance(
+                source_node.meta["val"], (tuple, list)
+            ):
                 tensor = source_node.meta["val"][0]
             elif source_nncf_node.metatype in [om.PTSplitMetatype, om.PTMaxMetatype, om.PTMinMetatype]:
                 tensor = source_node.meta["val"][output_idx]

diff --git a/nncf/experimental/torch/fx/quantization/quantize_model.py b/nncf/experimental/torch/fx/quantization/quantize_model.py
@@ -31,7 +31,6 @@
 from nncf.experimental.torch.fx.transformations import compress_post_quantize_transformation
 from nncf.experimental.torch.fx.transformations import fq_weights_transformation
 from nncf.experimental.torch.fx.transformations import revert_quantization_transformations
-from nncf.experimental.torch.fx.transformations import shared_constants_unification_transformation
 from nncf.parameters import BackupMode
 from nncf.parameters import CompressWeightsMode
 from nncf.parameters import ModelType
@@ -158,7 +157,6 @@ def compress_weights_impl(
         backup_mode,
         advanced_parameters,
     )
-    shared_constants_unification_transformation(model)
     graph = NNCFGraphFactory.create(model)
     compressed_model = compression_algorithm.apply(model, graph, dataset=dataset)
     compressed_model = GraphModule(compressed_model, compressed_model.graph)

diff --git a/nncf/experimental/torch/fx/transformations.py b/nncf/experimental/torch/fx/transformations.py
@@ -187,28 +187,6 @@ def bias_update_transformation(model: torch.fx.GraphModule):
     return bias_update_transformation
 
 
-def shared_constants_unification_transformation(model: torch.fx.GraphModule):
-    """
-    checks FX graph for shared constants and eliminates redundant
-    shared constant while keeping only the first instance of the constant node.
-    This unification transformation is cruicial since the current algorithms(min_max, solver, BC, etc.)
-    for torch fx do not utilize the is_shared attribute of nodes for shared constants.
-
-    :param model: Target Torch FX GraphModule
-    """
-    prev_targets = {}
-
-    for source_node in model.graph.nodes:
-        dist_node = list(source_node.users)
-        if source_node.target in prev_targets and source_node.op in ("get_attr",):
-            dist_node[0].replace_input_with(source_node, prev_targets[source_node.target])
-        else:
-            prev_targets[source_node.target] = source_node
-
-    model.graph.eliminate_dead_code()
-    model.recompile()
-
-
 def constant_update_transformation_builder(
     node: NNCFNode, value: torch.Tensor, input_port_id: int = 1
 ) -> TransformationFNType:
@@ -541,6 +519,7 @@ def _is_supported_batch_norm_for_training(node: torch.fx.Node):
     Return True if the given node refers to an aten batch norm op QAT supports.
     """
     supported_ops = [
+        torch.ops.aten.batch_norm.default,
         torch.ops.aten._native_batch_norm_legit.default,
         torch.ops.aten.cudnn_batch_norm.default,
         torch.ops.aten.miopen_batch_norm.default,
@@ -807,7 +786,6 @@ def apply_quantization_transformations(model: torch.fx.GraphModule) -> None:
     fuse_conv_bn(model)
     separate_conv_and_bias(model)
     separate_linear_and_bias(model)
-    shared_constants_unification_transformation(model)
 
 
 def fold_constant_except_qdq(model: torch.fx.GraphModule):

diff --git a/nncf/openvino/statistics/collectors.py b/nncf/openvino/statistics/collectors.py
@@ -9,25 +9,26 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import List, Optional
+from typing import Optional
 
-from nncf.common.tensor import TensorType
 from nncf.experimental.common.tensor_statistics.collectors import AbsMaxReducer
 from nncf.experimental.common.tensor_statistics.collectors import AbsQuantileReducer
 from nncf.experimental.common.tensor_statistics.collectors import BatchMeanReducer
 from nncf.experimental.common.tensor_statistics.collectors import InplaceInsertionFNType
 from nncf.experimental.common.tensor_statistics.collectors import MaxReducer
+from nncf.experimental.common.tensor_statistics.collectors import MaxVarianceReducer
+from nncf.experimental.common.tensor_statistics.collectors import MeanAbsMaxReducer
 from nncf.experimental.common.tensor_statistics.collectors import MeanAggregator
 from nncf.experimental.common.tensor_statistics.collectors import MeanPerChReducer
 from nncf.experimental.common.tensor_statistics.collectors import MeanReducer
+from nncf.experimental.common.tensor_statistics.collectors import MeanVarianceReducer
 from nncf.experimental.common.tensor_statistics.collectors import MinReducer
 from nncf.experimental.common.tensor_statistics.collectors import NoopAggregator
 from nncf.experimental.common.tensor_statistics.collectors import QuantileReducer
 from nncf.experimental.common.tensor_statistics.collectors import RawReducer
 from nncf.experimental.common.tensor_statistics.collectors import ShapeAggregator
 from nncf.experimental.common.tensor_statistics.collectors import ShapeReducer
 from nncf.experimental.common.tensor_statistics.collectors import TensorCollector
-from nncf.experimental.common.tensor_statistics.collectors import TensorReducerBase
 from nncf.experimental.common.tensor_statistics.statistics import MeanTensorStatistic
 from nncf.experimental.common.tensor_statistics.statistics import RawTensorStatistic
 from nncf.openvino.graph.node_utils import get_inplace_batch_mean_op
@@ -66,26 +67,17 @@ def get_inplace_fn(self):
         return get_inplace_mean_op(self._reduction_axes)
 
 
-class OVMeanVarianceReducer(TensorReducerBase):
-    def _reduce_out_of_place(self, x: List[TensorType]) -> List[TensorType]:
-        raise NotImplementedError()
-
+class OVMeanVarianceReducer(MeanVarianceReducer):
     def get_inplace_fn(self):
         return get_inplace_mean_var_op(self._reduction_axes)
 
 
-class OVMaxVarianceReducer(TensorReducerBase):
-    def _reduce_out_of_place(self, x: List[TensorType]) -> List[TensorType]:
-        raise NotImplementedError()
-
+class OVMaxVarianceReducer(MaxVarianceReducer):
     def get_inplace_fn(self):
         return get_inplace_max_var_op(self._reduction_axes)
 
 
-class OVMeanAbsMaxReducer(TensorReducerBase):
-    def _reduce_out_of_place(self, x: List[TensorType]) -> List[TensorType]:
-        raise NotImplementedError()
-
+class OVMeanAbsMaxReducer(MeanAbsMaxReducer):
     def get_inplace_fn(self):
         return get_inplace_mean_max_op(self._reduction_axes, True)
 

diff --git a/nncf/quantization/algorithms/weight_compression/mixed_precision.py b/nncf/quantization/algorithms/weight_compression/mixed_precision.py
@@ -281,7 +281,7 @@ def get_statistic_points(
         return statistic_container
 
     @abstractmethod
-    def _get_statistic_collector():
+    def _get_statistic_collector(self):
         """
         Get statistic collector
         """
@@ -360,7 +360,7 @@ def _calc_weight_sensitivity(
         return fns.linalg.norm(decompressed_weight - weight, ord="fro").item()
 
     def _get_statistic_collector(self):
-        return self._backend_entity.hawq_statistic_collector()
+        return self._backend_entity.hawq_statistic_collector(self._subset_size)
 
 
 @MIXED_PRECISION_CRITERIA.register(SensitivityMetric.MEAN_ACTIVATION_VARIANCE)

diff --git a/tests/cross_fw/test_templates/test_weights_compression_backends.py b/tests/cross_fw/test_templates/test_weights_compression_backends.py
@@ -0,0 +1,108 @@
+# Copyright (c) 2024 Intel Corporation
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#      http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from abc import abstractmethod
+from typing import Optional
+
+import pytest
+
+from nncf.experimental.common.tensor_statistics.collectors import HAWQAggregator
+from nncf.experimental.common.tensor_statistics.collectors import MaxVarianceReducer
+from nncf.experimental.common.tensor_statistics.collectors import MeanAbsMaxReducer
+from nncf.experimental.common.tensor_statistics.collectors import MeanAggregator
+from nncf.experimental.common.tensor_statistics.collectors import MeanVarianceReducer
+from nncf.experimental.common.tensor_statistics.collectors import NoopReducer
+from nncf.experimental.common.tensor_statistics.collectors import TensorCollector
+
+
+class TemplateTestMixedPrecisionAlgoBackend:
+    """
+    Template of the statistic collector checks for the mixed precision criteria.
+
+    The `get_*_with_backend` factories are abstract and have to be implemented by a backend-specific test class.
+    """
+
+    @abstractmethod
+    def get_hawq_with_backend(self, subset_size: Optional[int]):
+        """Returns a HAWQ instance of the algorithm."""
+
+    @abstractmethod
+    def get_mean_variance_with_backend(self, subset_size: Optional[int]):
+        """Returns a Mean Variance instance of the algorithm."""
+
+    @abstractmethod
+    def get_max_variance_with_backend(self, subset_size: Optional[int]):
+        """Returns a Max Variance instance of the algorithm."""
+
+    @abstractmethod
+    def get_mean_max_with_backend(self, subset_size: Optional[int]):
+        """Returns a Mean Max instance of the algorithm."""
+
+    def check_aggregator(self, collector: TensorCollector, expected_aggregator_type, subset_size: Optional[int]):
+        """
+        Checks that the collector holds exactly one aggregator of the expected type and sample count.
+
+        :param collector: Statistic collector under test.
+        :param expected_aggregator_type: Class the single aggregator must be an instance of.
+        :param subset_size: Value the aggregator `num_samples` must be equal to.
+        """
+        # Read the aggregator without popitem(), so the check does not modify the inspected mapping.
+        aggregators = list(collector.aggregators.values())
+        assert len(aggregators) == 1, "Collector should have exactly one aggregator."
+        aggregator = aggregators[0]
+        assert isinstance(
+            aggregator, expected_aggregator_type
+        ), f"Expected aggregator of type {expected_aggregator_type.__name__}, got {type(aggregator).__name__}."
+        assert aggregator.num_samples == subset_size, "Aggregator num_samples does not match the provided subset size."
+
+    def check_reducer(self, collector: TensorCollector, expected_reducer_type):
+        """
+        Checks that the collector holds exactly one reducer of the expected type.
+
+        :param collector: Statistic collector under test.
+        :param expected_reducer_type: Class the single reducer must be an instance of.
+        """
+        # Read the reducer without pop(), so the check does not modify the inspected container.
+        reducers = list(collector.reducers)
+        assert len(reducers) == 1, "Collector should have exactly one reducer."
+        reducer = reducers[0]
+        assert isinstance(
+            reducer, expected_reducer_type
+        ), f"Expected reducer of type {expected_reducer_type.__name__}, got {type(reducer).__name__}."
+
+    @pytest.mark.parametrize("subset_size", [1, 10, None])
+    @pytest.mark.parametrize(
+        "algo_func, aggregator_type, reducer_type",
+        [
+            ("get_hawq_with_backend", HAWQAggregator, NoopReducer),
+            ("get_mean_variance_with_backend", MeanAggregator, MeanVarianceReducer),
+            ("get_max_variance_with_backend", MeanAggregator, MaxVarianceReducer),
+            ("get_mean_max_with_backend", MeanAggregator, MeanAbsMaxReducer),
+        ],
+    )
+    def test_statistic_collector(self, subset_size, algo_func, aggregator_type, reducer_type):
+        """
+        Checks the type of the statistic collector of each criterion together with its aggregator and reducer.
+
+        :param subset_size: Value passed to the algorithm factory and expected as the aggregator `num_samples`.
+        :param algo_func: Name of the factory method of this class that builds the algorithm.
+        :param aggregator_type: Expected aggregator class.
+        :param reducer_type: Expected reducer class.
+        """
+        algo = getattr(self, algo_func)(subset_size)
+        collector = algo._get_statistic_collector()
+
+        # Verify the collector instance and properties
+        assert isinstance(collector, TensorCollector), "Collector is not an instance of TensorCollector."
+
+        # Validate the aggregator and reducer types
+        self.check_aggregator(collector, aggregator_type, subset_size)
+        self.check_reducer(collector, reducer_type)