Skip to content

Commit

Permalink
Merge remote-tracking branch 'remote/develop' into onnx_BC_fix
Browse files Browse the repository at this point in the history
  • Loading branch information
kshpv committed Nov 14, 2024
2 parents f71f6ed + 90d15a6 commit 9d15d25
Show file tree
Hide file tree
Showing 91 changed files with 28,114 additions and 28,723 deletions.
59 changes: 59 additions & 0 deletions .github/scripts/pytest_md_summary.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Copyright (c) 2024 Intel Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
This script generates a summary table in Markdown format from an XML report generated by pytest.
Usage in GitHub workflow:
- name: Test Summary
if: ${{ !cancelled() }}
run: |
python .github/scripts/pytest_md_summary.py pytest-results.xml >> $GITHUB_STEP_SUMMARY
"""

import sys
import xml.etree.ElementTree as ET

# Status cells use the LaTeX colouring that GitHub renders inside Markdown.
# Raw strings keep the backslash literally instead of relying on the invalid
# escape sequence "\c", which newer Python versions warn about.
STATUS_FAILED = r"$${\color{red}Failed}$$"
STATUS_ERROR = r"$${\color{red}Error}$$"
STATUS_SKIPPED = r"$${\color{orange}Skipped}$$"
STATUS_OK = r"$${\color{green}Ok}$$"

# Child tags of <testcase> in priority order: the first one present decides the status.
_STATUS_BY_TAG = (
    ("failure", STATUS_FAILED),
    ("error", STATUS_ERROR),
    ("skipped", STATUS_SKIPPED),
)

TABLE_HEADER = (
    "| Test Name | Status | Time | Message |",
    "|:----------|:------:|-----:|:--------|",
)


def _escape_cell(text):
    """
    Make a value safe to embed in a single Markdown table cell.

    Line breaks would terminate the table row and a bare ``|`` would start a new
    column, so line breaks are collapsed to spaces and pipes are backslash-escaped.

    :param text: Value to render; converted with ``str``.
    :return: Single-line text with escaped pipes.
    """
    single_line = " ".join(str(text).splitlines())
    return single_line.replace("|", r"\|")


def _testcase_row(testcase):
    """
    Render one ``<testcase>`` element as a Markdown table row.

    :param testcase: ``xml.etree.ElementTree.Element`` of a test case.
    :return: Row text in the form ``| name | status | time | message |``.
    """
    test_name = testcase.get("name")
    time_duration = float(testcase.get("time", "0"))

    status, message = STATUS_OK, ""
    for tag, tag_status in _STATUS_BY_TAG:
        outcome = testcase.find(tag)
        if outcome is not None:
            status = tag_status
            # Errors carry a message attribute just like failures and skips.
            message = outcome.get("message", "")
            break

    return f"| {_escape_cell(test_name)} | {status} | {time_duration:.0f} | {_escape_cell(message)} |"


def build_summary_table(root):
    """
    Build the Markdown summary table for every test case below ``root``.

    :param root: Root ``Element`` of the XML report generated by pytest.
    :return: The table as one string, rows separated by newlines.
    """
    table_lines = list(TABLE_HEADER)
    table_lines.extend(_testcase_row(testcase) for testcase in root.findall(".//testcase"))
    return "\n".join(table_lines)


def main(argv=None):
    """
    Parse the report given on the command line and print the summary table.

    :param argv: Command-line arguments without the program name; defaults to ``sys.argv[1:]``.
    :return: Process exit code: 0 on success, 1 when the report is missing or unreadable.
    """
    args = sys.argv[1:] if argv is None else argv
    if not args:
        print("Usage: pytest_md_summary.py <pytest-results.xml>", file=sys.stderr)
        return 1

    xml_file = args[0]
    try:
        tree = ET.parse(xml_file)
    except FileNotFoundError:
        # Typically means pytest did not get far enough to write the report.
        print(f"Report file not found: {xml_file}", file=sys.stderr)
        return 1
    except ET.ParseError as err:
        print(f"Cannot parse report file {xml_file}: {err}", file=sys.stderr)
        return 1

    print(build_summary_table(tree.getroot()))
    return 0


if __name__ == "__main__":
    sys.exit(main())
20 changes: 12 additions & 8 deletions .github/workflows/examples.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ on:
description: 'Pytest arguments'
default: ''

concurrency:
group: test-examples-${{ github.workflow }}-${{ github.ref }}-${{ github.event.inputs.pytest_args || '' }}-${{github.event.inputs.pull_request_number || ''}}
cancel-in-progress: false

jobs:
examples-cpu:
name: Test exmaples CPU [${{ matrix.group }}/4]
Expand Down Expand Up @@ -48,19 +52,19 @@ jobs:
run: pip list
- name: Run examples test scope
run: |
python -m pytest -ras tests/cross_fw/examples \
--junit-xml=pytest-results-${{ matrix.group }}.xml \
set +e
python -m pytest -s -ra tests/cross_fw/examples \
--junit-xml=pytest-results.xml \
--durations-path=tests/cross_fw/examples/.test_durations \
--splitting-algorithm=least_duration \
--splits 4 \
--group ${{ matrix.group }} \
${{ github.event.inputs.pytest_args || '' }}
ret=$?
[ $ret -eq 5 ] && [ -n "${{ github.event.inputs.pytest_args || '' }}" ] && exit 0 || exit $ret
env:
TQDM_DISABLE: 1
- name: Upload artifact
uses: actions/upload-artifact@v4
- name: Test Summary
if: ${{ !cancelled() }}
with:
name: pytest-results-${{ matrix.group }}
path: pytest-results-${{ matrix.group }}.xml
overwrite: True
run: |
python .github/scripts/pytest_md_summary.py pytest-results.xml >> $GITHUB_STEP_SUMMARY
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ examples/post_training_quantization/openvino/yolov8/yolov8n*
examples/post_training_quantization/openvino/yolov8_quantize_with_accuracy_control/yolov8n*
examples/**/runs/**
examples/**/results/**
examples/llm_compression/openvino/tiny_llama_find_hyperparams/statistics
compressed_graph.dot
original_graph.dot
datasets/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ The Weights Compression algorithm is aimed at compressing the weights of the mod
### Supported modes

By default, weights are compressed asymmetrically to 8-bit integer data type - "INT8_ASYM" mode.
OpenVINO backend also supports 4 modes of mixed precision weight quantization with a 4-bit data type as a primary precision - INT4_SYM, INT4_ASYM, NF4, E2M1. The primary precision in case of INT4_SYM mode is signed 4-bit integer and weights are quantized to it [symmetrically](/docs/usage/training_time_compression/other_algorithms/LegacyQuantization.md#symmetric-quantization) without zero point. In case of INT4_ASYM mode - unsigned 4-bit integer and weight are quantized to it [asymmetrically](/docs/usage/training_time_compression/other_algorithms/LegacyQuantization.md#asymmetric-quantization) with a typical non-fixed zero point. In case of NF4 mode - [nf4](https://arxiv.org/pdf/2305.14314v1.pdf) data type without zero point. In case of E2M1 mode - [e2m1](https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf) data type without zero point and has 8bit [E8M0](https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf) scale.
OpenVINO backend also supports 4 modes of mixed precision weight quantization with a 4-bit data type as a primary precision - INT4_SYM, INT4_ASYM, NF4, E2M1. The primary precision in case of INT4_SYM mode is signed 4-bit integer and weights are quantized to it [symmetrically](/docs/usage/training_time_compression/other_algorithms/LegacyQuantization.md#symmetric-quantization) without zero point. In case of INT4_ASYM mode - unsigned 4-bit integer and weights are quantized to it [asymmetrically](/docs/usage/training_time_compression/other_algorithms/LegacyQuantization.md#asymmetric-quantization) with a typical non-fixed zero point. In case of NF4 mode - [nf4](https://arxiv.org/pdf/2305.14314v1.pdf) data type without zero point. In case of E2M1 mode - [e2m1](https://arxiv.org/pdf/2310.10537) data type without zero point and with an 8-bit [E8M0](https://arxiv.org/pdf/2310.10537) scale.
All 4-bit modes support grouped quantization, where a small group of weights (e.g. 128) in the channel dimension shares quantization parameters (scale).
All embeddings, convolutions and last linear layers are always compressed to a backup mode, which is "INT8_ASYM" by default. To quantize embeddings and last linear layers to 4-bit, use `all_layers=True`.
The percentage of the remaining layers compressed to 4-bit can be configured with the "ratio" parameter. E.g. ratio=0.9 means 90% of layers are compressed to the corresponding 4-bit data type and the rest to a backup mode. OpenVINO backend supports 3 backup modes: INT8_SYM, INT8_ASYM, and NONE, which retains the original floating-point precision of the model weights. Backup mode is supported only for mixed-precision weight quantization.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@

ROOT = Path(__file__).parent.resolve()
MODEL_PATH = ROOT / "compressed_model.xml"
STATISTICS_PATH = ROOT / "statistics"

COMPRESSION_MODE = nncf.parameters.CompressWeightsMode.INT4_SYM
MAX_DROP = 0.2
Expand Down Expand Up @@ -64,7 +65,7 @@ def compress_model(
group_size=group_size,
awq=awq,
sensitivity_metric=nncf.parameters.SensitivityMetric.MAX_ACTIVATION_VARIANCE,
advanced_parameters=AdvancedCompressionParameters(statistics_path="statistics"),
advanced_parameters=AdvancedCompressionParameters(statistics_path=STATISTICS_PATH),
)
return optimized_ov_model

Expand Down
15 changes: 15 additions & 0 deletions nncf/experimental/common/tensor_statistics/collectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -465,6 +465,21 @@ def _reduce_out_of_place(self, x: List[Tensor]) -> List[Tensor]:
return [fns.mean(x, reduction_axes, keepdims=self._keepdims)]


class MeanVarianceReducer(TensorReducerBase):
    """
    Reducer type whose out-of-place reduction is not implemented.

    NOTE(review): the name suggests a mean-of-variance reduction, but no computation is
    defined here; presumably backend-specific subclasses supply it - confirm against them.
    """

    def _reduce_out_of_place(self, x: List[TensorType]) -> List[TensorType]:
        # No out-of-place implementation exists for this reducer.
        raise NotImplementedError


class MaxVarianceReducer(TensorReducerBase):
    """
    Reducer type whose out-of-place reduction is not implemented.

    NOTE(review): the name suggests a max-of-variance reduction, but no computation is
    defined here; presumably backend-specific subclasses supply it - confirm against them.
    """

    def _reduce_out_of_place(self, x: List[TensorType]) -> List[TensorType]:
        # No out-of-place implementation exists for this reducer.
        raise NotImplementedError


class MeanAbsMaxReducer(TensorReducerBase):
    """
    Reducer type whose out-of-place reduction is not implemented.

    NOTE(review): the name suggests a mean-of-absolute-maximum reduction, but no computation
    is defined here; presumably backend-specific subclasses supply it - confirm against them.
    """

    def _reduce_out_of_place(self, x: List[TensorType]) -> List[TensorType]:
        # No out-of-place implementation exists for this reducer.
        raise NotImplementedError


class QuantileReducerBase(TensorReducerBase):
def __init__(
self,
Expand Down
3 changes: 3 additions & 0 deletions nncf/experimental/torch/fx/model_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,9 @@ def _traverse_graph(
continue

visited.add(in_node.name)
# Any constant is a stop op during the traversing procedure.
if in_node.op == "get_attr":
continue
input_nodes.extend(in_node.all_input_nodes)
input_nodes.extend(list(in_node.users))

Expand Down
4 changes: 3 additions & 1 deletion nncf/experimental/torch/fx/nncf_graph_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,9 @@ def get_edge_params(
if source_node.op in ("get_attr",):
tensor_shape = tuple(get_tensor_constant_from_node(source_node, model).shape)
elif "val" in source_node.meta:
if source_nncf_node.metatype is om.PTBatchNormMetatype:
if source_nncf_node.metatype is om.PTBatchNormMetatype and isinstance(
source_node.meta["val"], (tuple, list)
):
tensor = source_node.meta["val"][0]
elif source_nncf_node.metatype in [om.PTSplitMetatype, om.PTMaxMetatype, om.PTMinMetatype]:
tensor = source_node.meta["val"][output_idx]
Expand Down
2 changes: 0 additions & 2 deletions nncf/experimental/torch/fx/quantization/quantize_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
from nncf.experimental.torch.fx.transformations import compress_post_quantize_transformation
from nncf.experimental.torch.fx.transformations import fq_weights_transformation
from nncf.experimental.torch.fx.transformations import revert_quantization_transformations
from nncf.experimental.torch.fx.transformations import shared_constants_unification_transformation
from nncf.parameters import BackupMode
from nncf.parameters import CompressWeightsMode
from nncf.parameters import ModelType
Expand Down Expand Up @@ -158,7 +157,6 @@ def compress_weights_impl(
backup_mode,
advanced_parameters,
)
shared_constants_unification_transformation(model)
graph = NNCFGraphFactory.create(model)
compressed_model = compression_algorithm.apply(model, graph, dataset=dataset)
compressed_model = GraphModule(compressed_model, compressed_model.graph)
Expand Down
24 changes: 1 addition & 23 deletions nncf/experimental/torch/fx/transformations.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,28 +187,6 @@ def bias_update_transformation(model: torch.fx.GraphModule):
return bias_update_transformation


def shared_constants_unification_transformation(model: torch.fx.GraphModule):
    """
    Checks the FX graph for shared constants and eliminates the redundant
    ones, keeping only the first ``get_attr`` node of every constant.

    Every user of a duplicate ``get_attr`` node is rewired to the first node that
    reads the same target, after which the now unused duplicates are removed and
    the module is recompiled.
    This unification transformation is crucial since the current algorithms (min_max, solver, BC, etc.)
    for torch fx do not utilize the is_shared attribute of nodes for shared constants.

    :param model: Target Torch FX GraphModule
    """
    # Maps a constant's target to the first get_attr node that reads it.
    # Only get_attr nodes are tracked so that a constant can never be replaced by
    # an unrelated node (e.g. a placeholder) that happens to share the same target.
    first_constant_nodes = {}

    for node in model.graph.nodes:
        if node.op != "get_attr":
            continue
        canonical_node = first_constant_nodes.setdefault(node.target, node)
        if canonical_node is not node:
            # Rewire all users, not only the first one; this is also a no-op
            # (instead of an IndexError) for a duplicate without any users.
            node.replace_all_uses_with(canonical_node)

    model.graph.eliminate_dead_code()
    model.recompile()


def constant_update_transformation_builder(
node: NNCFNode, value: torch.Tensor, input_port_id: int = 1
) -> TransformationFNType:
Expand Down Expand Up @@ -541,6 +519,7 @@ def _is_supported_batch_norm_for_training(node: torch.fx.Node):
Return True if the given node refers to an aten batch norm op QAT supports.
"""
supported_ops = [
torch.ops.aten.batch_norm.default,
torch.ops.aten._native_batch_norm_legit.default,
torch.ops.aten.cudnn_batch_norm.default,
torch.ops.aten.miopen_batch_norm.default,
Expand Down Expand Up @@ -807,7 +786,6 @@ def apply_quantization_transformations(model: torch.fx.GraphModule) -> None:
fuse_conv_bn(model)
separate_conv_and_bias(model)
separate_linear_and_bias(model)
shared_constants_unification_transformation(model)


def fold_constant_except_qdq(model: torch.fx.GraphModule):
Expand Down
22 changes: 7 additions & 15 deletions nncf/openvino/statistics/collectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,25 +9,26 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import List, Optional
from typing import Optional

from nncf.common.tensor import TensorType
from nncf.experimental.common.tensor_statistics.collectors import AbsMaxReducer
from nncf.experimental.common.tensor_statistics.collectors import AbsQuantileReducer
from nncf.experimental.common.tensor_statistics.collectors import BatchMeanReducer
from nncf.experimental.common.tensor_statistics.collectors import InplaceInsertionFNType
from nncf.experimental.common.tensor_statistics.collectors import MaxReducer
from nncf.experimental.common.tensor_statistics.collectors import MaxVarianceReducer
from nncf.experimental.common.tensor_statistics.collectors import MeanAbsMaxReducer
from nncf.experimental.common.tensor_statistics.collectors import MeanAggregator
from nncf.experimental.common.tensor_statistics.collectors import MeanPerChReducer
from nncf.experimental.common.tensor_statistics.collectors import MeanReducer
from nncf.experimental.common.tensor_statistics.collectors import MeanVarianceReducer
from nncf.experimental.common.tensor_statistics.collectors import MinReducer
from nncf.experimental.common.tensor_statistics.collectors import NoopAggregator
from nncf.experimental.common.tensor_statistics.collectors import QuantileReducer
from nncf.experimental.common.tensor_statistics.collectors import RawReducer
from nncf.experimental.common.tensor_statistics.collectors import ShapeAggregator
from nncf.experimental.common.tensor_statistics.collectors import ShapeReducer
from nncf.experimental.common.tensor_statistics.collectors import TensorCollector
from nncf.experimental.common.tensor_statistics.collectors import TensorReducerBase
from nncf.experimental.common.tensor_statistics.statistics import MeanTensorStatistic
from nncf.experimental.common.tensor_statistics.statistics import RawTensorStatistic
from nncf.openvino.graph.node_utils import get_inplace_batch_mean_op
Expand Down Expand Up @@ -66,26 +67,17 @@ def get_inplace_fn(self):
return get_inplace_mean_op(self._reduction_axes)


class OVMeanVarianceReducer(TensorReducerBase):
def _reduce_out_of_place(self, x: List[TensorType]) -> List[TensorType]:
raise NotImplementedError()

class OVMeanVarianceReducer(MeanVarianceReducer):
def get_inplace_fn(self):
return get_inplace_mean_var_op(self._reduction_axes)


class OVMaxVarianceReducer(TensorReducerBase):
def _reduce_out_of_place(self, x: List[TensorType]) -> List[TensorType]:
raise NotImplementedError()

class OVMaxVarianceReducer(MaxVarianceReducer):
def get_inplace_fn(self):
return get_inplace_max_var_op(self._reduction_axes)


class OVMeanAbsMaxReducer(TensorReducerBase):
def _reduce_out_of_place(self, x: List[TensorType]) -> List[TensorType]:
raise NotImplementedError()

class OVMeanAbsMaxReducer(MeanAbsMaxReducer):
def get_inplace_fn(self):
return get_inplace_mean_max_op(self._reduction_axes, True)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,7 @@ def get_statistic_points(
return statistic_container

@abstractmethod
def _get_statistic_collector():
def _get_statistic_collector(self):
"""
Get statistic collector
"""
Expand Down Expand Up @@ -360,7 +360,7 @@ def _calc_weight_sensitivity(
return fns.linalg.norm(decompressed_weight - weight, ord="fro").item()

def _get_statistic_collector(self):
return self._backend_entity.hawq_statistic_collector()
return self._backend_entity.hawq_statistic_collector(self._subset_size)


@MIXED_PRECISION_CRITERIA.register(SensitivityMetric.MEAN_ACTIVATION_VARIANCE)
Expand Down
77 changes: 77 additions & 0 deletions tests/cross_fw/test_templates/test_weights_compression_backends.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# Copyright (c) 2024 Intel Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from abc import abstractmethod

import pytest

from nncf.experimental.common.tensor_statistics.collectors import HAWQAggregator
from nncf.experimental.common.tensor_statistics.collectors import MaxVarianceReducer
from nncf.experimental.common.tensor_statistics.collectors import MeanAbsMaxReducer
from nncf.experimental.common.tensor_statistics.collectors import MeanAggregator
from nncf.experimental.common.tensor_statistics.collectors import MeanVarianceReducer
from nncf.experimental.common.tensor_statistics.collectors import NoopReducer
from nncf.experimental.common.tensor_statistics.collectors import TensorCollector


class TemplateTestMixedPrecisionAlgoBackend:
    """
    Template of statistic-collector tests for mixed precision criteria.

    A backend test class derives from this template and implements the ``get_*_with_backend``
    factories; ``test_statistic_collector`` then checks the collector each criterion builds.
    """

    @abstractmethod
    def get_hawq_with_backend(self, subset_size: int):
        """Builds the HAWQ criterion for the backend under test."""

    @abstractmethod
    def get_mean_variance_with_backend(self, subset_size: int):
        """Builds the Mean Variance criterion for the backend under test."""

    @abstractmethod
    def get_max_variance_with_backend(self, subset_size: int):
        """Builds the Max Variance criterion for the backend under test."""

    @abstractmethod
    def get_mean_max_with_backend(self, subset_size: int):
        """Builds the Mean Max criterion for the backend under test."""

    def check_aggregator(self, collector: TensorCollector, expected_aggregator_type, subset_size: int):
        """
        Asserts that the collector has exactly one aggregator of the expected type
        whose ``num_samples`` equals ``subset_size``.

        Note: the aggregator is popped from ``collector.aggregators``.
        """
        aggregator_count = len(collector.aggregators)
        assert aggregator_count == 1, "Collector should have exactly one aggregator."

        _key, found = collector.aggregators.popitem()
        assert isinstance(
            found, expected_aggregator_type
        ), f"Expected aggregator of type {expected_aggregator_type.__name__}, got {type(found).__name__}."
        assert found.num_samples == subset_size, "Aggregator num_samples does not match the provided subset size."

    def check_reducer(self, collector: TensorCollector, expected_reducer_type):
        """
        Asserts that the collector has exactly one reducer and that it is of the expected type.

        Note: the reducer is popped from ``collector.reducers``.
        """
        reducer_count = len(collector.reducers)
        assert reducer_count == 1

        found = collector.reducers.pop()
        assert isinstance(
            found, expected_reducer_type
        ), f"Expected reducer of type {expected_reducer_type.__name__}, got {type(found).__name__}."

    @pytest.mark.parametrize("subset_size", [1, 10, None])
    @pytest.mark.parametrize(
        "algo_func, aggregator_type, reducer_type",
        [
            ("get_hawq_with_backend", HAWQAggregator, NoopReducer),
            ("get_mean_variance_with_backend", MeanAggregator, MeanVarianceReducer),
            ("get_max_variance_with_backend", MeanAggregator, MaxVarianceReducer),
            ("get_mean_max_with_backend", MeanAggregator, MeanAbsMaxReducer),
        ],
    )
    def test_statistic_collector(self, subset_size, algo_func, aggregator_type, reducer_type):
        """Checks the collector type, its aggregator and its reducer for every criterion."""
        # Resolve the backend factory by name and build the criterion under test.
        make_algo = getattr(self, algo_func)
        algo = make_algo(subset_size)
        collector = algo._get_statistic_collector()

        # The criterion must hand out a TensorCollector.
        assert isinstance(collector, TensorCollector), "Collector is not an instance of TensorCollector."

        # Aggregator first, then reducer.
        self.check_aggregator(collector, aggregator_type, subset_size)
        self.check_reducer(collector, reducer_type)
Loading

0 comments on commit 9d15d25

Please sign in to comment.