Skip to content

Commit

Permalink
Merge branch 'develop' into memory-logger-tool
Browse files Browse the repository at this point in the history
  • Loading branch information
nikita-savelyevv committed Jul 25, 2024
2 parents 7c2e3fd + 33bbf6e commit a49818f
Show file tree
Hide file tree
Showing 65 changed files with 3,785 additions and 311 deletions.
92 changes: 62 additions & 30 deletions .github/action_configs/labeler.yml
Original file line number Diff line number Diff line change
@@ -1,49 +1,81 @@
# See help here: https://github.com/marketplace/actions/labeler

dependencies:
- 'requirements.txt'
- '**/setup.py'
- any:
- changed-files:
- any-glob-to-any-file:
- 'requirements.txt'
- '**/setup.py'

NNCF PT:
- 'examples/torch/**/*!(.md)'
- 'examples/post_training_quantization/torch/**/*!(.md)'
- 'nncf/torch/**/*!(.md)'
- 'tests/torch/**/*!(.md)'
- 'nncf/quantization/**/torch_backend.py'
- any:
- changed-files:
- any-glob-to-any-file:
- 'examples/torch/**/*!(.md)'
- 'examples/post_training_quantization/torch/**/*!(.md)'
- 'nncf/torch/**/*!(.md)'
- 'tests/torch/**/*!(.md)'
- 'nncf/quantization/**/torch_backend.py'

NNCF TF:
- 'examples/tensorflow/**/*!(.md)'
- 'examples/post_training_quantization/tensorflow/**/*!(.md)'
- 'nncf/tensorflow/**/*!(.md)'
- 'tests/tensorflow/**/*!(.md)'
- 'nncf/quantization/**/tf_backend.py'
- any:
- changed-files:
- any-glob-to-any-file:
- 'examples/tensorflow/**/*!(.md)'
- 'examples/post_training_quantization/tensorflow/**/*!(.md)'
- 'nncf/tensorflow/**/*!(.md)'
- 'tests/tensorflow/**/*!(.md)'
- 'nncf/quantization/**/tf_backend.py'

NNCF ONNX:
- 'examples/onnx/**/*!(.md)'
- 'examples/post_training_quantization/onnx/**/*!(.md)'
- 'nncf/onnx/**/*!(.md)'
- 'tests/onnx/**/*!(.md)'
- 'nncf/quantization/**/onnx_backend.py'
- any:
- changed-files:
- any-glob-to-any-file:
- 'examples/onnx/**/*!(.md)'
- 'examples/post_training_quantization/onnx/**/*!(.md)'
- 'nncf/onnx/**/*!(.md)'
- 'tests/onnx/**/*!(.md)'
- 'nncf/quantization/**/onnx_backend.py'

NNCF OpenVINO:
- 'examples/openvino/**/*!(.md)'
- 'examples/post_training_quantization/openvino/**/*!(.md)'
- 'nncf/openvino/**/*!(.md)'
- 'tests/openvino/**/*!(.md)'
- 'nncf/quantization/**/openvino_backend.py'
- any:
- changed-files:
- any-glob-to-any-file:
- 'examples/openvino/**/*!(.md)'
- 'examples/post_training_quantization/openvino/**/*!(.md)'
- 'nncf/openvino/**/*!(.md)'
- 'tests/openvino/**/*!(.md)'
- 'nncf/quantization/**/openvino_backend.py'

NNCF PTQ:
- 'nncf/quantization/**/*!(.md)'
- 'tests/post_training/**/*!(.md)'
- any:
- changed-files:
- any-glob-to-any-file:
- 'nncf/quantization/**/*!(.md)'
- 'tests/post_training/**/*!(.md)'

documentation:
- '**/*.md'
- 'docs/**/*'
- any:
- changed-files:
- any-glob-to-any-file:
- '**/*.md'
- 'docs/**/*'

experimental:
- 'nncf/experimental/**/*!(.md)'
- any:
- changed-files:
- any-glob-to-any-file:
- 'nncf/experimental/**/*!(.md)'

NNCF Common:
- 'examples/common/**/*!(.md)'
- 'nncf/common/**/*!(.md)'
- 'tests/common/**/*!(.md)'
- any:
- changed-files:
- any-glob-to-any-file:
- 'examples/common/**/*!(.md)'
- 'nncf/common/**/*!(.md)'
- 'tests/common/**/*!(.md)'

release target:
- any:
- base-branch:
- '^release_v*'
2 changes: 1 addition & 1 deletion .github/workflows/labeler.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ jobs:
pull-requests: write
runs-on: ubuntu-20.04
steps:
- uses: actions/labeler@ac9175f8a1f3625fd0d4fb234536d26811351594 # v4.3.0
- uses: actions/labeler@8558fd74291d67161a8a78ce36a881fa63b766a9 # v5.0.0
with:
repo-token: "${{ secrets.GITHUB_TOKEN }}"
configuration-path: '.github/action_configs/labeler.yml'
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ learning frameworks.
| :------------------------------------------------------------------------------------------------------- | :-------: | :-------: | :-----------: | :-----------: |
| [Post-Training Quantization](./docs/usage/post_training_compression/post_training_quantization/Usage.md) | Supported | Supported | Supported | Supported |
| [Weights Compression](./docs/usage/post_training_compression/weights_compression/Usage.md) | Supported | Supported | Not supported | Not supported |
| [Activation Sparsity](./nncf/experimental/torch/sparsify_activations/ActivationSparsity.md) | Not supported | Experimental | Not supported | Not supported |

### Training-Time Compression Algorithms

Expand Down Expand Up @@ -370,7 +371,6 @@ A list of notebooks demonstrating OpenVINO conversion and inference together wit
| [Grammar Error Correction](https://github.com/openvinotoolkit/openvino_notebooks/tree/latest/notebooks/grammar-correction) | Post-Training Quantization | OpenVINO | NLP, Grammar Correction |
| [LLM Instruction Following](https://github.com/openvinotoolkit/openvino_notebooks/tree/latest/notebooks/llm-question-answering) | Weight Compression | OpenVINO | NLP, Instruction Following |
| [Dolly 2.0](https://github.com/openvinotoolkit/openvino_notebooks/tree/latest/notebooks/dolly-2-instruction-following) | Weight Compression | OpenVINO | NLP, Instruction Following |
| [Stable-Zephyr-3b](https://github.com/openvinotoolkit/openvino_notebooks/tree/latest/notebooks/stable-zephyr-3b-chatbot) | Weight Compression | OpenVINO | NLP, Chat Bot |
| [LLM Chat Bots](https://github.com/openvinotoolkit/openvino_notebooks/tree/latest/notebooks/llm-chatbot) | Weight Compression | OpenVINO | NLP, Chat Bot |

### Post-Training Quantization Examples
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ The Weights Compression algorithm is aimed at compressing the weights of the mod
#### Supported modes

By default, weights are compressed asymmetrically to 8-bit integer data type - "INT8_ASYM" mode.
OpenVINO backend also supports 3 modes of mixed precision weight quantization with a 4-bit data type as a primary precision - INT4_SYM, INT4_ASYM, NF4, E2M1. The primary precision in case of INT4_SYM mode is signed 4-bit integer and weights are quantized to it [symmetrically](/docs/usage/training_time_compression/other_algorithms/LegacyQuantization.md#symmetric-quantization) without zero point. In case of INT4_ASYM mode - unsigned 4-bit integer and weight are quantized to it [asymmetrically](/docs/usage/training_time_compression/other_algorithms/LegacyQuantization.md#asymmetric-quantization) with a typical non-fixed zero point. In case of NF4 mode - [nf4](https://arxiv.org/pdf/2305.14314v1.pdf) data type without zero point. In case of E2M1 mode - [e2m1](https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf) data type without zero point and has 8bit [E8M0](https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf) scale.
OpenVINO backend also supports 4 modes of mixed precision weight quantization with a 4-bit data type as a primary precision - INT4_SYM, INT4_ASYM, NF4, E2M1. The primary precision in case of INT4_SYM mode is signed 4-bit integer and weights are quantized to it [symmetrically](/docs/usage/training_time_compression/other_algorithms/LegacyQuantization.md#symmetric-quantization) without zero point. In case of INT4_ASYM mode - unsigned 4-bit integer and weight are quantized to it [asymmetrically](/docs/usage/training_time_compression/other_algorithms/LegacyQuantization.md#asymmetric-quantization) with a typical non-fixed zero point. In case of NF4 mode - [nf4](https://arxiv.org/pdf/2305.14314v1.pdf) data type without zero point. In case of E2M1 mode - [e2m1](https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf) data type without zero point and has 8bit [E8M0](https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf) scale.
All 4-bit modes have a grouped quantization support, when small group of weights (e.g. 128) in the channel dimension share quantization parameters (scale).
All embeddings, convolutions and last linear layers are always compressed to 8-bit integer data type. To quantize embeddings and last linear layers to 4-bit, use `all_layers=True`.
Percent of the rest layers compressed to 4-bit can be configured by "ratio" parameter. E.g. ratio=0.9 means 90% of layers compressed to the corresponding 4-bit data type and the rest to 8-bit asymmetric integer data type.
Expand Down Expand Up @@ -538,5 +538,4 @@ List of notebooks demonstrating OpenVINO conversion and inference together with

- [LLM Instruction Following](https://github.com/openvinotoolkit/openvino_notebooks/tree/latest/notebooks/llm-question-answering)
- [Dolly 2.0](https://github.com/openvinotoolkit/openvino_notebooks/tree/latest/notebooks/dolly-2-instruction-following)
- [Stable-Zephyr-3b](https://github.com/openvinotoolkit/openvino_notebooks/tree/latest/notebooks/stable-zephyr-3b-chatbot)
- [LLM Chat Bots](https://github.com/openvinotoolkit/openvino_notebooks/tree/latest/notebooks/llm-chatbot)
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@

import nncf

tfds.display_progress_bar(enable=False)
ROOT = Path(__file__).parent.resolve()
WEIGHTS_URL = "https://huggingface.co/alexsu52/mobilenet_v2_imagenette/resolve/main/tf_model.h5"
DATASET_CLASSES = 10
Expand Down
36 changes: 34 additions & 2 deletions nncf/common/graph/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,20 +348,52 @@ def get_input_edges(self, node: NNCFNode) -> List[NNCFGraphEdge]:
edges.extend(self._get_edges(from_node, node))
return sorted(edges, key=lambda x: x.input_port_id)

def get_input_edge_by_port_id(self, node: NNCFNode, port_id: int) -> NNCFGraphEdge:
    """
    Returns the single input edge of the given node whose ``input_port_id``
    equals ``port_id``.

    :param node: The node whose input edges are inspected.
    :param port_id: The input port ID the returned edge must be connected to.
    :return: The unique input edge attached to the specified input port.
    :raises nncf.ValidationError: If no input edge uses the given port ID.
    :raises nncf.InternalError: If more than one input edge uses the given port ID.
    """
    matching = [edge for edge in self.get_input_edges(node) if edge.input_port_id == port_id]
    if not matching:
        raise nncf.ValidationError(
            f"Node {node.node_name} does not contain input edge connected to {port_id} port ID."
        )

    if len(matching) > 1:
        raise nncf.InternalError(
            "Unsupported graph. More than one edge was found for a given node by the specified input port ID."
        )
    return matching[0]

def get_output_edges(self, node: NNCFNode) -> List[NNCFGraphEdge]:
    """
    Returns edges of output tensors sorted by output port ID.

    :param node: Producer node.
    :return: List of output edges for the node sorted by output port ID.
    """

    # Collect every edge from `node` to each of its consumer nodes, then
    # order by the producer-side port so callers get a deterministic list.
    output_nodes = self.get_next_nodes(node)
    edges = []
    for to_node in output_nodes:
        edges.extend(self._get_edges(node, to_node))
    return sorted(edges, key=lambda x: x.output_port_id)

def get_output_edges_by_port_id(self, node: NNCFNode, port_id: int) -> List[NNCFGraphEdge]:
    """
    Returns the output edges of the given node that are connected to the
    specified output port (``edge.output_port_id == port_id``).

    :param node: The node whose output edges are inspected.
    :param port_id: The output port ID used to filter the edges.
    :return: A list of output edges attached to the specified output port.
    """
    selected = []
    for edge in self.get_output_edges(node):
        if edge.output_port_id == port_id:
            selected.append(edge)
    return selected

def _get_edges(self, from_node: NNCFNode, to_node: NNCFNode) -> List[NNCFGraphEdge]:
edges = []
edge = self.get_edge(from_node, to_node)
Expand Down
3 changes: 3 additions & 0 deletions nncf/common/graph/patterns/patterns.py
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,9 @@ class HWFusedPatternNames(Enum):
LINEAR_ARITHMETIC_ACTIVATIONS_ARITHMETIC = PatternDesc("linear_arithmetic_activations_arithmetic")
LINEAR_BATCH_NORM = PatternDesc("linear_batch_norm")
LINEAR_BATCH_NORM_ACTIVATIONS = PatternDesc("linear_batch_norm_activations")
# MaskRCNN_Resnet_Atrous
LINEAR_BATCH_TO_SPACE_SCALE_SHIFT_ACTIVATIONS = PatternDesc("linear_batch_to_space_scale_shift_activations")
LINEAR_BATCH_TO_SPACE_ARITHMETIC_ACTIVATIONS = PatternDesc("linear_batch_to_space_arithmetic_activations")
LINEAR_BATCH_NORM_SCALE_SHIFT_ACTIVATIONS = PatternDesc("linear_batch_norm_scale_shift_activations")
LINEAR_SCALE_SHIFT_ACTIVATIONS = PatternDesc("linear_scale_shift_activations")
LINEAR_CONST_MULTIPLY = PatternDesc("linear_const_multiply")
Expand Down
Loading

0 comments on commit a49818f

Please sign in to comment.