Initial weight compression with nncf graph
ljaljushkin committed Oct 9, 2023
1 parent 44fb240 commit aaeaa69
Showing 11 changed files with 1,084 additions and 925 deletions.
10 changes: 0 additions & 10 deletions nncf/openvino/graph/transformations/commands.py
@@ -229,13 +229,3 @@ def __init__(self, if_node_name: str, if_body_condition: bool):
     def union(self, other: "TransformationCommand") -> "TransformationCommand":
         # Have a look at nncf/torch/graph/transformations/commands/PTInsertionCommand
         raise NotImplementedError()
-
-
-class OVNF4CompressionInsertionCommand(OVInsertionCommand):
-    def __init__(self, target_point: OVTargetPoint, nf4_params: NF4Params):
-        super().__init__(target_point)
-        self.nf4_params = nf4_params
-
-    def union(self, other: "TransformationCommand") -> "TransformationCommand":
-        # Have a look at nncf/torch/graph/transformations/commands/PTInsertionCommand
-        raise NotImplementedError()
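
For context on what this hunk removes: OVNF4CompressionInsertionCommand paired an insertion target point with NF4 quantization parameters so a model transformer could later rewrite the weight node. A minimal sketch of how such a command might be constructed, assuming a hypothetical NF4Params container (its real fields are not shown in this diff) and the OVTargetPoint(target_type, target_node_name, port_id) signature:

from dataclasses import dataclass

from nncf.common.graph.transformations.commands import TargetType
from nncf.openvino.graph.transformations.commands import OVTargetPoint


@dataclass
class NF4Params:
    # Hypothetical container: the real fields of NF4Params are not shown here.
    scale: float
    group_size: int


# Target the weight input (port 1) of a MatMul node; the node name is a placeholder.
target_point = OVTargetPoint(TargetType.OPERATION_WITH_WEIGHTS, "MatMul_1", port_id=1)
command = OVNF4CompressionInsertionCommand(target_point, NF4Params(scale=0.02, group_size=128))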
29 changes: 0 additions & 29 deletions nncf/openvino/quantization/quantize_model.py
@@ -24,7 +24,6 @@
 from nncf.openvino.quantization.backend_parameters import BackendParameters
 from nncf.openvino.quantization.backend_parameters import is_weight_compression_needed
 from nncf.openvino.quantization.quantize_ifmodel import apply_algorithm_if_bodies
-from nncf.openvino.quantization.weights_compression import insert_pre_compression_operations
 from nncf.parameters import CompressWeightsMode
 from nncf.parameters import DropType
 from nncf.parameters import ModelType
@@ -437,31 +436,3 @@ def quantize_with_accuracy_control_impl(
         advanced_quantization_parameters,
         advanced_accuracy_restorer_parameters,
     )
-
-
-def compress_weights_impl(
-    model: ov.Model,
-    mode: CompressWeightsMode = CompressWeightsMode.INT8,
-    ratio: Optional[float] = None,
-    group_size: Optional[int] = None,
-    ignored_scope: Optional[IgnoredScope] = None,
-) -> ov.Model:
-    """
-    Implementation of the `compress_weights()` method for the OpenVINO backend.
-
-    :param model: An OpenVINO model for compression.
-    :param mode: Defines a mode for weight compression.
-        INT8 stands for 8-bit integer quantization of all weights.
-        NF4 stands for mixed-precision weight quantization to the NF4 data type. The first and last layers
-        are always compressed to a backup precision, which is 8-bit integer by default. All other weights
-        are quantized either to NF4 or to the backup precision, depending on the criteria and the given ratio.
-    :param ratio: The ratio between baseline and backup precisions (e.g. 0.9 means 90% of the layers are
-        quantized to NF4 and the rest to INT8).
-    :param group_size: Number of weights (e.g. 128) in the channel dimension that share quantization
-        parameters (scale). The value -1 means no grouping.
-    :param ignored_scope: An ignored scope that defines the list of model control
-        flow graph nodes to be ignored during compression.
-    :return: The non-trainable model with compressed weights and dequantization operations.
-    """
-    insert_pre_compression_operations(model, mode, ratio, group_size)
-    return model
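
The removed compress_weights_impl backs the public nncf.compress_weights() entry point, per its own docstring. A minimal usage sketch of the parameters documented above ("model.xml" is a placeholder path; this illustrates the documented API, not the new graph-based code path this commit introduces):

import openvino.runtime as ov

import nncf
from nncf import CompressWeightsMode

core = ov.Core()

# INT8: quantize all weights to 8-bit integers.
int8_model = nncf.compress_weights(core.read_model("model.xml"), mode=CompressWeightsMode.INT8)

# NF4 mixed precision: roughly 90% of eligible layers go to NF4, with every
# 128 weights along the channel dimension sharing one scale; the first/last
# layers and the remaining 10% fall back to INT8.
nf4_model = nncf.compress_weights(
    core.read_model("model.xml"),
    mode=CompressWeightsMode.NF4,
    ratio=0.9,
    group_size=128,
)

Each model is read fresh before compression because the implementation above modifies the model in place before returning it.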