Deprecate onnx/ort model export and quantization (huggingface#795)
* deprecate onnx/ort model export and quantization

* fix
IlyasMoutawwakil authored Jul 1, 2024
1 parent 92fe39f commit 384dda5
Showing 3 changed files with 37 additions and 15 deletions.
6 changes: 6 additions & 0 deletions optimum/intel/neural_compressor/configuration.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import logging
 from typing import Dict, Optional, Union
 
 from neural_compressor.config import DistillationConfig, WeightPruningConfig, _BaseQuantizationConfig
@@ -28,6 +29,8 @@
     "post_training_weight_only": "weight_only",
 }
 
+logger = logging.getLogger(__name__)
+
 
 class INCConfig(BaseConfig):
     CONFIG_NAME = "inc_config.json"
@@ -49,6 +52,9 @@ def __init__(
         self.distillation = self._create_distillation_config(distillation) or {}
         self.save_onnx_model = save_onnx_model
 
+        if self.save_onnx_model:
+            logger.warning("ONNX model saving is deprecated and will be removed soon.")
+
     @staticmethod
     def _create_quantization_config(config: Union[Dict, _BaseQuantizationConfig]):
         # TODO : add activations_dtype and weights_dtype
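For context, a minimal sketch of how this warning now surfaces when a user builds a config with ONNX saving enabled (assuming INCConfig is importable from the top-level optimum.intel namespace, as its other exports are):

    from optimum.intel import INCConfig

    # Building a config with ONNX saving enabled emits the deprecation
    # warning at construction time; all other fields keep their defaults.
    inc_config = INCConfig(save_onnx_model=True)
    # logs: "ONNX model saving is deprecated and will be removed soon."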
33 changes: 23 additions & 10 deletions optimum/intel/neural_compressor/quantization.py
@@ -200,9 +200,15 @@ def quantize(
         use_xpu = device == torch.device("xpu") or device == "xpu"
         calibration_dataloader = None
 
+        if save_onnx_model:
+            logger.warning("ONNX model export is deprecated and will be removed soon.")
+
+        if isinstance(self._original_model, ORTModel):
+            logger.warning("ONNX model quantization is deprecated and will be removed soon.")
+
         if save_onnx_model and isinstance(self._original_model, ORTModel):
+            logger.warning("The model provided is already an ONNX model. Setting `save_onnx_model` to False.")
             save_onnx_model = False
-            logger.warning("Model provided is an ONNX model, `save_onnx_model` is set to False")
 
         default_name = WEIGHTS_NAME if not isinstance(self._original_model, ORTModel) else ONNX_WEIGHTS_NAME
         self._set_task()
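A minimal sketch of the user-facing path that now triggers these warnings; the model checkpoint is a placeholder, and the call follows the documented INCQuantizer flow:

    from neural_compressor.config import PostTrainingQuantConfig
    from optimum.intel import INCQuantizer
    from transformers import AutoModelForSequenceClassification

    model = AutoModelForSequenceClassification.from_pretrained(
        "distilbert-base-uncased-finetuned-sst-2-english"
    )
    quantizer = INCQuantizer.from_pretrained(model)

    # Requesting ONNX export during quantization now logs:
    # "ONNX model export is deprecated and will be removed soon."
    quantizer.quantize(
        quantization_config=PostTrainingQuantConfig(approach="dynamic"),
        save_directory="quantized_model",
        save_onnx_model=True,  # deprecated
    )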
@@ -223,13 +229,16 @@
                 f"but only version {IPEX_MINIMUM_VERSION} or higher is supported."
             )
 
-        if save_onnx_model:
-            if (
-                not isinstance(quantization_config, PostTrainingQuantConfig)
-                or INCQuantizationMode(quantization_config.approach) == INCQuantizationMode.DYNAMIC
-            ):
-                logger.warning("ONNX export for dynamic and weight only quantized model is not supported.")
-                save_onnx_model = False
+        if save_onnx_model and (
+            not isinstance(quantization_config, PostTrainingQuantConfig)
+            or INCQuantizationMode(quantization_config.approach) == INCQuantizationMode.DYNAMIC
+        ):
+            logger.warning(
+                "ONNX export for dynamic and weight only quantized model is not supported. "
+                "Only static quantization model can be exported to ONNX format. "
+                "Setting `save_onnx_model` to False."
+            )
+            save_onnx_model = False
 
         # ITREX Weight Only Quantization
         if not isinstance(quantization_config, PostTrainingQuantConfig):
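For comparison, a sketch of the one path where the export flag is still honored, static post-training quantization; the dataset choice and sample count are illustrative, and `quantizer` is the instance from the sketch above:

    from functools import partial
    from neural_compressor.config import PostTrainingQuantConfig
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")

    def preprocess_function(examples, tokenizer):
        return tokenizer(examples["sentence"], padding="max_length", truncation=True)

    # Static quantization needs calibration data, unlike the dynamic path;
    # get_calibration_dataset is the documented helper for loading it.
    calibration_dataset = quantizer.get_calibration_dataset(
        "glue",
        dataset_config_name="sst2",
        preprocess_function=partial(preprocess_function, tokenizer=tokenizer),
        num_samples=64,
        dataset_split="train",
    )

    quantizer.quantize(
        quantization_config=PostTrainingQuantConfig(approach="static"),
        calibration_dataset=calibration_dataset,
        save_directory="static_quantized_model",
        save_onnx_model=True,  # still exported here, though deprecated
    )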
@@ -296,9 +305,13 @@ def quantize(
                 remove_unused_columns=remove_unused_columns,
                 data_collator=data_collator,
             )
+
             op_type_dict = getattr(quantization_config, "op_type_dict", None)
-            if op_type_dict is None or "Embedding" not in op_type_dict:
-                logger.warning("ONNX export is not supported for models with quantized embeddings")
+            if save_onnx_model and (op_type_dict is None or "Embedding" not in op_type_dict):
+                logger.warning(
+                    "ONNX export is not supported for models with quantized embeddings. "
+                    "Setting `save_onnx_model` to False."
+                )
                 save_onnx_model = False
 
             if not isinstance(quantization_config, PostTrainingQuantConfig):
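The `op_type_dict` escape hatch referenced above looks roughly like this; the nested key layout follows neural-compressor's operator-level config convention, so treat the exact keys as an assumption:

    from neural_compressor.config import PostTrainingQuantConfig

    # Pin Embedding ops to fp32 so the quantized graph remains ONNX-exportable;
    # with "Embedding" present in op_type_dict, the warning above is skipped.
    quantization_config = PostTrainingQuantConfig(
        approach="static",
        op_type_dict={
            "Embedding": {
                "weight": {"dtype": ["fp32"]},
                "activation": {"dtype": ["fp32"]},
            }
        },
    )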
13 changes: 8 additions & 5 deletions optimum/intel/neural_compressor/trainer.py
@@ -175,6 +175,9 @@ def __init__(
         # TODO : To deprecate once support transformers > 4.30.0
         self.deepspeed = None
 
+        if save_onnx_model:
+            logger.warning("ONNX model saving is deprecated and will be removed soon.")
+
         # Attach dtype and architecture to the config
         if quantization_config is not None:
             self.dtype = "int8"
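Trainer-side, a minimal sketch of how the warning reaches users; INCTrainer mirrors transformers.Trainer plus the neural-compressor configs, and the dataset here is a placeholder:

    from neural_compressor import QuantizationAwareTrainingConfig
    from optimum.intel import INCTrainer
    from transformers import AutoModelForSequenceClassification, TrainingArguments

    model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased")

    # Passing save_onnx_model=True at construction now logs the deprecation
    # warning added above; quantization_config also sets self.dtype = "int8".
    trainer = INCTrainer(
        model=model,
        quantization_config=QuantizationAwareTrainingConfig(),
        args=TrainingArguments(output_dir="qat_model"),
        train_dataset=train_dataset,  # placeholder: any tokenized dataset
        save_onnx_model=True,  # deprecated
    )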
@@ -678,15 +681,12 @@ def _inner_training_loop(
     def save_model(
         self,
         output_dir: Optional[str] = None,
-        _internal_call: bool = False,
-        save_onnx_model: Optional[bool] = None,
+        save_onnx_model: bool = False,
     ):
         """
         Will save the model, so you can reload it using `from_pretrained()`.
         Will only save from the main process.
         """
-        save_onnx_model = save_onnx_model if save_onnx_model is not None else self.save_onnx_model
-
         if output_dir is None:
             output_dir = self.args.output_dir
 
@@ -734,7 +734,10 @@ def _save(
 
         # Disable ONNX export for quantized model as deprecated in neural-compressor>=2.2.0
         if save_onnx_model and self.dtype == "int8":
-            logger.warning("ONNX export for quantized model is no longer supported by neural-compressor>=2.2.0. ")
+            logger.warning(
+                "ONNX export for quantized model is no longer supported by neural-compressor>=2.2.0. "
+                "Setting `save_onnx_model` to False."
+            )
             save_onnx_model = False
 
         # Export the compressed model to the ONNX format
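Sketch of the net effect on saving, assuming the trainer from the previous sketch: opting in is now explicit, and the int8 guard wins regardless:

    # The old save_onnx_model=None fallback to the trainer attribute is gone;
    # the default is now simply False, so this saves a PyTorch checkpoint only.
    trainer.save_model("qat_model")

    # Opting in still works for fp32 models but is deprecated; for an
    # int8-quantized model, _save() logs the warning above and forces
    # save_onnx_model back to False.
    trainer.save_model("qat_model", save_onnx_model=True)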
