remove dependency
Signed-off-by: changwangss <[email protected]>
changwangss committed Sep 5, 2024
1 parent a31fc6a commit 3b5f228
Showing 5 changed files with 9 additions and 8 deletions.
2 changes: 0 additions & 2 deletions .github/workflows/test_inc.yml
@@ -37,15 +37,13 @@ jobs:
pip install py-cpuinfo
pip install torch==2.3.0 torchaudio==2.3.0 torchvision==0.18 --index-url https://download.pytorch.org/whl/cpu
pip install .[neural-compressor,diffusers,tests]
- pip install intel-extension-for-transformers
pip install peft
- name: Test with Pytest
run: |
pytest tests/neural_compressor/ --ignore tests/neural_compressor/test_ipex.py --durations=0
- name: Test IPEX
run: |
- pip uninstall -y intel-extension-for-transformers
pip install intel-extension-for-pytorch==2.3.0
pytest tests/neural_compressor/test_ipex.py
2 changes: 1 addition & 1 deletion examples/neural_compressor/language-modeling/README.md
@@ -97,4 +97,4 @@ respectively `dynamic`, `static`, `weight_only` or `aware_training`.

The flag `--verify_loading` can be passed along to verify that the resulting quantized model can be loaded correctly.

- > **_Note:_** `weight_only` quantization_approach requires `neural-compressor` >= 2.3 and `intel-extension-for-transformers` >= 1.3.
+ > **_Note:_** `weight_only` quantization_approach requires `neural-compressor` > 3.0.
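
For readers following the migration: weight-only quantization is now expected to come from `neural-compressor` itself rather than `intel-extension-for-transformers`. A minimal sketch of the INC-backed flow, based on the `modeling_base.py` hunk below (the `RtnConfig` import path and its `bits`/`group_size` arguments are assumptions, and the model id is illustrative):

```python
# Sketch only: load a model with 4-bit round-to-nearest (RTN) weight-only
# quantization through the INC-backed path. Assumes neural-compressor > 3.0
# exposes RtnConfig under neural_compressor.transformers and that
# INCModelForCausalLM routes it to weight_only_quantization, as the
# modeling_base.py hunk below does.
from neural_compressor.transformers import RtnConfig
from optimum.intel import INCModelForCausalLM

quantization_config = RtnConfig(bits=4, group_size=32)  # assumed signature
model = INCModelForCausalLM.from_pretrained(
    "facebook/opt-125m",  # illustrative model id
    quantization_config=quantization_config,
)
```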
1 change: 0 additions & 1 deletion examples/neural_compressor/language-modeling/requirements.txt
@@ -3,5 +3,4 @@ torch >= 1.9
datasets >= 1.8.0
sentencepiece != 0.1.92
protobuf
- intel-extension-for-transformers >= 1.3
peft
8 changes: 8 additions & 0 deletions optimum/intel/neural_compressor/modeling_base.py
@@ -139,6 +139,10 @@ def _from_pretrained(
_BaseINCAutoModelClass,
)

+ warnings.warn(
+     "Weight only quantization model loading provided by intel_extension_for_transformers is deprecated and it is provided by INC now.",
+     DeprecationWarning,
+ )
_BaseINCAutoModelClass.ORIG_MODEL = cls.auto_model_class
model = _BaseINCAutoModelClass.load_low_bit(
model_id,
@@ -157,6 +161,10 @@ def _from_pretrained(
except Exception as e:
raise RuntimeError(f"The quantized model cannot be loaded. Detailed error: {e}")
if isinstance(quantization_config, (RtnConfig, GPTQConfig)):
+ warnings.warn(
+     "Weight only quantization provided by intel_extension_for_transformers is deprecated and it is provided by INC now.",
+     DeprecationWarning,
+ )
model = weight_only_quantization(
cls.auto_model_class,
model_id,
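
Note that the new notices use `DeprecationWarning`, which Python's default filters suppress in most contexts, so downstream code has to opt in to see them. A standard-library-only sketch:

```python
import warnings

# DeprecationWarning is suppressed by default outside __main__; opt in
# explicitly, or run Python with -W always / PYTHONWARNINGS=always.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always", DeprecationWarning)
    warnings.warn(
        "Weight only quantization model loading provided by "
        "intel_extension_for_transformers is deprecated and it is provided by INC now.",
        DeprecationWarning,
    )

assert any(issubclass(w.category, DeprecationWarning) for w in caught)
print(caught[0].message)  # inspect the recorded warning
```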
4 changes: 0 additions & 4 deletions optimum/intel/neural_compressor/quantization.py
@@ -354,10 +354,6 @@ def weight_only_quantization(
device_map = kwargs.get("device_map", "xpu" if (hasattr(torch, "xpu") and torch.xpu.is_available()) else "cpu")
use_xpu = True if device_map == torch.device("xpu") or device_map == "xpu" else False

- warnings.warn(
-     "Weight only quantization provided by intel_extension_for_transformers is deprecated and it is provided by INC now.",
-     DeprecationWarning,
- )
if is_neural_compressor_version("<=", "3.0"):
raise AssertionError("Please use neural_compressor version > 3.0.")
if is_ipex_version("<", "2.3.1") and use_xpu:
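
With the warning removed, the version guard is what remains to steer users: calling `weight_only_quantization` against `neural-compressor` <= 3.0 raises an `AssertionError`. A standalone sketch of an equivalent check (the real `is_neural_compressor_version` helper lives in optimum-intel's import utilities; this version, built on `importlib.metadata` and `packaging`, is illustrative only):

```python
import importlib.metadata
import operator

from packaging import version

_OPS = {"<": operator.lt, "<=": operator.le, "==": operator.eq,
        ">=": operator.ge, ">": operator.gt}

def is_neural_compressor_version(op: str, ref: str) -> bool:
    # Compare the installed neural-compressor version against a reference.
    installed = version.parse(importlib.metadata.version("neural-compressor"))
    return _OPS[op](installed, version.parse(ref))

if is_neural_compressor_version("<=", "3.0"):
    raise AssertionError("Please use neural_compressor version > 3.0.")
```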
