remove dependency
Signed-off-by: changwangss <[email protected]>
changwangss committed Sep 5, 2024
1 parent a31fc6a commit 3b5f228
Showing 5 changed files with 9 additions and 8 deletions.
2 changes: 0 additions & 2 deletions .github/workflows/test_inc.yml
@@ -37,15 +37,13 @@ jobs:
pip install py-cpuinfo
pip install torch==2.3.0 torchaudio==2.3.0 torchvision==0.18 --index-url https://download.pytorch.org/whl/cpu
pip install .[neural-compressor,diffusers,tests]
- pip install intel-extension-for-transformers
pip install peft
- name: Test with Pytest
run: |
pytest tests/neural_compressor/ --ignore tests/neural_compressor/test_ipex.py --durations=0
- name: Test IPEX
run: |
- pip uninstall -y intel-extension-for-transformers
pip install intel-extension-for-pytorch==2.3.0
pytest tests/neural_compressor/test_ipex.py
2 changes: 1 addition & 1 deletion examples/neural_compressor/language-modeling/README.md
@@ -97,4 +97,4 @@ respectively `dynamic`, `static`, `weight_only` or `aware_training`.

The flag `--verify_loading` can be passed along to verify that the resulting quantized model can be loaded correctly.

- > **_Note:_** `weight_only` quantization_approach requires `neural-compressor` >= 2.3 and `intel-extension-for-transformers` >= 1.3.
+ > **_Note:_** `weight_only` quantization_approach requires `neural-compressor` > 3.0.
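
For readers following the migration: weight-only quantization is now expected to come from `neural-compressor` itself rather than `intel-extension-for-transformers`. A minimal sketch of the INC-backed flow, based on the `modeling_base.py` hunk below (the `RtnConfig` import path and its `bits`/`group_size` arguments are assumptions, and the model id is illustrative):

```python
# Sketch only: load a model with 4-bit round-to-nearest (RTN) weight-only
# quantization through the INC-backed path. Assumes neural-compressor > 3.0
# exposes RtnConfig under neural_compressor.transformers and that
# INCModelForCausalLM routes it to weight_only_quantization, as the
# modeling_base.py hunk below does.
from neural_compressor.transformers import RtnConfig
from optimum.intel import INCModelForCausalLM

quantization_config = RtnConfig(bits=4, group_size=32)  # assumed signature
model = INCModelForCausalLM.from_pretrained(
    "facebook/opt-125m",  # illustrative model id
    quantization_config=quantization_config,
)
```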
1 change: 0 additions & 1 deletion examples/neural_compressor/language-modeling/requirements.txt
@@ -3,5 +3,4 @@ torch >= 1.9
datasets >= 1.8.0
sentencepiece != 0.1.92
protobuf
- intel-extension-for-transformers >= 1.3
peft
8 changes: 8 additions & 0 deletions optimum/intel/neural_compressor/modeling_base.py
@@ -139,6 +139,10 @@ def _from_pretrained(
_BaseINCAutoModelClass,
)

+ warnings.warn(
+     "Weight only quantization model loading provided by intel_extension_for_transformers is deprecated and it is provided by INC now.",
+     DeprecationWarning,
+ )
_BaseINCAutoModelClass.ORIG_MODEL = cls.auto_model_class
model = _BaseINCAutoModelClass.load_low_bit(
model_id,
@@ -157,6 +161,10 @@ def _from_pretrained(
except Exception as e:
raise RuntimeError(f"The quantized model cannot be loaded. Detailed error: {e}")
if isinstance(quantization_config, (RtnConfig, GPTQConfig)):
+ warnings.warn(
+     "Weight only quantization provided by intel_extension_for_transformers is deprecated and it is provided by INC now.",
+     DeprecationWarning,
+ )
model = weight_only_quantization(
cls.auto_model_class,
model_id,
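
Note that the new notices use `DeprecationWarning`, which Python's default filters suppress in most contexts, so downstream code has to opt in to see them. A standard-library-only sketch:

```python
import warnings

# DeprecationWarning is suppressed by default outside __main__; opt in
# explicitly, or run Python with -W always / PYTHONWARNINGS=always.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always", DeprecationWarning)
    warnings.warn(
        "Weight only quantization model loading provided by "
        "intel_extension_for_transformers is deprecated and it is provided by INC now.",
        DeprecationWarning,
    )

assert any(issubclass(w.category, DeprecationWarning) for w in caught)
print(caught[0].message)  # inspect the recorded warning
```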
4 changes: 0 additions & 4 deletions optimum/intel/neural_compressor/quantization.py
@@ -354,10 +354,6 @@ def weight_only_quantization(
device_map = kwargs.get("device_map", "xpu" if (hasattr(torch, "xpu") and torch.xpu.is_available()) else "cpu")
use_xpu = True if device_map == torch.device("xpu") or device_map == "xpu" else False

- warnings.warn(
-     "Weight only quantization provided by intel_extension_for_transformers is deprecated and it is provided by INC now.",
-     DeprecationWarning,
- )
if is_neural_compressor_version("<=", "3.0"):
raise AssertionError("Please use neural_compressor version > 3.0.")
if is_ipex_version("<", "2.3.1") and use_xpu:
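
With the warning removed, the version guard is what remains to steer users: calling `weight_only_quantization` against `neural-compressor` <= 3.0 raises an `AssertionError`. A standalone sketch of an equivalent check (the real `is_neural_compressor_version` helper lives in optimum-intel's import utilities; this version, built on `importlib.metadata` and `packaging`, is illustrative only):

```python
import importlib.metadata
import operator

from packaging import version

_OPS = {"<": operator.lt, "<=": operator.le, "==": operator.eq,
        ">=": operator.ge, ">": operator.gt}

def is_neural_compressor_version(op: str, ref: str) -> bool:
    # Compare the installed neural-compressor version against a reference.
    installed = version.parse(importlib.metadata.version("neural-compressor"))
    return _OPS[op](installed, version.parse(ref))

if is_neural_compressor_version("<=", "3.0"):
    raise AssertionError("Please use neural_compressor version > 3.0.")
```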
