Commit 8c58510

add limit for use_layer_wise
Signed-off-by: changwangss <[email protected]>
changwangss committed Dec 3, 2024
1 parent 0b36916 commit 8c58510
Showing 1 changed file with 2 additions and 16 deletions.
optimum/intel/neural_compressor/quantization.py
@@ -375,25 +375,11 @@ def _weight_only_quantization(

     low_cpu_mem_usage = True

-    if getattr(quantization_config, "use_layer_wise", False):
+    if getattr(quantization_config, "use_layer_wise", False) and token is None and subfolder == "":
         from neural_compressor.torch import load_empty_model

         model = load_empty_model(model_id, cls=model_class, trust_remote_code=trust_remote_code)
     else:
-        if use_xpu:
-            try:
-                # TODO: if low_cpu_mem_uasge is True, gptj will have accuracy issue on CPU device.
-                model = model_class.from_pretrained(
-                    model_id, low_cpu_mem_usage=low_cpu_mem_usage, device_map="cpu", **loading_kwargs
-                )
-            except NotImplementedError:
-                logger.info(
-                    "Failed to load models with `low_cpu_mem_usage=True`, will fall to traditional load method resulting in higher memory consumption."
-                )
-                low_cpu_mem_usage = False
-                model = model_class.from_pretrained(model_id, low_cpu_mem_usage=low_cpu_mem_usage, **loading_kwargs)
-        else:
-            model = model_class.from_pretrained(model_id, low_cpu_mem_usage=low_cpu_mem_usage, **loading_kwargs)
+        model = model_class.from_pretrained(model_id, low_cpu_mem_usage=low_cpu_mem_usage, **loading_kwargs)

     if use_xpu:
         quantization_config.update(**{"device": "xpu"})
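
For orientation, the net effect of this commit on the loading path is sketched below. This is a hedged reconstruction, not the real function: the wrapper name load_for_woq and its flat parameter list are hypothetical stand-ins for the internals of _weight_only_quantization, and only the guard condition and the two load calls are taken from the diff.

    def load_for_woq(model_id, model_class, quantization_config,
                     token=None, subfolder="", trust_remote_code=False,
                     **loading_kwargs):
        # Hypothetical wrapper; the real logic lives in
        # optimum/intel/neural_compressor/quantization.py.
        low_cpu_mem_usage = True

        # New guard: layer-wise quantization starts from an empty (meta-device)
        # model, but neither `token` nor `subfolder` is forwarded to
        # `load_empty_model`, so that path is now taken only when both are unset.
        if (getattr(quantization_config, "use_layer_wise", False)
                and token is None and subfolder == ""):
            from neural_compressor.torch import load_empty_model

            model = load_empty_model(model_id, cls=model_class,
                                     trust_remote_code=trust_remote_code)
        else:
            # Consolidated fallback: the old per-device (XPU vs. CPU) branches
            # and the NotImplementedError retry were removed by this commit.
            model = model_class.from_pretrained(
                model_id, low_cpu_mem_usage=low_cpu_mem_usage, **loading_kwargs
            )
        return model

One consequence worth noting: as the diff stands, a model that needs an auth token or lives in a repo subfolder falls through to the regular from_pretrained path with no warning, even when use_layer_wise is requested.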