diff --git a/neural_compressor/model/torch_model.py b/neural_compressor/model/torch_model.py
index 228707b063b..53b2d3b84ac 100644
--- a/neural_compressor/model/torch_model.py
+++ b/neural_compressor/model/torch_model.py
@@ -499,7 +499,7 @@ def export_compressed_model(
         autoround_config = self.autoround_config if hasattr(self, "autoround_config") else {}
-        if gptq_config:
+        if gptq_config or (autoround_config and device == "xpu"):
             for k, v in weight_config.items():
                 logger.debug(f"Compressing {k} on device {device}")
                 if v["dtype"] == "fp32":
@@ -558,7 +558,7 @@ def export_compressed_model(
                 )
                 new_module.pack(int_weight, gptq_scale, gptq_zp, m.bias, gptq_perm)
                 set_module(self.model, k, new_module)
-        elif autoround_config:
+        elif autoround_config and (device == "cpu" or device == "auto"):
             from auto_round.export.export_to_itrex.export import pack_model  # pylint: disable=E0401
             self.model = pack_model(