From e86c16aa5b0ec54fd97b7c66fa939af88e3247cb Mon Sep 17 00:00:00 2001
From: changwangss
Date: Mon, 30 Sep 2024 00:01:30 -0700
Subject: [PATCH] adapt transformers

Signed-off-by: changwangss
---
 neural_compressor/torch/algorithms/weight_only/awq.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/neural_compressor/torch/algorithms/weight_only/awq.py b/neural_compressor/torch/algorithms/weight_only/awq.py
index 00d7fb5172c..677f3cb9899 100644
--- a/neural_compressor/torch/algorithms/weight_only/awq.py
+++ b/neural_compressor/torch/algorithms/weight_only/awq.py
@@ -516,6 +516,9 @@ def block_inference(self, model):
         """
         total_out = []
         for args, kwargs in zip(self.total_block_args, self.total_block_kwargs):
+            # Avoid passing layer_past when it is a DynamicCache (transformers > 4.45.1).
+            if "layer_past" in kwargs and kwargs["layer_past"] is not None:
+                kwargs["layer_past"] = None
             out = model(*args, **kwargs)
             if isinstance(out, tuple):  # pragma: no cover
                 out = out[0]
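
For context, a minimal runnable sketch of the guard this patch adds: in transformers releases after 4.45.1, decoder layers may carry layer_past as a DynamicCache object rather than a tuple of tensors, so replaying captured calibration kwargs through a block can misbehave; the patch clears layer_past before replay. The replay_block helper and ToyBlock module below are hypothetical stand-ins for illustration only; just the kwargs guard mirrors the actual change in awq.py.

import torch

def replay_block(model, total_block_args, total_block_kwargs):
    """Re-run captured (args, kwargs) pairs through a block, mirroring
    block_inference: clear any cached layer_past (e.g. a DynamicCache
    captured under transformers > 4.45.1) before the forward pass."""
    total_out = []
    for args, kwargs in zip(total_block_args, total_block_kwargs):
        if kwargs.get("layer_past") is not None:
            kwargs["layer_past"] = None  # drop the stale/incompatible cache
        out = model(*args, **kwargs)
        if isinstance(out, tuple):
            out = out[0]  # keep only the hidden states
        total_out.append(out)
    return total_out

# Hypothetical usage with a toy module standing in for a decoder block.
class ToyBlock(torch.nn.Module):
    def forward(self, x, layer_past=None):
        assert layer_past is None  # the guard above guarantees this
        return (x * 2,)

outs = replay_block(ToyBlock(), [(torch.ones(2),)], [{"layer_past": object()}])
print(outs)  # [tensor([2., 2.])]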