From e86c16aa5b0ec54fd97b7c66fa939af88e3247cb Mon Sep 17 00:00:00 2001
From: changwangss
Date: Mon, 30 Sep 2024 00:01:30 -0700
Subject: [PATCH] adapt transformers

Signed-off-by: changwangss
---
 neural_compressor/torch/algorithms/weight_only/awq.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/neural_compressor/torch/algorithms/weight_only/awq.py b/neural_compressor/torch/algorithms/weight_only/awq.py
index 00d7fb5172c..677f3cb9899 100644
--- a/neural_compressor/torch/algorithms/weight_only/awq.py
+++ b/neural_compressor/torch/algorithms/weight_only/awq.py
@@ -516,6 +516,9 @@ def block_inference(self, model):
         """
         total_out = []
         for args, kwargs in zip(self.total_block_args, self.total_block_kwargs):
+            # Avoid passing layer_past when it is a DynamicCache (transformers > 4.45.1).
+            if "layer_past" in kwargs and kwargs["layer_past"] is not None:
+                kwargs["layer_past"] = None
             out = model(*args, **kwargs)
             if isinstance(out, tuple):  # pragma: no cover
                 out = out[0]
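
For context, a minimal runnable sketch of the guard this patch adds: in transformers releases after 4.45.1, decoder layers may carry layer_past as a DynamicCache object rather than a tuple of tensors, so replaying captured calibration kwargs through a block can misbehave; the patch clears layer_past before replay. The replay_block helper and ToyBlock module below are hypothetical stand-ins for illustration only; just the kwargs guard mirrors the actual change in awq.py.

import torch

def replay_block(model, total_block_args, total_block_kwargs):
    """Re-run captured (args, kwargs) pairs through a block, mirroring
    block_inference: clear any cached layer_past (e.g. a DynamicCache
    captured under transformers > 4.45.1) before the forward pass."""
    total_out = []
    for args, kwargs in zip(total_block_args, total_block_kwargs):
        if kwargs.get("layer_past") is not None:
            kwargs["layer_past"] = None  # drop the stale/incompatible cache
        out = model(*args, **kwargs)
        if isinstance(out, tuple):
            out = out[0]  # keep only the hidden states
        total_out.append(out)
    return total_out

# Hypothetical usage with a toy module standing in for a decoder block.
class ToyBlock(torch.nn.Module):
    def forward(self, x, layer_past=None):
        assert layer_past is None  # the guard above guarantees this
        return (x * 2,)

outs = replay_block(ToyBlock(), [(torch.ones(2),)], [{"layer_past": object()}])
print(outs)  # [tensor([2., 2.])]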