From e9404b5b8b40ce93efdd59cf7524b820c61cc3fd Mon Sep 17 00:00:00 2001
From: Kaihui-intel
Date: Mon, 30 Sep 2024 14:13:19 +0800
Subject: [PATCH 1/6] rm loaded_state_dict_keys params

Signed-off-by: Kaihui-intel
---
 neural_compressor/torch/algorithms/weight_only/save_load.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/neural_compressor/torch/algorithms/weight_only/save_load.py b/neural_compressor/torch/algorithms/weight_only/save_load.py
index 8d1259cad00..df8a04b7b3c 100644
--- a/neural_compressor/torch/algorithms/weight_only/save_load.py
+++ b/neural_compressor/torch/algorithms/weight_only/save_load.py
@@ -837,7 +837,6 @@ def _load_remaining_pretrained_weight(self, model):
             _load_state_dict_into_meta_model(
                 model=model,
                 state_dict=state_dict,
-                loaded_state_dict_keys=self.loaded_state_dict_keys,
                 start_prefix="",
                 expected_keys=list(state_dict.keys()),
                 device_map={"": self.device},

From e86c16aa5b0ec54fd97b7c66fa939af88e3247cb Mon Sep 17 00:00:00 2001
From: changwangss
Date: Mon, 30 Sep 2024 00:01:30 -0700
Subject: [PATCH 2/6] adapt transformers

Signed-off-by: changwangss
---
 neural_compressor/torch/algorithms/weight_only/awq.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/neural_compressor/torch/algorithms/weight_only/awq.py b/neural_compressor/torch/algorithms/weight_only/awq.py
index 00d7fb5172c..677f3cb9899 100644
--- a/neural_compressor/torch/algorithms/weight_only/awq.py
+++ b/neural_compressor/torch/algorithms/weight_only/awq.py
@@ -516,6 +516,9 @@ def block_inference(self, model):
         """
         total_out = []
         for args, kwargs in zip(self.total_block_args, self.total_block_kwargs):
+            # drop layer_past so a DynamicCache is not passed in when transformers is newer than 4.45.1
+            if "layer_past" in kwargs.keys() and kwargs["layer_past"] is not None:
+                kwargs["layer_past"] = None
             out = model(*args, **kwargs)
             if isinstance(out, tuple):  # pragma: no cover
                 out = out[0]

From 48013d4616d085b3c3ab9cc58aec69c3cec418ee Mon Sep 17 00:00:00 2001
From: Kaihui-intel
Date: Mon, 30 Sep 2024 14:53:29 +0800
Subject: [PATCH 3/6] adapt habana 4.43.3

Signed-off-by: Kaihui-intel
---
 .../torch/algorithms/weight_only/save_load.py | 42 +++++++++++++------
 1 file changed, 30 insertions(+), 12 deletions(-)

diff --git a/neural_compressor/torch/algorithms/weight_only/save_load.py b/neural_compressor/torch/algorithms/weight_only/save_load.py
index df8a04b7b3c..de4d95eef39 100644
--- a/neural_compressor/torch/algorithms/weight_only/save_load.py
+++ b/neural_compressor/torch/algorithms/weight_only/save_load.py
@@ -834,18 +834,36 @@ def _load_remaining_pretrained_weight(self, model):
             resolved_archive_file = [resolved_archive_file]
         for shard_file in resolved_archive_file:
             state_dict = load_state_dict(shard_file)
-            _load_state_dict_into_meta_model(
-                model=model,
-                state_dict=state_dict,
-                start_prefix="",
-                expected_keys=list(state_dict.keys()),
-                device_map={"": self.device},
-                offload_folder=offload_folder,
-                state_dict_folder=tempfile.mkdtemp() if offload_state_dict else None,
-                state_dict_index={} if offload_state_dict else None,
-                dtype=torch_dtype,
-                keep_in_fp32_modules=[],
-            )
+            import transformers
+            from packaging.version import Version
+            if Version(transformers.__version__) >= Version("4.5"): # pragma: no cover
+                _load_state_dict_into_meta_model(
+                    model=model,
+                    state_dict=state_dict,
+                    start_prefix="",
+                    expected_keys=list(state_dict.keys()),
+                    device_map={"": self.device},
+                    offload_folder=offload_folder,
+                    state_dict_folder=tempfile.mkdtemp() if offload_state_dict else None,
+                    state_dict_index={} if offload_state_dict else None,
+                    dtype=torch_dtype,
+                    keep_in_fp32_modules=[],
+                )
+            else:
+                _load_state_dict_into_meta_model(
+                    model=model,
+                    state_dict=state_dict,
+                    loaded_state_dict_keys=self.loaded_state_dict_keys,
+                    start_prefix="",
+                    expected_keys=list(state_dict.keys()),
+                    device_map={"": self.device},
+                    offload_folder=offload_folder,
+                    state_dict_folder=tempfile.mkdtemp() if offload_state_dict else None,
+                    state_dict_index={} if offload_state_dict else None,
+                    dtype=torch_dtype,
+                    keep_in_fp32_modules=[],
+                )
+

         # make sure token embedding weights are still tied if needed
         model.tie_weights()

From a44aa46ef4aa4097c331184beecc02f2cd69046a Mon Sep 17 00:00:00 2001
From: Kaihui-intel
Date: Mon, 30 Sep 2024 15:20:40 +0800
Subject: [PATCH 4/6] update checked version

Signed-off-by: Kaihui-intel
---
 neural_compressor/torch/algorithms/weight_only/save_load.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/neural_compressor/torch/algorithms/weight_only/save_load.py b/neural_compressor/torch/algorithms/weight_only/save_load.py
index de4d95eef39..436e33bddea 100644
--- a/neural_compressor/torch/algorithms/weight_only/save_load.py
+++ b/neural_compressor/torch/algorithms/weight_only/save_load.py
@@ -836,7 +836,7 @@ def _load_remaining_pretrained_weight(self, model):
             state_dict = load_state_dict(shard_file)
             import transformers
             from packaging.version import Version
-            if Version(transformers.__version__) >= Version("4.5"): # pragma: no cover
+            if Version(transformers.__version__) >= Version("4.45.0"): # pragma: no cover
                 _load_state_dict_into_meta_model(
                     model=model,
                     state_dict=state_dict,

From 89bc02c9a37916232674a54565b398a2296af736 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 30 Sep 2024 07:42:09 +0000
Subject: [PATCH 5/6] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 neural_compressor/torch/algorithms/weight_only/save_load.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/neural_compressor/torch/algorithms/weight_only/save_load.py b/neural_compressor/torch/algorithms/weight_only/save_load.py
index de4d95eef39..cb0c4df4028 100644
--- a/neural_compressor/torch/algorithms/weight_only/save_load.py
+++ b/neural_compressor/torch/algorithms/weight_only/save_load.py
@@ -836,7 +836,8 @@ def _load_remaining_pretrained_weight(self, model):
             state_dict = load_state_dict(shard_file)
             import transformers
             from packaging.version import Version
-            if Version(transformers.__version__) >= Version("4.5"): # pragma: no cover
+
+            if Version(transformers.__version__) >= Version("4.5"):  # pragma: no cover
                 _load_state_dict_into_meta_model(
                     model=model,
                     state_dict=state_dict,
@@ -864,7 +865,6 @@ def _load_remaining_pretrained_weight(self, model):
                     keep_in_fp32_modules=[],
                 )
-

         # make sure token embedding weights are still tied if needed
         model.tie_weights()

From 0a5e7ede6b58c0e1d22f31d62215e713b76b873a Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 30 Sep 2024 08:11:37 +0000
Subject: [PATCH 6/6] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 neural_compressor/torch/algorithms/weight_only/save_load.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/neural_compressor/torch/algorithms/weight_only/save_load.py b/neural_compressor/torch/algorithms/weight_only/save_load.py
index 2daf096d38c..7d22c7efbc9 100644
--- a/neural_compressor/torch/algorithms/weight_only/save_load.py
+++ b/neural_compressor/torch/algorithms/weight_only/save_load.py
@@ -836,7 +836,8 @@ def _load_remaining_pretrained_weight(self, model):
             state_dict = load_state_dict(shard_file)
             import transformers
             from packaging.version import Version
-            if Version(transformers.__version__) >= Version("4.45.0"): # pragma: no cover
+
+            if Version(transformers.__version__) >= Version("4.45.0"):  # pragma: no cover
                 _load_state_dict_into_meta_model(
                     model=model,
                     state_dict=state_dict,
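
A note on the version gate in patches 3/4: the branch exists because newer transformers releases dropped the loaded_state_dict_keys parameter from _load_state_dict_into_meta_model, so the old call signature only works on older installs. One alternative to hard-coding the 4.45.0 cut-over is probing the callee's signature at runtime. The sketch below is illustrative only, under that assumption; call_compat is a hypothetical helper, not part of neural-compressor or transformers:

```python
import inspect


def call_compat(fn, /, **kwargs):
    """Call fn with only the keyword arguments its current signature accepts.

    Hypothetical helper: avoids hard-coding a transformers version cut-over
    for arguments such as loaded_state_dict_keys, which newer releases of
    _load_state_dict_into_meta_model no longer take.
    """
    params = inspect.signature(fn).parameters
    # If fn itself accepts **kwargs, every keyword is valid; pass them through.
    if any(p.kind is inspect.Parameter.VAR_KEYWORD for p in params.values()):
        return fn(**kwargs)
    # Otherwise keep only the names the signature actually declares.
    return fn(**{k: v for k, v in kwargs.items() if k in params})
```

With such a helper, the two call sites in patch 3 could collapse into one that passes loaded_state_dict_keys unconditionally and lets the probe drop it on newer transformers. The explicit Version check used in the series is more transparent and cheaper, at the cost of tracking the exact release that changed the signature.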