Skip to content

Commit

Permalink
Fix use_parallel_residual and qkv_bias for StableLM GGUF config e…
Browse files Browse the repository at this point in the history
…xtraction (huggingface#34450)

* fix stablelm qkv_bias

* fix stablelm qkv_bias and use_parallel_residual

* remove original_model.config for stablelm gguf test
  • Loading branch information
Isotr0py authored and 2015aroras committed Nov 15, 2024
1 parent 3f8452c commit 4c83cf7
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 8 deletions.
11 changes: 11 additions & 0 deletions src/transformers/modeling_gguf_pytorch_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,17 @@ def load_gguf_checkpoint(gguf_checkpoint_path, return_tensors=False):
if "qwen2moe" in architecture:
updated_architecture = "qwen2_moe"

# For stablelm architecture, we need to set qkv_bias and use_parallel_residual from tensors
# If `qkv_bias=True`, qkv_proj with bias will be present in the tensors
# If `use_parallel_residual=False`, ffn_norm will be present in the tensors
if "stablelm" in architecture:
attn_bias_name = {"attn_q.bias", "attn_k.bias", "attn_v.bias"}
ffn_norm_name = "ffn_norm"
qkv_bias = any(bias_name in tensor.name for tensor in reader.tensors for bias_name in attn_bias_name)
use_parallel_residual = any(ffn_norm_name in tensor.name for tensor in reader.tensors)
parsed_parameters["config"]["qkv_bias"] = qkv_bias
parsed_parameters["config"]["use_parallel_residual"] = not use_parallel_residual

model_size = ""
# extract the number of params from file name as architectures can differ ;
# eg. for falcon : `...falcon-7b-...`
Expand Down
8 changes: 0 additions & 8 deletions tests/quantization/ggml/test_ggml.py
Original file line number Diff line number Diff line change
Expand Up @@ -673,10 +673,6 @@ def test_stablelm_fp16(self):
self.stablelm2_model_id,
gguf_file=self.fp16_stablelm2_model_id,
torch_dtype=torch.float16,
# for precise comparison it is required to use the original model config
# as quantized one is different in parameters: use_parallel_residual and use_qkv_bias
# and it highly influences on the output results
config=original_model.config,
)

tokenizer = AutoTokenizer.from_pretrained(self.stablelm2_model_id, gguf_file=self.fp16_stablelm2_model_id)
Expand All @@ -703,10 +699,6 @@ def test_stablelm_weights_conversion_fp16(self):
gguf_file=self.fp16_stablelm2_model_id,
device_map="auto",
torch_dtype=torch.float16,
# for precise comparison it is required to use the original model config
# as quantized one is different in parameters: use_parallel_residual and use_qkv_bias
# and it highly influences on the output results
config=original_model.config,
)

converted_state_dict = converted_model.state_dict()
Expand Down

0 comments on commit 4c83cf7

Please sign in to comment.