diff --git a/examples/llm_compression/openvino/tiny_llama/main.py b/examples/llm_compression/openvino/tiny_llama/main.py index dd03a4361c6..f2be54ce1aa 100644 --- a/examples/llm_compression/openvino/tiny_llama/main.py +++ b/examples/llm_compression/openvino/tiny_llama/main.py @@ -67,7 +67,7 @@ def transform_fn(data, model, tokenizer): ) model.save_pretrained(OUTPUT_DIR) - model = OVModelForCausalLM.from_pretrained(OUTPUT_DIR, ov_config={"DYNAMIC_QUANTIZATION_GROUP_SIZE": "0"}) + model = OVModelForCausalLM.from_pretrained(OUTPUT_DIR) input_ids = tokenizer("What is PyTorch?", return_tensors="pt").to(device=model.device) start_t = time.time() diff --git a/examples/llm_compression/openvino/tiny_llama_find_hyperparams/main.py b/examples/llm_compression/openvino/tiny_llama_find_hyperparams/main.py index 7ab0176eb85..6b57b9481f2 100644 --- a/examples/llm_compression/openvino/tiny_llama_find_hyperparams/main.py +++ b/examples/llm_compression/openvino/tiny_llama_find_hyperparams/main.py @@ -245,7 +245,6 @@ def main(): "PERFORMANCE_HINT": "LATENCY", "NUM_STREAMS": "1", "CACHE_DIR": "", - "DYNAMIC_QUANTIZATION_GROUP_SIZE": "0", } model = OVModelForCausalLM.from_pretrained( model_id, diff --git a/tests/post_training/pipelines/lm_weight_compression.py b/tests/post_training/pipelines/lm_weight_compression.py index 27479fe6a50..06074701b42 100644 --- a/tests/post_training/pipelines/lm_weight_compression.py +++ b/tests/post_training/pipelines/lm_weight_compression.py @@ -275,7 +275,6 @@ def _validate(self): load_in_8bit=False, compile=False, stateful=is_stateful, - ov_config={"DYNAMIC_QUANTIZATION_GROUP_SIZE": "0"}, ) print("Evaluation of the target model") _, all_metrics = evaluator.score(compressed_model_hf)