Update compression config for openlm-research/open_llama_3b_v2 (#860)
* Remove compression with all_layers=True for openlm-research/open_llama_3b_v2

* Fix sym parameter

* Add AWQ
nikita-savelyevv authored Aug 16, 2024
1 parent 46f8858 commit 1b7bd9f
Showing 1 changed file with 8 additions and 1 deletion.
9 changes: 8 additions & 1 deletion optimum/intel/openvino/configuration.py
@@ -84,7 +84,14 @@ class OVQuantizationMethod(str, Enum):
     "THUDM/chatglm2-6b": {"bits": 4, "sym": True, "group_size": 128, "ratio": 0.72},
     "Qwen/Qwen-7B-Chat": {"bits": 4, "sym": True, "group_size": 128, "ratio": 0.6},
     "openlm-research/open_llama_3b": {"bits": 4, "sym": False, "group_size": 64, "all_layers": True},
-    "openlm-research/open_llama_3b_v2": {"bits": 4, "sym": True, "group_size": 64, "all_layers": True},
+    "openlm-research/open_llama_3b_v2": {
+        "bits": 4,
+        "sym": False,
+        "group_size": 64,
+        "ratio": 1.0,
+        "dataset": "wikitext2",
+        "quant_method": OVQuantizationMethod.AWQ,
+    },
     "tiiuae/falcon-7b-instruct": {"bits": 4, "sym": True, "group_size": 64, "all_layers": True},
     "psmathur/orca_mini_3b": {
         "bits": 4,
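
For context, below is a minimal sketch of applying the same settings when exporting the model with optimum-intel. It assumes optimum-intel is installed with its OpenVINO and NNCF dependencies; the explicit OVWeightQuantizationConfig simply mirrors the updated table entry above (the table supplies these values as the per-model default when 4-bit compression is requested without a detailed config), so treat it as illustrative rather than the canonical invocation.

# Sketch only: 4-bit AWQ weight compression for open_llama_3b_v2 via optimum-intel,
# mirroring the default entry added in this commit.
from optimum.intel import OVModelForCausalLM, OVWeightQuantizationConfig
from optimum.intel.openvino.configuration import OVQuantizationMethod

model_id = "openlm-research/open_llama_3b_v2"

# Spell out the same settings as the new default table entry.
quantization_config = OVWeightQuantizationConfig(
    bits=4,
    sym=False,                               # asymmetric int4 quantization
    group_size=64,
    ratio=1.0,                               # compress all eligible layers to 4 bit
    dataset="wikitext2",                     # calibration data used by AWQ
    quant_method=OVQuantizationMethod.AWQ,
)

# Export to OpenVINO IR and compress the weights on the fly.
model = OVModelForCausalLM.from_pretrained(
    model_id,
    export=True,
    quantization_config=quantization_config,
)
model.save_pretrained("open_llama_3b_v2-int4-awq")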
