Skip to content

Commit

Permalink
New metrics for weight compression with dynamic quantization
Browse files Browse the repository at this point in the history
  • Loading branch information
ljaljushkin committed Aug 6, 2024
1 parent b108455 commit eeb21a0
Show file tree
Hide file tree
Showing 4 changed files with 9 additions and 11 deletions.
2 changes: 1 addition & 1 deletion examples/llm_compression/openvino/tiny_llama/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def transform_fn(data, model, tokenizer):
)
model.save_pretrained(OUTPUT_DIR)

-model = OVModelForCausalLM.from_pretrained(OUTPUT_DIR, ov_config={"DYNAMIC_QUANTIZATION_GROUP_SIZE": "0"})
+model = OVModelForCausalLM.from_pretrained(OUTPUT_DIR)
input_ids = tokenizer("What is PyTorch?", return_tensors="pt").to(device=model.device)

start_t = time.time()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,6 @@ def main():
"PERFORMANCE_HINT": "LATENCY",
"NUM_STREAMS": "1",
"CACHE_DIR": "",
-"DYNAMIC_QUANTIZATION_GROUP_SIZE": "0",
}
model = OVModelForCausalLM.from_pretrained(
model_id,
Expand Down
16 changes: 8 additions & 8 deletions tests/post_training/data/wc_reference_data.yaml
Original file line number Diff line number Diff line change
@@ -1,32 +1,32 @@
tinyllama_data_free_backend_OV:
-metric_value: 0.73873
+metric_value: 0.72494
num_int4: 114
num_int8: 84
tinyllama_data_aware_backend_OV:
-metric_value: 0.85767
+metric_value: 0.85635
num_int4: 94
num_int8: 124
tinyllama_data_aware_awq_stateful_backend_OV:
-metric_value: 0.85616
+metric_value: 0.84951
num_int4: 94
num_int8: 124
tinyllama_data_aware_awq_scale_estimation_backend_OV:
-metric_value: 0.85502
+metric_value: 0.85299
num_int4: 94
num_int8: 124
tinyllama_data_aware_awq_scale_estimation_stateful_backend_OV:
-metric_value: 0.85502
+metric_value: 0.84125
num_int4: 94
num_int8: 124
tinyllama_int8_data_free_backend_TORCH:
-metric_value: 0.95624
+metric_value: 0.94924
num_int4: 0
num_int8: 312
tinyllama_data_aware_gptq_backend_OV:
-metric_value: 0.87134
+metric_value: 0.86621
num_int4: 94
num_int8: 124
tinyllama_scale_estimation_per_channel_backend_OV:
-metric_value: 0.81389
+metric_value: 0.82356
num_int4: 188
num_int8: 124
1 change: 0 additions & 1 deletion tests/post_training/pipelines/lm_weight_compression.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,6 @@ def _validate(self):
load_in_8bit=False,
compile=False,
stateful=is_stateful,
-ov_config={"DYNAMIC_QUANTIZATION_GROUP_SIZE": "0"},
)
print("Evaluation of the target model")
_, all_metrics = evaluator.score(compressed_model_hf)
Expand Down

0 comments on commit eeb21a0

Please sign in to comment.