Added stateful model to weight compression tests (openvinotoolkit#2463)
### Changes

Added a new test case to the conformance test suite that compresses
[stateful](https://docs.openvino.ai/2022.3/openvino_docs_OV_UG_network_state_intro.html)
models.
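
For context: in the stateful representation the LLM keeps its KV-cache as internal model state instead of passing `past_key_values` through explicit inputs and outputs. Below is a minimal sketch (not part of this change) of exporting the test model in that form with optimum-intel; the output directory name is illustrative.

```python
# Sketch only: export the TinyLlama test model as a stateful OpenVINO IR.
# Assumes optimum-intel with the OpenVINO extras is installed.
from optimum.intel.openvino import OVModelForCausalLM

model_id = "tinyllama/tinyllama-1.1b-step-50k-105b"

# stateful=True keeps the KV-cache inside the model as internal state
# rather than exposing past_key_values as explicit inputs/outputs.
model = OVModelForCausalLM.from_pretrained(
    model_id, export=True, load_in_8bit=False, compile=False, stateful=True
)
model.save_pretrained("tinyllama_fp32_sf")  # illustrative output directory
```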

### Reason for changes

Catch regressions in the more efficient (stateful) representation of LLMs.

### Related tickets

132159

### Tests

- [x] Build 7 of the weight compression conformance tests


![image](https://github.com/openvinotoolkit/nncf/assets/4014476/24f24d54-ef45-4303-b5de-d2ea10a9f5a8)
ljaljushkin authored Feb 14, 2024
1 parent 4c360c9 commit 6539272
Showing 3 changed files with 27 additions and 4 deletions.

tests/post_training/data/wc_reference_data.yaml (2 additions, 0 deletions)

```diff
@@ -4,3 +4,5 @@ tinyllama_data_aware_backend_OV:
   metric_value: 0.83084
 tinyllama_data_aware_awq_backend_OV:
   metric_value: 0.81229
+tinyllama_data_aware_awq_stateful_backend_OV:
+  metric_value: 0.81229
```
tests/post_training/model_scope.py (8 additions, 0 deletions)

```diff
@@ -301,6 +301,14 @@
         "compression_params": {"group_size": 64, "ratio": 0.8, "mode": CompressWeightsMode.INT4_SYM, "awq": True},
         "backends": [BackendType.OV],
     },
+    {
+        "reported_name": "tinyllama_data_aware_awq_stateful",
+        "model_id": "tinyllama/tinyllama-1.1b-step-50k-105b",
+        "pipeline_cls": LMWeightCompression,
+        "compression_params": {"group_size": 64, "ratio": 0.8, "mode": CompressWeightsMode.INT4_SYM, "awq": True},
+        "params": {"is_stateful": True},
+        "backends": [BackendType.OV],
+    },
 ]
```

tests/post_training/pipelines/lm_weight_compression.py (17 additions, 4 deletions)

```diff
@@ -71,16 +71,19 @@ class LMWeightCompression(BaseTestPipeline):
     OV_MODEL_NAME = "openvino_model.xml"

     def prepare_model(self) -> None:
+        is_stateful = self.params.get("is_stateful", False)
+        if is_stateful:
+            self.fp32_model_dir = self.fp32_model_dir.parent / (self.fp32_model_dir.name + "_sf")
         if not (self.fp32_model_dir / self.OV_MODEL_NAME).exists():
             # export by model_id
             self.model_hf = OVModelForCausalLM.from_pretrained(
-                self.model_id, export=True, load_in_8bit=False, compile=False, stateful=False
+                self.model_id, export=True, load_in_8bit=False, compile=False, stateful=is_stateful
             )
             self._dump_model_fp32()
         else:
             # no export, load from IR. Applicable for sequential run of test cases in local environment.
             self.model_hf = OVModelForCausalLM.from_pretrained(
-                self.fp32_model_dir, trust_remote_code=True, load_in_8bit=False, compile=False, stateful=False
+                self.fp32_model_dir, trust_remote_code=True, load_in_8bit=False, compile=False, stateful=is_stateful
             )
         self.model = self.model_hf.model
```

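The `_sf` suffix keeps the stateful FP32 IR in a sibling directory so it does not overwrite the cached stateless export. If needed, one can check what kind of IR actually landed there; a hedged sketch using only the public OpenVINO API (the path is illustrative):

```python
import openvino as ov

core = ov.Core()
model = core.read_model("tinyllama_fp32_sf/openvino_model.xml")  # illustrative path

# A stateful IR carries its KV-cache in ReadValue/Assign nodes and exposes a
# beam_idx input, while a stateless one has explicit past_key_values inputs.
has_state_ops = any(op.get_type_name() in ("ReadValue", "Assign") for op in model.get_ops())
has_kv_inputs = any("past_key_values" in inp.any_name for inp in model.inputs)
print(f"state ops: {has_state_ops}, explicit KV-cache inputs: {has_kv_inputs}")
```
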
```diff
@@ -112,6 +115,15 @@ def transform_fn(data):
             inputs[input_name] = ov.Tensor(model_inputs.get_element_type(), shape.get_shape())

             inputs["position_ids"] = position_ids
+
+            # initialize the rest of inputs (e.g. beam_idx for stateful models)
+            for val in self.model.inputs:
+                name = val.any_name
+                if name in inputs:
+                    continue
+                shape = list(val.partial_shape.get_min_shape())
+                shape[0] = batch_size
+                inputs[name] = np.zeros(shape)
             return inputs

         return transform_fn
```

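A stateful IR exposes inputs that the tokenized calibration sample does not provide (notably `beam_idx`), which is what the added loop zero-fills. A self-contained sketch of the same idea against an already-exported model (path, batch size, and the pre-filled inputs are illustrative):

```python
import numpy as np
import openvino as ov

core = ov.Core()
model = core.read_model("tinyllama_fp32_sf/openvino_model.xml")  # illustrative path

batch_size = 1
# inputs the calibration sample would already provide (illustrative subset)
inputs = {"input_ids": np.zeros((batch_size, 8), dtype=np.int64)}

for val in model.inputs:
    name = val.any_name
    if name in inputs:
        continue
    # smallest static shape for dynamic dimensions, with the batch dimension pinned
    shape = list(val.partial_shape.get_min_shape())
    shape[0] = batch_size
    inputs[name] = np.zeros(shape)

print(sorted(inputs))  # e.g. ['attention_mask', 'beam_idx', 'input_ids', 'position_ids']
```
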
```diff
@@ -173,6 +185,7 @@ def _compress(self):
         )

     def _validate(self):
+        is_stateful = self.params.get("is_stateful", False)
         core = ov.Core()

         if os.environ.get("CPU_THREADS_NUM"):
@@ -185,7 +198,7 @@ def _validate(self):
         if os.getenv("NNCF_TEST_REGEN_DOT") is not None:
             print("Collection ground-truth reference data")
             model_gold = OVModelForCausalLM.from_pretrained(
-                self.fp32_model_dir, trust_remote_code=True, load_in_8bit=False, compile=False, stateful=False
+                self.fp32_model_dir, trust_remote_code=True, load_in_8bit=False, compile=False, stateful=is_stateful
             )
             evaluator = Evaluator(base_model=model_gold, tokenizer=self.preprocessor, metrics=("similarity",))
             evaluator.dump_gt(str(gt_data_path))
```

```diff
@@ -199,7 +212,7 @@ def _validate(self):
         compressed_model_hf = self.model_hf
         if self.backend != BackendType.FP32:
             compressed_model_hf = OVModelForCausalLM.from_pretrained(
-                self.output_model_dir, trust_remote_code=True, load_in_8bit=False, compile=False, stateful=False
+                self.output_model_dir, trust_remote_code=True, load_in_8bit=False, compile=False, stateful=is_stateful
             )
         print("Evaluation of the target model")
         _, all_metrics = evaluator.score(compressed_model_hf)
```
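
Reloading the compressed model with the same `stateful` flag keeps validation consistent with how the IR was exported. A hedged usage sketch of running such a compressed stateful model outside the test harness (directory and prompt are illustrative):

```python
from optimum.intel.openvino import OVModelForCausalLM
from transformers import AutoTokenizer

output_model_dir = "tinyllama_int4_sf"  # illustrative: directory with the compressed IR
tokenizer = AutoTokenizer.from_pretrained("tinyllama/tinyllama-1.1b-step-50k-105b")

model = OVModelForCausalLM.from_pretrained(
    output_model_dir, trust_remote_code=True, load_in_8bit=False, stateful=True
)
prompt = tokenizer("What is OpenVINO?", return_tensors="pt")
generated = model.generate(**prompt, max_new_tokens=16)
print(tokenizer.decode(generated[0], skip_special_tokens=True))
```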
