Added stateful model to weight compression tests (openvinotoolkit#2463)
### Changes

Added a new test case to the conformance test suite that compresses
[stateful](https://docs.openvino.ai/2022.3/openvino_docs_OV_UG_network_state_intro.html)
models.
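
For context: in the stateful representation the LLM keeps its KV-cache as internal model state instead of passing `past_key_values` through explicit inputs and outputs. Below is a minimal sketch (not part of this change) of exporting the test model in that form with optimum-intel; the output directory name is illustrative.

```python
# Sketch only: export the TinyLlama test model as a stateful OpenVINO IR.
# Assumes optimum-intel with the OpenVINO extras is installed.
from optimum.intel.openvino import OVModelForCausalLM

model_id = "tinyllama/tinyllama-1.1b-step-50k-105b"

# stateful=True keeps the KV-cache inside the model as internal state
# rather than exposing past_key_values as explicit inputs/outputs.
model = OVModelForCausalLM.from_pretrained(
    model_id, export=True, load_in_8bit=False, compile=False, stateful=True
)
model.save_pretrained("tinyllama_fp32_sf")  # illustrative output directory
```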

### Reason for changes

Catch regressions in the more efficient (stateful) representation of LLMs.

### Related tickets

132159

### Tests

- [x] Build 7 of the weight compression conformance tests


![image](https://github.com/openvinotoolkit/nncf/assets/4014476/24f24d54-ef45-4303-b5de-d2ea10a9f5a8)
ljaljushkin authored Feb 14, 2024
1 parent 4c360c9 commit 6539272
Showing 3 changed files with 27 additions and 4 deletions.

tests/post_training/data/wc_reference_data.yaml (2 additions, 0 deletions)

```diff
@@ -4,3 +4,5 @@ tinyllama_data_aware_backend_OV:
   metric_value: 0.83084
 tinyllama_data_aware_awq_backend_OV:
   metric_value: 0.81229
+tinyllama_data_aware_awq_stateful_backend_OV:
+  metric_value: 0.81229
```
tests/post_training/model_scope.py (8 additions, 0 deletions)

```diff
@@ -301,6 +301,14 @@
         "compression_params": {"group_size": 64, "ratio": 0.8, "mode": CompressWeightsMode.INT4_SYM, "awq": True},
         "backends": [BackendType.OV],
     },
+    {
+        "reported_name": "tinyllama_data_aware_awq_stateful",
+        "model_id": "tinyllama/tinyllama-1.1b-step-50k-105b",
+        "pipeline_cls": LMWeightCompression,
+        "compression_params": {"group_size": 64, "ratio": 0.8, "mode": CompressWeightsMode.INT4_SYM, "awq": True},
+        "params": {"is_stateful": True},
+        "backends": [BackendType.OV],
+    },
 ]
```

tests/post_training/pipelines/lm_weight_compression.py (17 additions, 4 deletions)

```diff
@@ -71,16 +71,19 @@ class LMWeightCompression(BaseTestPipeline):
     OV_MODEL_NAME = "openvino_model.xml"

     def prepare_model(self) -> None:
+        is_stateful = self.params.get("is_stateful", False)
+        if is_stateful:
+            self.fp32_model_dir = self.fp32_model_dir.parent / (self.fp32_model_dir.name + "_sf")
         if not (self.fp32_model_dir / self.OV_MODEL_NAME).exists():
             # export by model_id
             self.model_hf = OVModelForCausalLM.from_pretrained(
-                self.model_id, export=True, load_in_8bit=False, compile=False, stateful=False
+                self.model_id, export=True, load_in_8bit=False, compile=False, stateful=is_stateful
             )
             self._dump_model_fp32()
         else:
             # no export, load from IR. Applicable for sequential run of test cases in local environment.
             self.model_hf = OVModelForCausalLM.from_pretrained(
-                self.fp32_model_dir, trust_remote_code=True, load_in_8bit=False, compile=False, stateful=False
+                self.fp32_model_dir, trust_remote_code=True, load_in_8bit=False, compile=False, stateful=is_stateful
             )
         self.model = self.model_hf.model
```

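The `_sf` suffix keeps the stateful FP32 IR in a sibling directory so it does not overwrite the cached stateless export. If needed, one can check what kind of IR actually landed there; a hedged sketch using only the public OpenVINO API (the path is illustrative):

```python
import openvino as ov

core = ov.Core()
model = core.read_model("tinyllama_fp32_sf/openvino_model.xml")  # illustrative path

# A stateful IR carries its KV-cache in ReadValue/Assign nodes and exposes a
# beam_idx input, while a stateless one has explicit past_key_values inputs.
has_state_ops = any(op.get_type_name() in ("ReadValue", "Assign") for op in model.get_ops())
has_kv_inputs = any("past_key_values" in inp.any_name for inp in model.inputs)
print(f"state ops: {has_state_ops}, explicit KV-cache inputs: {has_kv_inputs}")
```
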
```diff
@@ -112,6 +115,15 @@ def transform_fn(data):
             inputs[input_name] = ov.Tensor(model_inputs.get_element_type(), shape.get_shape())

             inputs["position_ids"] = position_ids
+
+            # initialize the rest of inputs (e.g. beam_idx for stateful models)
+            for val in self.model.inputs:
+                name = val.any_name
+                if name in inputs:
+                    continue
+                shape = list(val.partial_shape.get_min_shape())
+                shape[0] = batch_size
+                inputs[name] = np.zeros(shape)
             return inputs

         return transform_fn
```

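A stateful IR exposes inputs that the tokenized calibration sample does not provide (notably `beam_idx`), which is what the added loop zero-fills. A self-contained sketch of the same idea against an already-exported model (path, batch size, and the pre-filled inputs are illustrative):

```python
import numpy as np
import openvino as ov

core = ov.Core()
model = core.read_model("tinyllama_fp32_sf/openvino_model.xml")  # illustrative path

batch_size = 1
# inputs the calibration sample would already provide (illustrative subset)
inputs = {"input_ids": np.zeros((batch_size, 8), dtype=np.int64)}

for val in model.inputs:
    name = val.any_name
    if name in inputs:
        continue
    # smallest static shape for dynamic dimensions, with the batch dimension pinned
    shape = list(val.partial_shape.get_min_shape())
    shape[0] = batch_size
    inputs[name] = np.zeros(shape)

print(sorted(inputs))  # e.g. ['attention_mask', 'beam_idx', 'input_ids', 'position_ids']
```
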
```diff
@@ -173,6 +185,7 @@ def _compress(self):
         )

     def _validate(self):
+        is_stateful = self.params.get("is_stateful", False)
         core = ov.Core()

         if os.environ.get("CPU_THREADS_NUM"):
@@ -185,7 +198,7 @@ def _validate(self):
         if os.getenv("NNCF_TEST_REGEN_DOT") is not None:
             print("Collection ground-truth reference data")
             model_gold = OVModelForCausalLM.from_pretrained(
-                self.fp32_model_dir, trust_remote_code=True, load_in_8bit=False, compile=False, stateful=False
+                self.fp32_model_dir, trust_remote_code=True, load_in_8bit=False, compile=False, stateful=is_stateful
             )
             evaluator = Evaluator(base_model=model_gold, tokenizer=self.preprocessor, metrics=("similarity",))
             evaluator.dump_gt(str(gt_data_path))
```

```diff
@@ -199,7 +212,7 @@ def _validate(self):
         compressed_model_hf = self.model_hf
         if self.backend != BackendType.FP32:
             compressed_model_hf = OVModelForCausalLM.from_pretrained(
-                self.output_model_dir, trust_remote_code=True, load_in_8bit=False, compile=False, stateful=False
+                self.output_model_dir, trust_remote_code=True, load_in_8bit=False, compile=False, stateful=is_stateful
             )
         print("Evaluation of the target model")
         _, all_metrics = evaluator.score(compressed_model_hf)
```
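
Reloading the compressed model with the same `stateful` flag keeps validation consistent with how the IR was exported. A hedged usage sketch of running such a compressed stateful model outside the test harness (directory and prompt are illustrative):

```python
from optimum.intel.openvino import OVModelForCausalLM
from transformers import AutoTokenizer

output_model_dir = "tinyllama_int4_sf"  # illustrative: directory with the compressed IR
tokenizer = AutoTokenizer.from_pretrained("tinyllama/tinyllama-1.1b-step-50k-105b")

model = OVModelForCausalLM.from_pretrained(
    output_model_dir, trust_remote_code=True, load_in_8bit=False, stateful=True
)
prompt = tokenizer("What is OpenVINO?", return_tensors="pt")
generated = model.generate(**prompt, max_new_tokens=16)
print(tokenizer.decode(generated[0], skip_special_tokens=True))
```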
