Commit 99f797d
improve ut
Signed-off-by: changwangss <[email protected]>
changwangss committed Sep 6, 2024
1 parent bc3b95a commit 99f797d
Showing 3 changed files with 27 additions and 14 deletions.
1 change: 1 addition & 0 deletions .github/workflows/test_inc.yml
@@ -37,6 +37,7 @@ jobs:
           pip install py-cpuinfo
           pip install torch==2.3.0 torchaudio==2.3.0 torchvision==0.18 --index-url https://download.pytorch.org/whl/cpu
           pip install .[neural-compressor,diffusers,tests]
+          pip install intel_extension_for_pytorch==2.3.0
           pip install peft
       - name: Test with Pytest
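Side note on the new pin: intel_extension_for_pytorch releases track the torch minor version, which is why the workflow installs 2.3.0 for both. A minimal sanity check of that pairing, assuming only the conventional __version__ attribute on each package (the check itself is illustrative, not part of the commit):

import torch
import intel_extension_for_pytorch as ipex

# IPEX version strings look like "2.3.0+cpu"; compare major.minor against torch.
torch_mm = torch.__version__.split("+")[0].rsplit(".", 1)[0]
ipex_mm = ipex.__version__.split("+")[0].rsplit(".", 1)[0]
assert torch_mm == ipex_mm == "2.3", (torch.__version__, ipex.__version__)
print(f"torch {torch.__version__} / intel_extension_for_pytorch {ipex.__version__}")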
14 changes: 8 additions & 6 deletions tests/neural_compressor/test_modeling.py
@@ -147,23 +147,25 @@ def test_compare_with_and_without_past_key_values(self):
         self.assertTrue(torch.equal(outputs_with_pkv, outputs_without_pkv))

     def test_saving_loading_inc_woq_model(self):
-        model_name = "TheBlokeAI/Mixtral-tiny-GPTQ"
-        subfolder = "inc"
-        model = INCModelForCausalLM.from_pretrained(model_name, revision="inc", subfolder=subfolder)
-        tokenizer = AutoTokenizer.from_pretrained(model_name, revision="inc")
+        model_name = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GPTQ"
+        subfolder = "inc_woq"
+        model = INCModelForCausalLM.from_pretrained(model_name, revision="main")
+        tokenizer = AutoTokenizer.from_pretrained(model_name, revision="main")
         tokenizer.add_special_tokens({"pad_token": "[PAD]"})
         tokens = tokenizer("This is a sample output", return_tensors="pt")

+        with torch.no_grad():
+            outputs = model(**tokens)
+
         with tempfile.TemporaryDirectory() as tmp_dir:
             model_save_dir = Path(tmp_dir) / subfolder
             model.save_pretrained(model_save_dir)
             folder_contents = os.listdir(model_save_dir)
             self.assertIn(SAFE_WEIGHTS_NAME, folder_contents)
             self.assertIn(QUANTIZATION_CONFIG_NAME, folder_contents)
-            loaded_model = INCModelForCausalLM.from_pretrained(tmp_dir, subfolder=subfolder)
+            loaded_model = INCModelForCausalLM.from_pretrained(model_save_dir)

             with torch.no_grad():
-                outputs = model(**tokens)
                 loaded_outputs = loaded_model(**tokens)

             self.assertTrue("logits" in loaded_outputs)
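For readers skimming the diff: the reshaped test now computes reference logits before saving, then reloads from the exact save directory instead of passing a subfolder. A standalone sketch of that round-trip, assuming the optimum-intel INCModelForCausalLM API used in the test; the prompt is illustrative:

import tempfile

import torch
from transformers import AutoTokenizer

from optimum.intel import INCModelForCausalLM

model_id = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GPTQ"
model = INCModelForCausalLM.from_pretrained(model_id, revision="main")
tokenizer = AutoTokenizer.from_pretrained(model_id, revision="main")
tokens = tokenizer("This is a sample output", return_tensors="pt")

with torch.no_grad():
    reference = model(**tokens).logits  # logits before the save/load cycle

with tempfile.TemporaryDirectory() as tmp_dir:
    model.save_pretrained(tmp_dir)  # writes safetensors weights plus the quantization config
    reloaded = INCModelForCausalLM.from_pretrained(tmp_dir)
    with torch.no_grad():
        roundtrip = reloaded(**tokens).logits
    assert torch.allclose(reference, roundtrip)  # reload is expected to be lossless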
26 changes: 18 additions & 8 deletions tests/neural_compressor/test_optimization.py
@@ -467,22 +467,32 @@ def _compute_metrics(pred):

 class WeightOnlyQuantizationTest(INCTestMixin):
     WEIGHT_ONLY_CONFIG = (
-        ("rtn", "int4_clip"),
-        ("gptq", "int4_clip"),
+        ("rtn", 4),
+        ("gptq", 4),
     )

     @parameterized.expand(WEIGHT_ONLY_CONFIG)
-    def test_weight_only_quantization(self, methodology, weight_dtype):
+    def test_weight_only_quantization(self, methodology, bits):
         model_name = "hf-internal-testing/tiny-random-GPTNeoForCausalLM"

         from neural_compressor.transformers import GPTQConfig, RtnConfig

-        bits = 4
         if methodology == "gptq":
-            # max_input_length can be removed after neural-compressor > v2.5.1
-            quantization_config = GPTQConfig(bits=bits, sym=True, damp_percent=0.01)
+            tokenizer = AutoTokenizer.from_pretrained(model_name)
+            quantization_config = GPTQConfig(
+                bits=bits,
+                sym=True,
+                damp_percent=0.01,
+                desc_act=True,
+                tokenizer=tokenizer,
+                n_samples=20,
+                group_size=8,
+                batch_size=5,
+                seq_len=32,
+                block_size=16,
+            )
         else:
-            quantization_config = RtnConfig(bits=bits)
+            quantization_config = RtnConfig(bits=bits, group_size=8)

         tokenizer = AutoTokenizer.from_pretrained(model_name)
         tokenizer.add_special_tokens({"pad_token": "[PAD]"})
@@ -494,7 +504,7 @@ def test_weight_only_quantization(self, methodology, weight_dtype):

         with torch.no_grad():
             quantizer_outputs = quantized_model(**tokens)
-        quantized_model.saved_pretrained(tmp_dir)
+        quantized_model.save_pretrained(tmp_dir)
         loaded_model = INCModelForCausalLM.from_pretrained(tmp_dir)
         with torch.no_grad():
             loaded_outputs = loaded_model(**tokens)
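The parameterization change above swaps the opaque "int4_clip" dtype strings for an explicit bit width and moves every GPTQ knob into the config object. A hedged sketch of the two weight-only paths, using the neural_compressor.transformers configs imported in the diff; passing the config through from_pretrained is an assumption based on how the surrounding test constructs quantized_model:

from neural_compressor.transformers import GPTQConfig, RtnConfig
from transformers import AutoTokenizer

from optimum.intel import INCModelForCausalLM

model_id = "hf-internal-testing/tiny-random-GPTNeoForCausalLM"
tokenizer = AutoTokenizer.from_pretrained(model_id)

# RTN rounds weights in place with no calibration data, so only the bit width
# and quantization group size need choosing.
rtn_config = RtnConfig(bits=4, group_size=8)

# GPTQ calibrates on sampled text, hence the tokenizer plus the sampling and
# blocking knobs the test pins down.
gptq_config = GPTQConfig(
    bits=4,
    sym=True,
    damp_percent=0.01,
    desc_act=True,
    tokenizer=tokenizer,
    n_samples=20,
    group_size=8,
    batch_size=5,
    seq_len=32,
    block_size=16,
)

# Assumed entry point (mirrors the test): the config rides along on from_pretrained.
quantized = INCModelForCausalLM.from_pretrained(model_id, quantization_config=rtn_config)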
