Commit 99f797d
improve ut
Signed-off-by: changwangss <[email protected]>
changwangss committed Sep 6, 2024
1 parent bc3b95a commit 99f797d
Showing 3 changed files with 27 additions and 14 deletions.
1 change: 1 addition & 0 deletions .github/workflows/test_inc.yml
@@ -37,6 +37,7 @@ jobs:
           pip install py-cpuinfo
           pip install torch==2.3.0 torchaudio==2.3.0 torchvision==0.18 --index-url https://download.pytorch.org/whl/cpu
           pip install .[neural-compressor,diffusers,tests]
+          pip install intel_extension_for_pytorch==2.3.0
           pip install peft
       - name: Test with Pytest
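Side note on the new pin: intel_extension_for_pytorch releases track the torch minor version, which is why the workflow installs 2.3.0 for both. A minimal sanity check of that pairing, assuming only the conventional __version__ attribute on each package (the check itself is illustrative, not part of the commit):

import torch
import intel_extension_for_pytorch as ipex

# IPEX version strings look like "2.3.0+cpu"; compare major.minor against torch.
torch_mm = torch.__version__.split("+")[0].rsplit(".", 1)[0]
ipex_mm = ipex.__version__.split("+")[0].rsplit(".", 1)[0]
assert torch_mm == ipex_mm == "2.3", (torch.__version__, ipex.__version__)
print(f"torch {torch.__version__} / intel_extension_for_pytorch {ipex.__version__}")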
14 changes: 8 additions & 6 deletions tests/neural_compressor/test_modeling.py
@@ -147,23 +147,25 @@ def test_compare_with_and_without_past_key_values(self):
         self.assertTrue(torch.equal(outputs_with_pkv, outputs_without_pkv))

     def test_saving_loading_inc_woq_model(self):
-        model_name = "TheBlokeAI/Mixtral-tiny-GPTQ"
-        subfolder = "inc"
-        model = INCModelForCausalLM.from_pretrained(model_name, revision="inc", subfolder=subfolder)
-        tokenizer = AutoTokenizer.from_pretrained(model_name, revision="inc")
+        model_name = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GPTQ"
+        subfolder = "inc_woq"
+        model = INCModelForCausalLM.from_pretrained(model_name, revision="main")
+        tokenizer = AutoTokenizer.from_pretrained(model_name, revision="main")
         tokenizer.add_special_tokens({"pad_token": "[PAD]"})
         tokens = tokenizer("This is a sample output", return_tensors="pt")

+        with torch.no_grad():
+            outputs = model(**tokens)
+
         with tempfile.TemporaryDirectory() as tmp_dir:
             model_save_dir = Path(tmp_dir) / subfolder
             model.save_pretrained(model_save_dir)
             folder_contents = os.listdir(model_save_dir)
             self.assertIn(SAFE_WEIGHTS_NAME, folder_contents)
             self.assertIn(QUANTIZATION_CONFIG_NAME, folder_contents)
-            loaded_model = INCModelForCausalLM.from_pretrained(tmp_dir, subfolder=subfolder)
+            loaded_model = INCModelForCausalLM.from_pretrained(model_save_dir)

             with torch.no_grad():
-                outputs = model(**tokens)
                 loaded_outputs = loaded_model(**tokens)

             self.assertTrue("logits" in loaded_outputs)
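For readers skimming the diff: the reshaped test now computes reference logits before saving, then reloads from the exact save directory instead of passing a subfolder. A standalone sketch of that round-trip, assuming the optimum-intel INCModelForCausalLM API used in the test; the prompt is illustrative:

import tempfile

import torch
from transformers import AutoTokenizer

from optimum.intel import INCModelForCausalLM

model_id = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GPTQ"
model = INCModelForCausalLM.from_pretrained(model_id, revision="main")
tokenizer = AutoTokenizer.from_pretrained(model_id, revision="main")
tokens = tokenizer("This is a sample output", return_tensors="pt")

with torch.no_grad():
    reference = model(**tokens).logits  # logits before the save/load cycle

with tempfile.TemporaryDirectory() as tmp_dir:
    model.save_pretrained(tmp_dir)  # writes safetensors weights plus the quantization config
    reloaded = INCModelForCausalLM.from_pretrained(tmp_dir)
    with torch.no_grad():
        roundtrip = reloaded(**tokens).logits
    assert torch.allclose(reference, roundtrip)  # reload is expected to be lossless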
26 changes: 18 additions & 8 deletions tests/neural_compressor/test_optimization.py
@@ -467,22 +467,32 @@ def _compute_metrics(pred):

 class WeightOnlyQuantizationTest(INCTestMixin):
     WEIGHT_ONLY_CONFIG = (
-        ("rtn", "int4_clip"),
-        ("gptq", "int4_clip"),
+        ("rtn", 4),
+        ("gptq", 4),
     )

     @parameterized.expand(WEIGHT_ONLY_CONFIG)
-    def test_weight_only_quantization(self, methodology, weight_dtype):
+    def test_weight_only_quantization(self, methodology, bits):
         model_name = "hf-internal-testing/tiny-random-GPTNeoForCausalLM"

         from neural_compressor.transformers import GPTQConfig, RtnConfig

-        bits = 4
         if methodology == "gptq":
-            # max_input_length can be removed after neural-compressor > v2.5.1
-            quantization_config = GPTQConfig(bits=bits, sym=True, damp_percent=0.01)
+            tokenizer = AutoTokenizer.from_pretrained(model_name)
+            quantization_config = GPTQConfig(
+                bits=bits,
+                sym=True,
+                damp_percent=0.01,
+                desc_act=True,
+                tokenizer=tokenizer,
+                n_samples=20,
+                group_size=8,
+                batch_size=5,
+                seq_len=32,
+                block_size=16,
+            )
         else:
-            quantization_config = RtnConfig(bits=bits)
+            quantization_config = RtnConfig(bits=bits, group_size=8)

         tokenizer = AutoTokenizer.from_pretrained(model_name)
         tokenizer.add_special_tokens({"pad_token": "[PAD]"})
@@ -494,7 +504,7 @@ def test_weight_only_quantization(self, methodology, weight_dtype):

         with torch.no_grad():
             quantizer_outputs = quantized_model(**tokens)
-        quantized_model.saved_pretrained(tmp_dir)
+        quantized_model.save_pretrained(tmp_dir)
         loaded_model = INCModelForCausalLM.from_pretrained(tmp_dir)
         with torch.no_grad():
             loaded_outputs = loaded_model(**tokens)
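The parameterization change above swaps the opaque "int4_clip" dtype strings for an explicit bit width and moves every GPTQ knob into the config object. A hedged sketch of the two weight-only paths, using the neural_compressor.transformers configs imported in the diff; passing the config through from_pretrained is an assumption based on how the surrounding test constructs quantized_model:

from neural_compressor.transformers import GPTQConfig, RtnConfig
from transformers import AutoTokenizer

from optimum.intel import INCModelForCausalLM

model_id = "hf-internal-testing/tiny-random-GPTNeoForCausalLM"
tokenizer = AutoTokenizer.from_pretrained(model_id)

# RTN rounds weights in place with no calibration data, so only the bit width
# and quantization group size need choosing.
rtn_config = RtnConfig(bits=4, group_size=8)

# GPTQ calibrates on sampled text, hence the tokenizer plus the sampling and
# blocking knobs the test pins down.
gptq_config = GPTQConfig(
    bits=4,
    sym=True,
    damp_percent=0.01,
    desc_act=True,
    tokenizer=tokenizer,
    n_samples=20,
    group_size=8,
    batch_size=5,
    seq_len=32,
    block_size=16,
)

# Assumed entry point (mirrors the test): the config rides along on from_pretrained.
quantized = INCModelForCausalLM.from_pretrained(model_id, quantization_config=rtn_config)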
