Skip to content

Commit

Permalink
Download model if it's not found
Browse files Browse the repository at this point in the history
  • Loading branch information
eshiryae committed Dec 4, 2024
1 parent b2cf1d9 commit e697a90
Showing 1 changed file with 44 additions and 1 deletion.
45 changes: 44 additions & 1 deletion tests/python_tests/test_whisper_pipeline_static.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,50 @@

from ov_genai_test_utils import get_whisper_models_list
from test_whisper_generate_api import get_samples_from_dataset
from transformers import WhisperProcessor, pipeline, AutoTokenizer
from optimum.intel.openvino import OVModelForSpeechSeq2Seq
import openvino_genai as ov_genai
import openvino_tokenizers
import openvino
import pytest

# Pipeline properties that route the "NPU" WhisperPipeline through NPUW with
# CPU as the backing device, i.e. the static NPU pipeline is exercised without
# requiring physical NPU hardware.
# NOTE(review): "NPUW_ONLINE_PIPELINE": "NONE" presumably disables online
# partitioning — confirm against NPUW plugin documentation.
config = {"NPU_USE_NPUW" : "YES",
"NPUW_DEVICES" : "CPU",
"NPUW_ONLINE_PIPELINE" : "NONE"}

def load_and_save_whisper_model(params, **tokenizer_kwargs):
    """Export a Whisper model to OpenVINO IR at *path* unless already present.

    Args:
        params: ``(model_id, path)`` pair — the HuggingFace model id and the
            target directory (a ``pathlib.Path``) for the converted artifacts.
        **tokenizer_kwargs: extra options forwarded to
            ``openvino_tokenizers.convert_tokenizer``.

    The presence of ``openvino_encoder_model.xml`` in *path* is used as the
    "already exported" marker; when it exists the function is a no-op, so
    repeated test runs reuse the cached conversion.
    """
    model_id, path = params

    if not (path / "openvino_encoder_model.xml").exists():
        # Everything below downloads from the hub, so it is done only when the
        # cached export is missing (the original loaded the processor
        # unconditionally, forcing a download even for a no-op call).
        processor = WhisperProcessor.from_pretrained(model_id, trust_remote_code=True)

        tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
        ov_tokenizer, ov_detokenizer = openvino_tokenizers.convert_tokenizer(
            tokenizer,
            with_detokenizer=True,
            clean_up_tokenization_spaces=False,
            **tokenizer_kwargs,
        )

        openvino.save_model(ov_tokenizer, path / "openvino_tokenizer.xml")
        openvino.save_model(ov_detokenizer, path / "openvino_detokenizer.xml")

        # to store tokenizer config jsons with special tokens
        tokenizer.save_pretrained(path)

        # Export the seq2seq model itself to OpenVINO IR (CPU, FP, no
        # compilation — the tests compile per-device later).
        opt_model = OVModelForSpeechSeq2Seq.from_pretrained(
            model_id,
            export=True,
            trust_remote_code=True,
            compile=False,
            device="CPU",
            load_in_8bit=False,
        )
        opt_model.generation_config.save_pretrained(path)
        opt_model.config.save_pretrained(path)
        opt_model.save_pretrained(path)
        processor.save_pretrained(path)

def compare_results_with_assert(expected, actual_out):
if expected.texts[0] != actual_out.texts[0]:
print(f'expected: {expected.texts[0]}\n')
Expand All @@ -18,13 +55,15 @@ def compare_results_with_assert(expected, actual_out):


@pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True))
@pytest.mark.parametrize("test_sample", get_samples_from_dataset(length=1))
@pytest.mark.parametrize("test_sample", get_samples_from_dataset(language="en", length=1))
@pytest.mark.precommit
def test_static_whisper_generation_compare_with_cpu(model_descr, test_sample):
model_id, model_path = model_descr
load_and_save_whisper_model(model_descr)

cpu_pipe = ov_genai.WhisperPipeline(model_path, "CPU")
expected = cpu_pipe.generate(test_sample)
# expected = None

npu_pipe = ov_genai.WhisperPipeline(model_path, "NPU", **config)
actual_out = npu_pipe.generate(test_sample)
Expand All @@ -42,6 +81,7 @@ def test_static_whisper_generation_compare_with_cpu(model_descr, test_sample):
@pytest.mark.precommit
def test_static_whisper_autodetect(model_descr, test_sample):
model_id, model_path = model_descr
load_and_save_whisper_model(model_descr)

cpu_pipe = ov_genai.WhisperPipeline(model_path, "CPU")
expected = cpu_pipe.generate(test_sample)
Expand All @@ -59,6 +99,7 @@ def test_static_whisper_autodetect(model_descr, test_sample):
@pytest.mark.precommit
def test_static_whisper_language_de(model_descr, test_sample):
model_id, model_path = model_descr
load_and_save_whisper_model(model_descr)

cpu_pipe = ov_genai.WhisperPipeline(model_path, "CPU")
expected = cpu_pipe.generate(test_sample, max_new_tokens=30, language="<|de|>")
Expand All @@ -76,6 +117,7 @@ def test_static_whisper_language_de(model_descr, test_sample):
@pytest.mark.precommit
def test_static_whisper_language_fr(model_descr, test_sample):
model_id, model_path = model_descr
load_and_save_whisper_model(model_descr)

cpu_pipe = ov_genai.WhisperPipeline(model_path, "CPU")
expected = cpu_pipe.generate(test_sample, max_new_tokens=30, language="<|fr|>")
Expand All @@ -93,6 +135,7 @@ def test_static_whisper_language_fr(model_descr, test_sample):
@pytest.mark.precommit
def test_static_whisper_language_ru(model_descr, test_sample):
model_id, model_path = model_descr
load_and_save_whisper_model(model_descr)

cpu_pipe = ov_genai.WhisperPipeline(model_path, "CPU")
expected = cpu_pipe.generate(test_sample, max_new_tokens=30, language="<|ru|>")
Expand Down

0 comments on commit e697a90

Please sign in to comment.