Commit 2fdac12

Address review comments + add test with long input

eshiryae committed Dec 11, 2024
1 parent 3bab16c commit 2fdac12
Showing 2 changed files with 41 additions and 32 deletions.
3 changes: 3 additions & 0 deletions .github/labeler.yml
@@ -54,6 +54,9 @@
   - 'src/cpp/include/openvino/genai/whisper_pipeline.hpp'
   - 'src/cpp/src/whisper/**/*'
   - 'src/cpp/src/whisper_generation_config.cpp'
+  - 'src/cpp/src/whisper_pipeline_base.hpp'
+  - 'src/cpp/src/whisper_pipeline_static.cpp'
+  - 'src/cpp/src/whisper_pipeline_static.hpp'
   - 'src/cpp/src/whisper_pipeline.cpp'
   - 'src/python/py_whisper_pipeline.cpp'
   - 'tests/python_tests/test_whisper_generate_api.py'
70 changes: 38 additions & 32 deletions tests/python_tests/test_whisper_pipeline_static.py
@@ -3,13 +3,15 @@

 from ov_genai_test_utils import get_whisper_models_list
 from test_whisper_generate_api import get_samples_from_dataset
-from transformers import WhisperProcessor, pipeline, AutoTokenizer
+from transformers import WhisperProcessor, AutoTokenizer
 from optimum.intel.openvino import OVModelForSpeechSeq2Seq
 import openvino_genai as ov_genai
 import openvino_tokenizers
 import openvino
 import pytest

+# This test suite is designed specifically to validate the functionality
+# and robustness of the WhisperStaticPipeline on NPUW:CPU.
 config = {"NPU_USE_NPUW" : "YES",
           "NPUW_DEVICES" : "CPU",
           "NPUW_ONLINE_PIPELINE" : "NONE"}
@@ -47,11 +49,23 @@ def load_and_save_whisper_model(params, **tokenizer_kwargs):
     opt_model.save_pretrained(path)
     processor.save_pretrained(path)

+def get_results_cpu_npu(model_path, audio_sample, **config_kwargs):
+    cpu_pipe = ov_genai.WhisperPipeline(model_path, "CPU")
+    expected = cpu_pipe.generate(audio_sample, **config_kwargs)
+
+    npu_pipe = ov_genai.WhisperPipeline(model_path, "NPU", **config)
+    actual_out = npu_pipe.generate(audio_sample, **config_kwargs)
+
+    return expected, actual_out
+
 def compare_results_with_assert(expected, actual_out):
-    if expected.texts[0] != actual_out.texts[0]:
-        print(f'expected: {expected.texts[0]}\n')
-        print(f'actual_out: {actual_out.texts[0]}')
-    assert expected.texts[0] == actual_out.texts[0]
+    assert len(expected.texts) == len(actual_out.texts)
+
+    for i in range(0, len(expected.texts)):
+        if expected.texts[i] != actual_out.texts[i]:
+            print(f'expected: {expected.texts[i]}\n')
+            print(f'actual_out: {actual_out.texts[i]}')
+        assert expected.texts[i] == actual_out.texts[i]


@pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True))
@@ -61,33 +75,24 @@ def test_static_whisper_generation_compare_with_cpu(model_descr, test_sample):
     model_id, model_path = model_descr
     load_and_save_whisper_model(model_descr)

-    cpu_pipe = ov_genai.WhisperPipeline(model_path, "CPU")
-    expected = cpu_pipe.generate(test_sample)
-    # expected = None
-
-    npu_pipe = ov_genai.WhisperPipeline(model_path, "NPU", **config)
-    actual_out = npu_pipe.generate(test_sample)
+    expected, actual_out = get_results_cpu_npu(model_path, test_sample)

     compare_results_with_assert(expected, actual_out)


 @pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True))
 @pytest.mark.parametrize("test_sample",
                          [
-                             # *get_samples_from_dataset(language="fr", length=2), # 1/2 failed
                              *get_samples_from_dataset(language="fr", length=1),
                              *get_samples_from_dataset(language="de", length=2),
-                             # *get_samples_from_dataset(language="es", length=2), # 1/2 failed
+                             # *get_samples_from_dataset(language="es", length=2), # mismatch CPU/NPU pipelines
                          ],)
 @pytest.mark.precommit
 def test_static_whisper_autodetect(model_descr, test_sample):
     model_id, model_path = model_descr
     load_and_save_whisper_model(model_descr)

-    cpu_pipe = ov_genai.WhisperPipeline(model_path, "CPU")
-    expected = cpu_pipe.generate(test_sample)
-
-    npu_pipe = ov_genai.WhisperPipeline(model_path, "NPU", **config)
-    actual_out = npu_pipe.generate(test_sample)
+    expected, actual_out = get_results_cpu_npu(model_path, test_sample)

     compare_results_with_assert(expected, actual_out)

@@ -101,11 +106,7 @@ def test_static_whisper_language_de(model_descr, test_sample):
     model_id, model_path = model_descr
     load_and_save_whisper_model(model_descr)

-    cpu_pipe = ov_genai.WhisperPipeline(model_path, "CPU")
-    expected = cpu_pipe.generate(test_sample, max_new_tokens=30, language="<|de|>")
-
-    npu_pipe = ov_genai.WhisperPipeline(model_path, "NPU", **config)
-    actual_out = npu_pipe.generate(test_sample, max_new_tokens=30, language="<|de|>")
+    expected, actual_out = get_results_cpu_npu(model_path, test_sample, max_new_tokens=30, language="<|de|>")

     compare_results_with_assert(expected, actual_out)

@@ -119,11 +120,7 @@ def test_static_whisper_language_fr(model_descr, test_sample):
     model_id, model_path = model_descr
     load_and_save_whisper_model(model_descr)

-    cpu_pipe = ov_genai.WhisperPipeline(model_path, "CPU")
-    expected = cpu_pipe.generate(test_sample, max_new_tokens=30, language="<|fr|>")
-
-    npu_pipe = ov_genai.WhisperPipeline(model_path, "NPU", **config)
-    actual_out = npu_pipe.generate(test_sample, max_new_tokens=30, language="<|fr|>")
+    expected, actual_out = get_results_cpu_npu(model_path, test_sample, max_new_tokens=30, language="<|fr|>")

     compare_results_with_assert(expected, actual_out)

@@ -137,10 +134,19 @@ def test_static_whisper_language_ru(model_descr, test_sample):
     model_id, model_path = model_descr
     load_and_save_whisper_model(model_descr)

-    cpu_pipe = ov_genai.WhisperPipeline(model_path, "CPU")
-    expected = cpu_pipe.generate(test_sample, max_new_tokens=30, language="<|ru|>")
-
-    npu_pipe = ov_genai.WhisperPipeline(model_path, "NPU", **config)
-    actual_out = npu_pipe.generate(test_sample, max_new_tokens=30, language="<|ru|>")
+    expected, actual_out = get_results_cpu_npu(model_path, test_sample, max_new_tokens=30, language="<|ru|>")

     compare_results_with_assert(expected, actual_out)
+
+
+@pytest.mark.skip(reason="Mismatches in output")
+@pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True))
+@pytest.mark.parametrize("test_sample", get_samples_from_dataset(language="en", length=1, long_form=True))
+@pytest.mark.precommit
+def test_static_whisper_generation_long(model_descr, test_sample):
+    model_id, model_path = model_descr
+    load_and_save_whisper_model(model_descr)
+
+    expected, actual_out = get_results_cpu_npu(model_path, test_sample)
+
+    compare_results_with_assert(expected, actual_out)
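
For reference, the comparison pattern these tests now share can be run standalone roughly as below. This is a minimal sketch, assuming a Whisper model already exported by load_and_save_whisper_model and a raw speech sample such as one yielded by get_samples_from_dataset; the model_path value and the sample selection are placeholders for illustration, not part of this commit:

    from test_whisper_generate_api import get_samples_from_dataset
    import openvino_genai as ov_genai

    # NPUW configuration used by the suite: the "NPU" pipeline is dispatched
    # to NPUW with CPU as the underlying device.
    config = {"NPU_USE_NPUW": "YES",
              "NPUW_DEVICES": "CPU",
              "NPUW_ONLINE_PIPELINE": "NONE"}

    # Placeholder: directory produced by load_and_save_whisper_model.
    model_path = "whisper-tiny-ov"
    # Placeholder: take one sample from the test dataset.
    audio_sample = next(iter(get_samples_from_dataset(language="en", length=1)))

    # The CPU pipeline provides the reference transcript; the static NPU
    # pipeline is expected to match it exactly.
    cpu_pipe = ov_genai.WhisperPipeline(model_path, "CPU")
    expected = cpu_pipe.generate(audio_sample)

    npu_pipe = ov_genai.WhisperPipeline(model_path, "NPU", **config)
    actual_out = npu_pipe.generate(audio_sample)

    assert expected.texts == actual_out.texts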
