diff --git a/tests/python_tests/test_llm_pipeline_static.py b/tests/python_tests/test_llm_pipeline_static.py
index cad8b0fea0..e2e38d5596 100644
--- a/tests/python_tests/test_llm_pipeline_static.py
+++ b/tests/python_tests/test_llm_pipeline_static.py
@@ -6,6 +6,7 @@
 import pytest
 import sys
 from ov_genai_test_utils import (
+    read_model,
     get_models_list,
     get_chat_models_list,
 )
@@ -132,12 +133,10 @@ def test_max_number_of_tokens():
     assert len(encoded_results.tokens[0]) == num_tokens
 
 
-# FIXME: Known problem, output differs from stateful pipeline starting from 3rd prompt!
 @pytest.mark.skipif(sys.platform in ["darwin", "linux"], reason="Not supposed to work on mac. Segfault on linux CI")
-@pytest.mark.skip(reason="JIRA-144780: Output differs from stateful pipeline")
 @pytest.mark.precommit
 @pytest.mark.nightly
-def test_chat_generation(model_descr):
+def test_chat_generation():
     questions = [
         '1+1=',
         'What is the previous answer?',
@@ -145,10 +144,11 @@ def test_chat_generation(model_descr):
         'What was my first question?'
     ]
 
-    model_path = get_chat_models_lists()[0][1]
+    model_descr = get_chat_models_list()[0]
+    model_info = read_model((model_descr[0], model_descr[1] / '_test_chat'), add_special_tokens=False)
 
-    chat_history_stateful = generate_chat_history(model_path, "CPU", { }, questions)
-    chat_history_static = generate_chat_history(model_path, "NPU", common_config, questions)
+    chat_history_stateful = generate_chat_history(model_info[1], "CPU", { }, questions)
+    chat_history_static = generate_chat_history(model_info[1], "NPU", common_config, questions)
 
     print('npu chat: \n{chat_history_static}\n')
     print('cpu chat: \n{chat_history_stateful}')