diff --git a/src/cpp/src/lm_encoding.cpp b/src/cpp/src/lm_encoding.cpp
index ab72bac7f5..644aa369c6 100644
--- a/src/cpp/src/lm_encoding.cpp
+++ b/src/cpp/src/lm_encoding.cpp
@@ -86,7 +86,6 @@ std::pair get_lm_encoded_results(
     if (position_ids.has_value())
         m_llm.set_tensor("position_ids", *position_ids);
 
-    m_llm.get_tensor("beam_idx").set_shape({ batch_size });
     ov::Tensor beam_idx = ov::Tensor(ov::element::i32, {batch_size});
     auto beam_data = beam_idx.data();
     if (selected_beam_idx.has_value())
diff --git a/tests/python_tests/test_generate_api.py b/tests/python_tests/test_generate_api.py
index d79df9c6f2..2f80857359 100644
--- a/tests/python_tests/test_generate_api.py
+++ b/tests/python_tests/test_generate_api.py
@@ -840,3 +840,11 @@ def test_perf_metrics(model_descr, generation_config, prompt):
     assert len(raw_metrics.m_times_to_first_token) > 0
     assert len(raw_metrics.m_batch_sizes) > 0
     assert len(raw_metrics.m_durations) > 0
+
+
+@pytest.mark.precommit
+@pytest.mark.nightly
+def test_batch_switch():
+    pipe = read_model(('katuni4ka/tiny-random-phi3', Path('tiny-random-phi3')))[4]
+    pipe.generate(["a"], max_new_tokens=2)
+    pipe.generate(["1", "2"], max_new_tokens=2)
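
Note (not part of the patch): a minimal sketch of the scenario the new test_batch_switch test exercises, driving the openvino_genai Python API directly instead of the test helper read_model; the local model directory "tiny-random-phi3" and the "CPU" device string are assumptions for illustration.

    import openvino_genai

    # Build one pipeline and reuse it across calls with different batch sizes.
    pipe = openvino_genai.LLMPipeline("tiny-random-phi3", "CPU")  # assumed model dir/device

    # First call: a single prompt (batch size 1).
    pipe.generate(["a"], max_new_tokens=2)

    # Second call on the same pipeline: two prompts (batch size 2). The new test
    # checks that switching the batch size between generate() calls works; the
    # removed set_shape on the model's "beam_idx" tensor appears redundant, since
    # a fresh beam_idx tensor of shape {batch_size} is created just below it.
    pipe.generate(["1", "2"], max_new_tokens=2)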