From dc9ef33e18dc76b64de3450a3df1625f02bdfd02 Mon Sep 17 00:00:00 2001
From: Zlobin Vladimir
Date: Mon, 5 Aug 2024 12:26:31 +0400
Subject: [PATCH] Merge releases/2024/3 into master (#731)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Alina Kladieva
Co-authored-by: Anastasiia Pnevskaia
Co-authored-by: Nikita Malinin
Co-authored-by: Yaroslav Tarkan
Co-authored-by: Anatoliy Talamanov
Co-authored-by: Pavel Esir
Co-authored-by: Miłosz Żeglarski
Co-authored-by: Pavel Esir
Co-authored-by: Alexander Suvorov
Co-authored-by: Xiake Sun
Co-authored-by: Damian Kalinowski
Co-authored-by: Andrei Kochin
Co-authored-by: Ekaterina Aidova
Co-authored-by: guozhong wang
---
 samples/python/chat_sample/README.md | 2 +-
 src/cpp/src/llm_pipeline_static.cpp  | 8 +++++---
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/samples/python/chat_sample/README.md b/samples/python/chat_sample/README.md
index 66fe4b0d93..dc2c39b3a5 100644
--- a/samples/python/chat_sample/README.md
+++ b/samples/python/chat_sample/README.md
@@ -41,4 +41,4 @@ If you encounter an exception indicating a missing "chat template" when launchin
 The following template can be used as a default, but it may not work properly with every model:
 ```
 "chat_template": "{% for message in messages %}{% if (message['role'] == 'user') %}{{'<|im_start|>user\n' + message['content'] + '<|im_end|>\n<|im_start|>assistant\n'}}{% elif (message['role'] == 'assistant') %}{{message['content'] + '<|im_end|>\n'}}{% endif %}{% endfor %}",
-```
\ No newline at end of file
+```
diff --git a/src/cpp/src/llm_pipeline_static.cpp b/src/cpp/src/llm_pipeline_static.cpp
index d05d928df6..c4ff0a90ab 100644
--- a/src/cpp/src/llm_pipeline_static.cpp
+++ b/src/cpp/src/llm_pipeline_static.cpp
@@ -161,8 +161,10 @@ StaticLLMPipeline::StaticLLMPipeline(
     */
     ov::Core core;
     // (1) Read the template model - this will be kvcache model
-    auto kvcache_model = core.read_model(path / "openvino_model.xml");
-    // (2) TODO: Expose KV-cache input and output layers from kvcache model
+    m_kvcache_model = core.read_model(path / "openvino_model.xml");
+    // (2) Expose KV-cache input and output layers from kvcache model
+    ov::pass::StatefulToStateless().run_on_model(m_kvcache_model);
+    align_u4_zp_constants(m_kvcache_model);
     // (3) Clone the model - this will be prefill
     m_prefill_model = m_kvcache_model->clone();
     m_prefill_model->set_friendly_name(m_kvcache_model->get_friendly_name() + "_prefill");
@@ -179,7 +181,7 @@ StaticLLMPipeline::StaticLLMPipeline(
         m_prefill_model, device, extract_config_or_default(config, "PREFILL_CONFIG")
     ).create_infer_request();
     m_kvcache_request = core.compile_model(
-        kvcache_model, device, extract_config_or_default(config, "GENERATE_CONFIG")
+        m_kvcache_model, device, extract_config_or_default(config, "GENERATE_CONFIG")
     ).create_infer_request();
     // (7) Initialize tensors
     prepare_for_new_conversation();
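
For readers tracing what the llm_pipeline_static.cpp hunks do, the sketch below illustrates the flow the patch introduces: read the kvcache model once, make its KV-cache state explicit with ov::pass::StatefulToStateless, clone the result for the prefill stage, and compile the two stages separately. This is a minimal standalone sketch, not the patched file: the "model_dir" path and the "CPU" device are placeholders, and the patch's align_u4_zp_constants helper and per-stage config maps (PREFILL_CONFIG / GENERATE_CONFIG) are file-internal details omitted here.

```
// Minimal sketch of the kvcache/prefill split, assuming an OpenVINO 2024.x
// install. Placeholders: "model_dir", "CPU". Omitted: the patch-internal
// align_u4_zp_constants helper and the per-stage compile configs.
#include <filesystem>
#include <openvino/openvino.hpp>
#include <openvino/pass/stateful_to_stateless.hpp>

int main() {
    ov::Core core;
    std::filesystem::path path{"model_dir"};

    // (1) Read the template model - this will be the kvcache model.
    auto kvcache_model = core.read_model((path / "openvino_model.xml").string());

    // (2) Expose KV-cache input and output layers: the pass rewrites the
    // model's internal ReadValue/Assign state into explicit inputs/outputs.
    ov::pass::StatefulToStateless().run_on_model(kvcache_model);

    // (3) Clone the model - the copy becomes the prefill model.
    auto prefill_model = kvcache_model->clone();
    prefill_model->set_friendly_name(kvcache_model->get_friendly_name() + "_prefill");

    // Compile each stage and create one infer request per stage, mirroring
    // m_prefill_request / m_kvcache_request in the patched constructor.
    auto prefill_request = core.compile_model(prefill_model, "CPU").create_infer_request();
    auto kvcache_request = core.compile_model(kvcache_model, "CPU").create_infer_request();

    return 0;
}
```

Keeping the stateless kvcache model as a member (m_kvcache_model) rather than a local, as the first hunk does, is what lets the second hunk compile the same transformed model for generation instead of the untransformed local copy.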