Merge releases/2024/3 into master (#731)

Co-authored-by: Alina Kladieva <[email protected]>
Co-authored-by: Anastasiia Pnevskaia <[email protected]>
Co-authored-by: Nikita Malinin <[email protected]>
Co-authored-by: Yaroslav Tarkan <[email protected]>
Co-authored-by: Anatoliy Talamanov <[email protected]>
Co-authored-by: Pavel Esir <[email protected]>
Co-authored-by: Miłosz Żeglarski <[email protected]>
Co-authored-by: Pavel Esir <[email protected]>
Co-authored-by: Alexander Suvorov <[email protected]>
Co-authored-by: Xiake Sun <[email protected]>
Co-authored-by: Damian Kalinowski <[email protected]>
Co-authored-by: Andrei Kochin <[email protected]>
Co-authored-by: Ekaterina Aidova <[email protected]>
Co-authored-by: guozhong wang <[email protected]>
openvinotoolkit · Aug 5, 2024 · dc9ef33 · dc9ef33
1 parent 3304798
commit dc9ef33
Show file tree

Hide file tree

Showing 2 changed files with 6 additions and 4 deletions.
diff --git a/samples/python/chat_sample/README.md b/samples/python/chat_sample/README.md
@@ -41,4 +41,4 @@ If you encounter an exception indicating a missing "chat template" when launchin
 The following template can be used as a default, but it may not work properly with every model:
 ```
 "chat_template": "{% for message in messages %}{% if (message['role'] == 'user') %}{{'<|im_start|>user\n' + message['content'] + '<|im_end|>\n<|im_start|>assistant\n'}}{% elif (message['role'] == 'assistant') %}{{message['content'] + '<|im_end|>\n'}}{% endif %}{% endfor %}",
-```
+```
diff --git a/src/cpp/src/llm_pipeline_static.cpp b/src/cpp/src/llm_pipeline_static.cpp
@@ -161,8 +161,10 @@ StaticLLMPipeline::StaticLLMPipeline(
     */
     ov::Core core;
     // (1) Read the template model - this will be kvcache model
-    auto kvcache_model = core.read_model(path / "openvino_model.xml");
-    // (2) TODO: Expose KV-cache input and output layers from kvcache model
+    m_kvcache_model = core.read_model(path / "openvino_model.xml");
+    // (2) Expose KV-cache input and output layers from kvcache model
+    ov::pass::StatefulToStateless().run_on_model(m_kvcache_model);
+    align_u4_zp_constants(m_kvcache_model);
     // (3) Clone the model - this will be prefill
     m_prefill_model = m_kvcache_model->clone();
     m_prefill_model->set_friendly_name(m_kvcache_model->get_friendly_name() + "_prefill");
@@ -179,7 +181,7 @@ StaticLLMPipeline::StaticLLMPipeline(
         m_prefill_model, device, extract_config_or_default(config, "PREFILL_CONFIG")
     ).create_infer_request();
     m_kvcache_request = core.compile_model(
-        kvcache_model, device, extract_config_or_default(config, "GENERATE_CONFIG")
+        m_kvcache_model, device, extract_config_or_default(config, "GENERATE_CONFIG")
     ).create_infer_request();
     // (7) Initialize tensors
     prepare_for_new_conversation();
-Original file line number
+Diff line change
@@ Expand Up @@
     The following template can be used as a default, but it may not work properly with every model:
     ```
     "chat_template": "{% for message in messages %}{% if (message['role'] == 'user') %}{{'<|im_start|>user\n' + message['content'] + '<|im_end|>\n<|im_start|>assistant\n'}}{% elif (message['role'] == 'assistant') %}{{message['content'] + '<|im_end|>\n'}}{% endif %}{% endfor %}",
-    ```
+    ```