Commit
Added VLM bindings and a Python sample. (#914)
- Added VLM bindings.
- Added Python VLM chat sample (see the sketch below).
- Added initialization of performance metrics with zeros.
Tickets: CVS-153174, CVS-153173, CVS-153626

PR to miniCPM-V-2_6 branch:
Wovchena/openvino.genai-public#62
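
The Python chat sample added here drives the same VLMPipeline flow that the C++ changes below touch. As orientation only, a minimal C++ sketch of that flow; the vlm_pipeline.hpp header path, the ov::genai::image property, the chat methods, and the exact generate() overload are assumptions rather than facts taken from this commit:

```cpp
#include <iostream>
#include <string>

#include "openvino/openvino.hpp"
#include "openvino/genai/vlm_pipeline.hpp"  // assumed header name, matching src/vlm_pipeline.cpp below

int main() {
    // Placeholder model directory and device; a real sample would take these from argv.
    ov::genai::VLMPipeline pipe("MiniCPM-V-2_6-ov", "CPU");

    // Dummy 448x448 RGB u8 image; a real sample decodes an image file into this tensor
    // (shape and layout here are assumptions).
    ov::Tensor image(ov::element::u8, {1, 448, 448, 3});

    pipe.start_chat();
    std::string prompt;
    while (std::getline(std::cin, prompt)) {
        pipe.generate(prompt,
                      ov::genai::image(image),
                      ov::genai::streamer([](std::string subword) {
                          std::cout << subword << std::flush;
                          return false;  // false keeps generation going
                      }));
        std::cout << '\n';
    }
    pipe.finish_chat();
}
```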

---------

Co-authored-by: wenyi5608 <[email protected]>
Co-authored-by: Yang,Su <[email protected]>
Co-authored-by: Wovchena <[email protected]>
Co-authored-by: Yaroslav Tarkan <[email protected]>
Co-authored-by: Alina Kladieva <[email protected]>
Co-authored-by: Pavel Esir <[email protected]>
Co-authored-by: Pavel Esir <[email protected]>
Co-authored-by: Artur Paniukov <[email protected]>
Co-authored-by: Ekaterina Aidova <[email protected]>
Co-authored-by: Ilya Lavrenov <[email protected]>
Co-authored-by: Mikhail Ryzhov <[email protected]>
12 people authored Oct 9, 2024
1 parent 28d5f95 commit 221c56d
Showing 2 changed files with 20 additions and 3 deletions.
include/openvino/genai/llm_pipeline.hpp: 2 changes (1 addition & 1 deletion)
@@ -270,7 +270,7 @@ class OPENVINO_GENAI_EXPORTS LLMPipeline {
 };
 
 OPENVINO_GENAI_EXPORTS std::pair<std::string, Any> streamer(StreamerVariant func);
-std::pair<std::string, Any> generation_config(const GenerationConfig& config);
+OPENVINO_GENAI_EXPORTS std::pair<std::string, Any> generation_config(const GenerationConfig& config);
 
 } // namespace genai
 } // namespace ov
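
For context, these property helpers are the intended way to attach a GenerationConfig and a streaming callback to a generate() call, and the hunk above only adds the missing OPENVINO_GENAI_EXPORTS to generation_config(). A minimal hedged usage sketch against LLMPipeline; the model directory is a placeholder and the variadic generate() overload is assumed from the public API rather than shown in this diff:

```cpp
#include <iostream>
#include <string>

#include "openvino/genai/llm_pipeline.hpp"

int main() {
    ov::genai::LLMPipeline pipe("TinyLlama-1.1B-Chat-v1.0", "CPU");  // placeholder model dir

    ov::genai::GenerationConfig config;
    config.max_new_tokens = 64;

    // streamer() and generation_config() wrap their arguments into {name, ov::Any} pairs,
    // which generate() forwards through its AnyMap-based overload; without the export macro,
    // generation_config() was not visible to callers linking against the shared library.
    pipe.generate("What is OpenVINO?",
                  ov::genai::generation_config(config),
                  ov::genai::streamer([](std::string subword) {
                      std::cout << subword << std::flush;
                      return false;  // false keeps generation going
                  }));
}
```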
src/vlm_pipeline.cpp: 21 changes (19 additions & 2 deletions)
@@ -533,8 +533,25 @@ DecodedResults VLMPipeline::generate(
             variable.reset();
         }
         m_language.get_tensor("attention_mask").set_shape({1, 0});
     }
-    return {{m_tokenizer.decode(generated)}};
-}
+    DecodedResults results;
+    results.texts = {m_tokenizer.decode(generated)};
+
+    // TODO: implement performance metrics
+    results.perf_metrics = ov::genai::PerfMetrics();
+    results.perf_metrics.m_evaluated = false;
+    results.perf_metrics.generate_duration = {0, 0};
+    results.perf_metrics.inference_duration = {0, 0};
+    results.perf_metrics.tokenization_duration = {0, 0};
+    results.perf_metrics.detokenization_duration = {0, 0};
+    results.perf_metrics.ttft = {0, 0};
+    results.perf_metrics.tpot = {0, 0};
+    results.perf_metrics.ipot = {0, 0};
+    results.perf_metrics.throughput = {0, 0};
+    results.perf_metrics.num_generated_tokens = generated.size();
+    results.perf_metrics.num_input_tokens = 0;
+
+    return results;
+}
 
 DecodedResults VLMPipeline::generate(

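Until real measurements are collected, a caller that inspects the returned metrics sees only the zeros written above. A small hedged sketch of reading them through the public PerfMetrics fields named in this hunk; the VLMPipeline construction, the ov::genai::image property, the header path, and the MeanStdPair .mean member are assumptions beyond what the diff shows:

```cpp
#include <iostream>

#include "openvino/openvino.hpp"
#include "openvino/genai/vlm_pipeline.hpp"  // assumed header name

int main() {
    ov::genai::VLMPipeline pipe("MiniCPM-V-2_6-ov", "CPU");  // placeholder model dir
    ov::Tensor image(ov::element::u8, {1, 448, 448, 3});     // dummy image tensor

    ov::genai::DecodedResults results =
        pipe.generate("Describe the image.", ov::genai::image(image));

    // Fields assigned in the hunk above; the durations are {mean, std} pairs,
    // currently zero-initialized until the TODO is implemented.
    const auto& pm = results.perf_metrics;
    std::cout << "generated tokens: " << pm.num_generated_tokens << '\n'
              << "ttft mean (ms):   " << pm.ttft.mean << '\n'
              << "throughput mean:  " << pm.throughput.mean << '\n';
}
```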