From add9ed4f440cc3f55ae4316007ed81fc48f289f8 Mon Sep 17 00:00:00 2001
From: sgolebiewski-intel
Date: Tue, 15 Oct 2024 12:34:04 +0200
Subject: [PATCH 1/7] Add GenAI Use Cases to docs

---
 .../llm_inference_guide/genai-guide.rst       |  54 +---
 .../genai-guide/genai-use-cases.rst           | 296 ++++++++++++++++++
 2 files changed, 297 insertions(+), 53 deletions(-)
 create mode 100644 docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst

diff --git a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst
index f1fd002b48072e..ebd4667d544616 100644
--- a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst
+++ b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst
@@ -9,6 +9,7 @@ Run LLM Inference on OpenVINO with the GenAI Flavor
    :hidden:

    NPU inference of LLMs
+   genai-guide/genai-use-cases

 This guide will show you how to integrate the OpenVINO GenAI flavor into your application, covering
@@ -174,59 +175,6 @@ You can also create your custom streamer for more sophisticated processing:
        pipe.generate("The Sun is yellow because", ov::genai::streamer(custom_streamer), ov::genai::max_new_tokens(100));
    }

-Using GenAI in Chat Scenario
-################################
-
-For chat scenarios where inputs and outputs represent a conversation, maintaining KVCache across inputs
-may prove beneficial. The chat-specific methods **start_chat** and **finish_chat** are used to
-mark a conversation session, as you can see in these simple examples:
-
-.. tab-set::
-
-   .. tab-item:: Python
-      :sync: py
-
-      .. code-block:: python
-
-         import openvino_genai as ov_genai
-         pipe = ov_genai.LLMPipeline(model_path)
-
-         pipe.set_generation_config({'max_new_tokens': 100)
-
-         pipe.start_chat()
-         while True:
-             print('question:')
-             prompt = input()
-             if prompt == 'Stop!':
-                 break
-             print(pipe.generate(prompt))
-         pipe.finish_chat()
-
-
-   .. tab-item:: C++
-      :sync: cpp
-
-      .. code-block:: cpp
-
-         int main(int argc, char* argv[]) {
-             std::string prompt;
-
-             std::string model_path = argv[1];
-             ov::genai::LLMPipeline pipe(model_path, "CPU");
-
-             ov::genai::GenerationConfig config = pipe.get_generation_config();
-             config.max_new_tokens = 100;
-             pipe.set_generation_config(config)
-
-             pipe.start_chat();
-             for (size_t i = 0; i < questions.size(); i++) {
-                 std::cout << "question:\n";
-                 std::getline(std::cin, prompt);
-
-                 std::cout << pipe.generate(prompt) << std::endl;
-             }
-             pipe.finish_chat();
-         }

 Optimizing Generation with Grouped Beam Search
 #######################################################
diff --git a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst
new file mode 100644
index 00000000000000..65cb7df75933d0
--- /dev/null
+++ b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst
@@ -0,0 +1,296 @@
+GenAI Use Cases
+=====================
+
+This article provides several use case scenarios for the inference on
+Generative AI Models. The applications presented in the code samples below
+only require minimal configuration, like setting an inference device. Feel free
+to explore and modify the source code as you need.
+
+
+Using GenAI in Text to Image Conversion
+#######################################
+
+Examples below demonstrate inference on text-to-image models, like Stable Diffusion
+1.5, 2.1, and LCM, with a text prompt as input.
The :ref:`main.cpp ` +sample shows basic usage of the ``Text2ImagePipeline`` pipeline. +:ref:`lora.cpp ` shows how to apply LoRA adapters to the pipeline. + + +.. tab-set:: + + .. tab-item:: C++ + :sync: cpp + + .. tab-set:: + + .. tab-item:: main.cpp + :name: maincpp + + .. code-block:: cpp + + int32_t main(int32_t argc, char* argv[]) try { + OPENVINO_ASSERT(argc == 3, "Usage: ", argv[0], " ''"); + + const std::string models_path = argv[1], prompt = argv[2]; + const std::string device = "CPU"; // GPU, NPU can be used as well. + + ov::genai::Text2ImagePipeline pipe(models_path, device); + ov::Tensor image = pipe.generate(prompt, + ov::genai::width(512), + ov::genai::height(512), + ov::genai::num_inference_steps(20), + ov::genai::num_images_per_prompt(1)); + + // Saves images with a `num_images_per_prompt` name pattern. + imwrite("image_%d.bmp", image, true); + + return EXIT_SUCCESS; + } catch (const std::exception& error) { + try { + std::cerr << error.what() << '\n'; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; + } catch (...) { + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; + } + + .. tab-item:: LoRA.cpp + :name: loracpp + + .. code-block:: cpp + + int32_t main(int32_t argc, char* argv[]) try { + OPENVINO_ASSERT(argc >= 3 && (argc - 3) % 2 == 0, "Usage: ", argv[0], " '' [ ...]]"); + + const std::string models_path = argv[1], prompt = argv[2]; + const std::string device = "CPU"; // GPU, NPU can be used as well. + + ov::genai::AdapterConfig adapter_config; + // Applying Multiple LoRA adapters simultaneously is supported. Parse them all and the corresponding alphas from cmd parameters: + for(size_t i = 0; i < (argc - 3)/2; ++i) { + ov::genai::Adapter adapter(argv[3 + 2*i]); + float alpha = std::atof(argv[3 + 2*i + 1]); + adapter_config.add(adapter, alpha); + } + + // LoRA adapters passed to the constructor will be activated by default in the next generation. + ov::genai::Text2ImagePipeline pipe(models_path, device, ov::genai::adapters(adapter_config)); + + std::cout << "Generating image with LoRA adapters applied, resulting image will be in lora.bmp\n"; + ov::Tensor image = pipe.generate(prompt, + ov::genai::random_generator(std::make_shared(42)), + ov::genai::width(512), + ov::genai::height(896), + ov::genai::num_inference_steps(20)); + imwrite("lora.bmp", image, true); + + std::cout << "Generating image without LoRA adapters applied, resulting image will be in baseline.bmp\n"; + image = pipe.generate(prompt, + ov::genai::adapters(), // Passing adapters as generation overrides set in the constructor; adapters() means no adapters. + ov::genai::random_generator(std::make_shared(42)), + ov::genai::width(512), + ov::genai::height(896), + ov::genai::num_inference_steps(20)); + imwrite("baseline.bmp", image, true); + + return EXIT_SUCCESS; + } catch (const std::exception& error) { + try { + std::cerr << error.what() << '\n'; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; + } catch (...) { + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; + } + + + For more information, refer to the + `C++ sample `__ + +Using GenAI in Speech Recognition +################################# + + +The application, shown in code samples below, performs inference on speech +recognition Whisper Models. The samples include the ``WhisperPipeline`` class +and use audio files in WAV format at a sampling rate of 16 kHz as input. + +.. 
tab-set:: + + .. tab-item:: Python + :sync: cpp + + .. code-block:: python + + import argparse + import openvino_genai + import librosa + + + def read_wav(filepath): + raw_speech, samplerate = librosa.load(filepath, sr=16000) + return raw_speech.tolist() + + + def main(): + parser = argparse.ArgumentParser() + parser.add_argument("model_dir") + parser.add_argument("wav_file_path") + args = parser.parse_args() + + raw_speech = read_wav(args.wav_file_path) + + pipe = openvino_genai.WhisperPipeline(args.model_dir) + + def streamer(word: str) -> bool: + print(word, end="") + return False + + result = pipe.generate( + raw_speech, + max_new_tokens=100, + # 'task' and 'language' parameters are supported for multilingual models only + language="<|en|>", + task="transcribe", + return_timestamps=True, + streamer=streamer, + ) + + print() + + for chunk in result.chunks: + print(f"timestamps: [{chunk.start_ts}, {chunk.end_ts}] text: {chunk.text}") + + + For more information, refer to the + `Python sample `__. + + .. tab-item:: C++ + :sync: cpp + + .. code-block:: cpp + + int main(int argc, char* argv[]) try { + if (3 > argc) { + throw std::runtime_error(std::string{"Usage: "} + argv[0] + " \"\""); + } + + std::string model_path = argv[1]; + std::string wav_file_path = argv[2]; + + ov::genai::RawSpeechInput raw_speech = utils::audio::read_wav(wav_file_path); + + ov::genai::WhisperPipeline pipeline{model_path}; + + ov::genai::WhisperGenerationConfig config{model_path + "/generation_config.json"}; + config.max_new_tokens = 100; + // 'task' and 'language' parameters are supported for multilingual models only + config.language = "<|en|>"; + config.task = "transcribe"; + config.return_timestamps = true; + + auto streamer = [](std::string word) { + std::cout << word; + return false; + }; + + auto result = pipeline.generate(raw_speech, config, streamer); + + std::cout << "\n"; + + for (auto& chunk : *result.chunks) { + std::cout << "timestamps: [" << chunk.start_ts << ", " << chunk.end_ts << "] text: " << chunk.text << "\n"; + } + } catch (const std::exception& error) { + try { + std::cerr << error.what() << '\n'; + } catch (const std::ios_base::failure&) { + } + return EXIT_FAILURE; + } catch (...) { + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) { + } + return EXIT_FAILURE; + } + + + For more information, refer to the + `C++ sample `__. + + +Using GenAI in Chat Scenario +############################ + +For chat scenarios where inputs and outputs represent a conversation, maintaining KVCache across inputs +may prove beneficial. The ``start_chat`` and ``finish_chat`` chat-specific methods are used to +mark a conversation session, as shown in the samples below: + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. code-block:: python + + import openvino_genai as ov_genai + pipe = ov_genai.LLMPipeline(model_path) + + pipe.set_generation_config({'max_new_tokens': 100) + + pipe.start_chat() + while True: + print('question:') + prompt = input() + if prompt == 'Stop!': + break + print(pipe.generate(prompt)) + pipe.finish_chat() + + + For more information, refer to the + `Python sample `__. + + .. tab-item:: C++ + :sync: cpp + + .. 
code-block:: cpp + + int main(int argc, char* argv[]) { + std::string prompt; + + std::string model_path = argv[1]; + ov::genai::LLMPipeline pipe(model_path, "CPU"); + + ov::genai::GenerationConfig config = pipe.get_generation_config(); + config.max_new_tokens = 100; + pipe.set_generation_config(config) + + pipe.start_chat(); + for (size_t i = 0; i < questions.size(); i++) { + std::cout << "question:\n"; + std::getline(std::cin, prompt); + + std::cout << pipe.generate(prompt) << std::endl; + } + pipe.finish_chat(); + } + + + For more information, refer to the + `C++ sample `__ + +Additional Resources +##################### + +* :doc:`Install OpenVINO GenAI <../../../get-started/install-openvino/install-openvino-genai>` +* `OpenVINO GenAI Repo `__ +* `OpenVINO GenAI Samples `__ +* `OpenVINO Tokenizers `__ From ecd2e6c28cd53b08838a422a61445e3b743c0b59 Mon Sep 17 00:00:00 2001 From: Sebastian Golebiewski Date: Tue, 15 Oct 2024 17:09:07 +0200 Subject: [PATCH 2/7] Apply suggestions from code review Co-authored-by: Karol Blaszczak --- .../llm_inference_guide/genai-guide/genai-use-cases.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst index 65cb7df75933d0..ca337880b8eb9c 100644 --- a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst +++ b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst @@ -1,13 +1,13 @@ GenAI Use Cases ===================== -This article provides several use case scenarios for the inference on -Generative AI Models. The applications presented in the code samples below +This article provides several use case scenarios for Generative AI model +inference. The applications presented in the code samples below only require minimal configuration, like setting an inference device. Feel free to explore and modify the source code as you need. -Using GenAI in Text to Image Conversion +Using GenAI for Text-to-Image Generation ####################################### Examples below demonstrate inference on text-to-image models, like Stable Diffusion From 7cb2fa6463a82562eb5401d14a32223e7e15cc66 Mon Sep 17 00:00:00 2001 From: Karol Blaszczak Date: Wed, 16 Oct 2024 09:03:18 +0200 Subject: [PATCH 3/7] Update docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst --- .../llm_inference_guide/genai-guide/genai-use-cases.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst index ca337880b8eb9c..3cbeb4ccb3c783 100644 --- a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst +++ b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst @@ -8,7 +8,7 @@ to explore and modify the source code as you need. Using GenAI for Text-to-Image Generation -####################################### +################################################ Examples below demonstrate inference on text-to-image models, like Stable Diffusion 1.5, 2.1, and LCM, with a text prompt as input. 
The :ref:`main.cpp ` From 08e4a066c128c5e0ce4daca9d6e14488132070ba Mon Sep 17 00:00:00 2001 From: sgolebiewski-intel Date: Wed, 16 Oct 2024 10:09:57 +0200 Subject: [PATCH 4/7] Updating code snippets --- .../genai-guide/genai-use-cases.rst | 133 ++++++++++-------- 1 file changed, 71 insertions(+), 62 deletions(-) diff --git a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst index 3cbeb4ccb3c783..5b97e291774cf7 100644 --- a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst +++ b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst @@ -8,7 +8,7 @@ to explore and modify the source code as you need. Using GenAI for Text-to-Image Generation -################################################ +######################################## Examples below demonstrate inference on text-to-image models, like Stable Diffusion 1.5, 2.1, and LCM, with a text prompt as input. The :ref:`main.cpp ` @@ -28,7 +28,11 @@ sample shows basic usage of the ``Text2ImagePipeline`` pipeline. .. code-block:: cpp - int32_t main(int32_t argc, char* argv[]) try { + #include "openvino/genai/text2image/pipeline.hpp" + + #include "imwrite.hpp" + + int32_t main(int32_t argc, char* argv[]) { OPENVINO_ASSERT(argc == 3, "Usage: ", argv[0], " ''"); const std::string models_path = argv[1], prompt = argv[2]; @@ -45,16 +49,6 @@ sample shows basic usage of the ``Text2ImagePipeline`` pipeline. imwrite("image_%d.bmp", image, true); return EXIT_SUCCESS; - } catch (const std::exception& error) { - try { - std::cerr << error.what() << '\n'; - } catch (const std::ios_base::failure&) {} - return EXIT_FAILURE; - } catch (...) { - try { - std::cerr << "Non-exception object thrown\n"; - } catch (const std::ios_base::failure&) {} - return EXIT_FAILURE; } .. tab-item:: LoRA.cpp @@ -62,7 +56,11 @@ sample shows basic usage of the ``Text2ImagePipeline`` pipeline. .. code-block:: cpp - int32_t main(int32_t argc, char* argv[]) try { + #include "openvino/genai/text2image/pipeline.hpp" + + #include "imwrite.hpp" + + int32_t main(int32_t argc, char* argv[]) { OPENVINO_ASSERT(argc >= 3 && (argc - 3) % 2 == 0, "Usage: ", argv[0], " '' [ ...]]"); const std::string models_path = argv[1], prompt = argv[2]; @@ -97,16 +95,6 @@ sample shows basic usage of the ``Text2ImagePipeline`` pipeline. imwrite("baseline.bmp", image, true); return EXIT_SUCCESS; - } catch (const std::exception& error) { - try { - std::cerr << error.what() << '\n'; - } catch (const std::ios_base::failure&) {} - return EXIT_FAILURE; - } catch (...) { - try { - std::cerr << "Non-exception object thrown\n"; - } catch (const std::ios_base::failure&) {} - return EXIT_FAILURE; } @@ -155,7 +143,7 @@ and use audio files in WAV format at a sampling rate of 16 kHz as input. result = pipe.generate( raw_speech, max_new_tokens=100, - # 'task' and 'language' parameters are supported for multilingual models only + # The 'task' and 'language' parameters are supported for multilingual models only. language="<|en|>", task="transcribe", return_timestamps=True, @@ -176,7 +164,7 @@ and use audio files in WAV format at a sampling rate of 16 kHz as input. .. code-block:: cpp - int main(int argc, char* argv[]) try { + int main(int argc, char* argv[]) { if (3 > argc) { throw std::runtime_error(std::string{"Usage: "} + argv[0] + " \"\""); } @@ -207,18 +195,6 @@ and use audio files in WAV format at a sampling rate of 16 kHz as input. 
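                 // With return_timestamps enabled above, the result also carries timestamped chunks (start_ts, end_ts, text) printed below.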
for (auto& chunk : *result.chunks) { std::cout << "timestamps: [" << chunk.start_ts << ", " << chunk.end_ts << "] text: " << chunk.text << "\n"; } - } catch (const std::exception& error) { - try { - std::cerr << error.what() << '\n'; - } catch (const std::ios_base::failure&) { - } - return EXIT_FAILURE; - } catch (...) { - try { - std::cerr << "Non-exception object thrown\n"; - } catch (const std::ios_base::failure&) { - } - return EXIT_FAILURE; } @@ -240,19 +216,41 @@ mark a conversation session, as shown in the samples below: .. code-block:: python - import openvino_genai as ov_genai - pipe = ov_genai.LLMPipeline(model_path) + import argparse + import openvino_genai + + + def streamer(subword): + print(subword, end='', flush=True) + # The return flag corresponds to whether generation should be stopped or not. + # False means continue generation. + return False - pipe.set_generation_config({'max_new_tokens': 100) - pipe.start_chat() - while True: - print('question:') - prompt = input() - if prompt == 'Stop!': - break - print(pipe.generate(prompt)) - pipe.finish_chat() + def main(): + parser = argparse.ArgumentParser() + parser.add_argument('model_dir') + args = parser.parse_args() + + device = 'CPU' # GPU can be used as well. + pipe = openvino_genai.LLMPipeline(args.model_dir, device) + + config = openvino_genai.GenerationConfig() + config.max_new_tokens = 100 + + pipe.start_chat() + while True: + try: + prompt = input('question:\n') + except EOFError: + break + pipe.generate(prompt, config, streamer) + print('\n----------') + pipe.finish_chat() + + + if '__main__' == __name__: + main() For more information, refer to the @@ -263,24 +261,35 @@ mark a conversation session, as shown in the samples below: .. code-block:: cpp - int main(int argc, char* argv[]) { - std::string prompt; + #include "openvino/genai/llm_pipeline.hpp" - std::string model_path = argv[1]; - ov::genai::LLMPipeline pipe(model_path, "CPU"); + int main(int argc, char* argv[]) { + if (2 != argc) { + throw std::runtime_error(std::string{"Usage: "} + argv[0] + " "); + } + std::string prompt; + std::string model_path = argv[1]; - ov::genai::GenerationConfig config = pipe.get_generation_config(); - config.max_new_tokens = 100; - pipe.set_generation_config(config) + std::string device = "CPU"; // GPU, NPU can be used as well + ov::genai::LLMPipeline pipe(model_path, device); - pipe.start_chat(); - for (size_t i = 0; i < questions.size(); i++) { - std::cout << "question:\n"; - std::getline(std::cin, prompt); + ov::genai::GenerationConfig config; + config.max_new_tokens = 100; + std::function streamer = [](std::string word) { + std::cout << word << std::flush; + // Return flag corresponds whether generation should be stopped. + // false means continue generation. 
+ return false; + }; - std::cout << pipe.generate(prompt) << std::endl; - } - pipe.finish_chat(); + pipe.start_chat(); + std::cout << "question:\n"; + while (std::getline(std::cin, prompt)) { + pipe.generate(prompt, config, streamer); + std::cout << "\n----------\n" + "question:\n"; + } + pipe.finish_chat(); } From aed3f69fed1d54949698a4c0fd1d6a1c6612512c Mon Sep 17 00:00:00 2001 From: sgolebiewski-intel Date: Thu, 24 Oct 2024 09:54:23 +0200 Subject: [PATCH 5/7] Updating code snippets --- .../genai-guide/genai-use-cases.rst | 96 ++++++++++++------- 1 file changed, 63 insertions(+), 33 deletions(-) diff --git a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst index 5b97e291774cf7..ede05cf49fef8c 100644 --- a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst +++ b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst @@ -32,11 +32,11 @@ sample shows basic usage of the ``Text2ImagePipeline`` pipeline. #include "imwrite.hpp" - int32_t main(int32_t argc, char* argv[]) { + int32_t main(int32_t argc, char* argv[]) try { OPENVINO_ASSERT(argc == 3, "Usage: ", argv[0], " ''"); const std::string models_path = argv[1], prompt = argv[2]; - const std::string device = "CPU"; // GPU, NPU can be used as well. + const std::string device = "CPU"; // GPU, NPU can be used as well ov::genai::Text2ImagePipeline pipe(models_path, device); ov::Tensor image = pipe.generate(prompt, @@ -45,10 +45,19 @@ sample shows basic usage of the ``Text2ImagePipeline`` pipeline. ov::genai::num_inference_steps(20), ov::genai::num_images_per_prompt(1)); - // Saves images with a `num_images_per_prompt` name pattern. imwrite("image_%d.bmp", image, true); return EXIT_SUCCESS; + } catch (const std::exception& error) { + try { + std::cerr << error.what() << '\n'; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; + } catch (...) { + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; } .. tab-item:: LoRA.cpp @@ -60,21 +69,19 @@ sample shows basic usage of the ``Text2ImagePipeline`` pipeline. #include "imwrite.hpp" - int32_t main(int32_t argc, char* argv[]) { + int32_t main(int32_t argc, char* argv[]) try { OPENVINO_ASSERT(argc >= 3 && (argc - 3) % 2 == 0, "Usage: ", argv[0], " '' [ ...]]"); const std::string models_path = argv[1], prompt = argv[2]; - const std::string device = "CPU"; // GPU, NPU can be used as well. + const std::string device = "CPU"; // GPU, NPU can be used as well ov::genai::AdapterConfig adapter_config; - // Applying Multiple LoRA adapters simultaneously is supported. Parse them all and the corresponding alphas from cmd parameters: for(size_t i = 0; i < (argc - 3)/2; ++i) { ov::genai::Adapter adapter(argv[3 + 2*i]); float alpha = std::atof(argv[3 + 2*i + 1]); adapter_config.add(adapter, alpha); } - // LoRA adapters passed to the constructor will be activated by default in the next generation. ov::genai::Text2ImagePipeline pipe(models_path, device, ov::genai::adapters(adapter_config)); std::cout << "Generating image with LoRA adapters applied, resulting image will be in lora.bmp\n"; @@ -87,7 +94,7 @@ sample shows basic usage of the ``Text2ImagePipeline`` pipeline. 
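             // The baseline run below reuses the same seed but passes an empty adapter set, so the two images differ only by the LoRA adapters.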
std::cout << "Generating image without LoRA adapters applied, resulting image will be in baseline.bmp\n"; image = pipe.generate(prompt, - ov::genai::adapters(), // Passing adapters as generation overrides set in the constructor; adapters() means no adapters. + ov::genai::adapters(), ov::genai::random_generator(std::make_shared(42)), ov::genai::width(512), ov::genai::height(896), @@ -95,6 +102,16 @@ sample shows basic usage of the ``Text2ImagePipeline`` pipeline. imwrite("baseline.bmp", image, true); return EXIT_SUCCESS; + } catch (const std::exception& error) { + try { + std::cerr << error.what() << '\n'; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; + } catch (...) { + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; } @@ -116,7 +133,6 @@ and use audio files in WAV format at a sampling rate of 16 kHz as input. .. code-block:: python - import argparse import openvino_genai import librosa @@ -126,15 +142,9 @@ and use audio files in WAV format at a sampling rate of 16 kHz as input. return raw_speech.tolist() - def main(): - parser = argparse.ArgumentParser() - parser.add_argument("model_dir") - parser.add_argument("wav_file_path") - args = parser.parse_args() - - raw_speech = read_wav(args.wav_file_path) - - pipe = openvino_genai.WhisperPipeline(args.model_dir) + def infer(model_dir: str, wav_file_path: str): + raw_speech = read_wav(wav_file_path) + pipe = openvino_genai.WhisperPipeline(model_dir) def streamer(word: str) -> bool: print(word, end="") @@ -143,7 +153,6 @@ and use audio files in WAV format at a sampling rate of 16 kHz as input. result = pipe.generate( raw_speech, max_new_tokens=100, - # The 'task' and 'language' parameters are supported for multilingual models only. language="<|en|>", task="transcribe", return_timestamps=True, @@ -151,7 +160,6 @@ and use audio files in WAV format at a sampling rate of 16 kHz as input. ) print() - for chunk in result.chunks: print(f"timestamps: [{chunk.start_ts}, {chunk.end_ts}] text: {chunk.text}") @@ -164,21 +172,24 @@ and use audio files in WAV format at a sampling rate of 16 kHz as input. .. code-block:: cpp - int main(int argc, char* argv[]) { + #include "audio_utils.hpp" + #include "openvino/genai/whisper_pipeline.hpp" + + int main(int argc, char* argv[]) try { if (3 > argc) { throw std::runtime_error(std::string{"Usage: "} + argv[0] + " \"\""); } - std::string model_path = argv[1]; + std::filesystem::path models_path = argv[1]; std::string wav_file_path = argv[2]; + std::string device = "CPU"; // GPU can be used as well - ov::genai::RawSpeechInput raw_speech = utils::audio::read_wav(wav_file_path); + ov::genai::WhisperPipeline pipeline(models_path, device); - ov::genai::WhisperPipeline pipeline{model_path}; + ov::genai::RawSpeechInput raw_speech = utils::audio::read_wav(wav_file_path); - ov::genai::WhisperGenerationConfig config{model_path + "/generation_config.json"}; + ov::genai::WhisperGenerationConfig config(models_path / "generation_config.json"); config.max_new_tokens = 100; - // 'task' and 'language' parameters are supported for multilingual models only config.language = "<|en|>"; config.task = "transcribe"; config.return_timestamps = true; @@ -195,6 +206,19 @@ and use audio files in WAV format at a sampling rate of 16 kHz as input. 
for (auto& chunk : *result.chunks) { std::cout << "timestamps: [" << chunk.start_ts << ", " << chunk.end_ts << "] text: " << chunk.text << "\n"; } + + } catch (const std::exception& error) { + try { + std::cerr << error.what() << '\n'; + } catch (const std::ios_base::failure&) { + } + return EXIT_FAILURE; + } catch (...) { + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) { + } + return EXIT_FAILURE; } @@ -222,8 +246,6 @@ mark a conversation session, as shown in the samples below: def streamer(subword): print(subword, end='', flush=True) - # The return flag corresponds to whether generation should be stopped or not. - # False means continue generation. return False @@ -263,22 +285,20 @@ mark a conversation session, as shown in the samples below: #include "openvino/genai/llm_pipeline.hpp" - int main(int argc, char* argv[]) { + int main(int argc, char* argv[]) try { if (2 != argc) { throw std::runtime_error(std::string{"Usage: "} + argv[0] + " "); } std::string prompt; - std::string model_path = argv[1]; + std::string models_path = argv[1]; std::string device = "CPU"; // GPU, NPU can be used as well - ov::genai::LLMPipeline pipe(model_path, device); + ov::genai::LLMPipeline pipe(models_path, device); ov::genai::GenerationConfig config; config.max_new_tokens = 100; std::function streamer = [](std::string word) { std::cout << word << std::flush; - // Return flag corresponds whether generation should be stopped. - // false means continue generation. return false; }; @@ -290,6 +310,16 @@ mark a conversation session, as shown in the samples below: "question:\n"; } pipe.finish_chat(); + } catch (const std::exception& error) { + try { + std::cerr << error.what() << '\n'; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; + } catch (...) { + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; } From d3a6588f2b6ae29793cde3f6a9194634a2089ade Mon Sep 17 00:00:00 2001 From: sgolebiewski-intel Date: Thu, 24 Oct 2024 14:35:54 +0200 Subject: [PATCH 6/7] Adding Python text2image code samples --- .../genai-guide/genai-use-cases.rst | 123 +++++++++++++++++- 1 file changed, 120 insertions(+), 3 deletions(-) diff --git a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst index ede05cf49fef8c..af1c70d0901e39 100644 --- a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst +++ b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst @@ -18,6 +18,122 @@ sample shows basic usage of the ``Text2ImagePipeline`` pipeline. .. tab-set:: + .. tab-item:: Python + :sync: python + + .. tab-set:: + + .. tab-item:: main.py + :name: mainpy + + .. 
code-block:: python + + import argparse + + import openvino_genai + from PIL import Image + import numpy as np + + class Generator(openvino_genai.Generator): + def __init__(self, seed, mu=0.0, sigma=1.0): + openvino_genai.Generator.__init__(self) + np.random.seed(seed) + self.mu = mu + self.sigma = sigma + + def next(self): + return np.random.normal(self.mu, self.sigma) + + + def main(): + parser = argparse.ArgumentParser() + parser.add_argument('model_dir') + parser.add_argument('prompt') + args = parser.parse_args() + + device = 'CPU' # GPU can be used as well + random_generator = Generator(42) + pipe = openvino_genai.Text2ImagePipeline(args.model_dir, device) + image_tensor = pipe.generate( + args.prompt, + width=512, + height=512, + num_inference_steps=20, + num_images_per_prompt=1, + random_generator=random_generator + ) + + image = Image.fromarray(image_tensor.data[0]) + image.save("image.bmp") + + .. tab-item:: LoRA.py + :name: lorapy + + .. code-block:: python + + import argparse + + import openvino as ov + import openvino_genai + import numpy as np + import sys + + + class Generator(openvino_genai.Generator): + def __init__(self, seed, mu=0.0, sigma=1.0): + openvino_genai.Generator.__init__(self) + np.random.seed(seed) + self.mu = mu + self.sigma = sigma + + def next(self): + return np.random.normal(self.mu, self.sigma) + + + def image_write(path: str, image_tensor: ov.Tensor): + from PIL import Image + image = Image.fromarray(image_tensor.data[0]) + image.save(path) + + + def main(): + parser = argparse.ArgumentParser() + parser.add_argument('models_path') + parser.add_argument('prompt') + args, adapters = parser.parse_known_args() + + prompt = args.prompt + + device = "CPU" # GPU, NPU can be used as well + adapter_config = openvino_genai.AdapterConfig() + + for i in range(int(len(adapters) / 2)): + adapter = openvino_genai.Adapter(adapters[2 * i]) + alpha = float(adapters[2 * i + 1]) + adapter_config.add(adapter, alpha) + + pipe = openvino_genai.Text2ImagePipeline(args.models_path, device, adapters=adapter_config) + print("Generating image with LoRA adapters applied, resulting image will be in lora.bmp") + image = pipe.generate(prompt, + random_generator=Generator(42), + width=512, + height=896, + num_inference_steps=20) + + image_write("lora.bmp", image) + print("Generating image without LoRA adapters applied, resulting image will be in baseline.bmp") + image = pipe.generate(prompt, + adapters=openvino_genai.AdapterConfig(), + random_generator=Generator(42), + width=512, + height=896, + num_inference_steps=20 + ) + image_write("baseline.bmp", image) + + For more information, refer to the + `Python sample `__ + .. tab-item:: C++ :sync: cpp @@ -118,6 +234,10 @@ sample shows basic usage of the ``Text2ImagePipeline`` pipeline. For more information, refer to the `C++ sample `__ + + + + Using GenAI in Speech Recognition ################################# @@ -271,9 +391,6 @@ mark a conversation session, as shown in the samples below: pipe.finish_chat() - if '__main__' == __name__: - main() - For more information, refer to the `Python sample `__. 
From a9902a3da9ca2319a5ac34e938864584e7016de6 Mon Sep 17 00:00:00 2001 From: sgolebiewski-intel Date: Thu, 24 Oct 2024 15:35:45 +0200 Subject: [PATCH 7/7] Removing parser from Python samples --- .../genai-guide/genai-use-cases.rst | 35 +++++-------------- 1 file changed, 8 insertions(+), 27 deletions(-) diff --git a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst index af1c70d0901e39..953784c03fdef0 100644 --- a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst +++ b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst @@ -28,8 +28,6 @@ sample shows basic usage of the ``Text2ImagePipeline`` pipeline. .. code-block:: python - import argparse - import openvino_genai from PIL import Image import numpy as np @@ -45,17 +43,12 @@ sample shows basic usage of the ``Text2ImagePipeline`` pipeline. return np.random.normal(self.mu, self.sigma) - def main(): - parser = argparse.ArgumentParser() - parser.add_argument('model_dir') - parser.add_argument('prompt') - args = parser.parse_args() - + def infer(model_dir: str, prompt: str): device = 'CPU' # GPU can be used as well random_generator = Generator(42) - pipe = openvino_genai.Text2ImagePipeline(args.model_dir, device) + pipe = openvino_genai.Text2ImagePipeline(model_dir, device) image_tensor = pipe.generate( - args.prompt, + prompt, width=512, height=512, num_inference_steps=20, @@ -71,8 +64,6 @@ sample shows basic usage of the ``Text2ImagePipeline`` pipeline. .. code-block:: python - import argparse - import openvino as ov import openvino_genai import numpy as np @@ -96,13 +87,8 @@ sample shows basic usage of the ``Text2ImagePipeline`` pipeline. image.save(path) - def main(): - parser = argparse.ArgumentParser() - parser.add_argument('models_path') - parser.add_argument('prompt') - args, adapters = parser.parse_known_args() - - prompt = args.prompt + def infer(models_path: str, prompt: str): + prompt = "cyberpunk cityscape like Tokyo New York with tall buildings at dusk golden hour cinematic lighting" device = "CPU" # GPU, NPU can be used as well adapter_config = openvino_genai.AdapterConfig() @@ -112,7 +98,7 @@ sample shows basic usage of the ``Text2ImagePipeline`` pipeline. alpha = float(adapters[2 * i + 1]) adapter_config.add(adapter, alpha) - pipe = openvino_genai.Text2ImagePipeline(args.models_path, device, adapters=adapter_config) + pipe = openvino_genai.Text2ImagePipeline(models_path, device, adapters=adapter_config) print("Generating image with LoRA adapters applied, resulting image will be in lora.bmp") image = pipe.generate(prompt, random_generator=Generator(42), @@ -360,7 +346,6 @@ mark a conversation session, as shown in the samples below: .. code-block:: python - import argparse import openvino_genai @@ -369,13 +354,9 @@ mark a conversation session, as shown in the samples below: return False - def main(): - parser = argparse.ArgumentParser() - parser.add_argument('model_dir') - args = parser.parse_args() - + def infer(model_dir: str): device = 'CPU' # GPU can be used as well. - pipe = openvino_genai.LLMPipeline(args.model_dir, device) + pipe = openvino_genai.LLMPipeline(model_dir, device) config = openvino_genai.GenerationConfig() config.max_new_tokens = 100