From add9ed4f440cc3f55ae4316007ed81fc48f289f8 Mon Sep 17 00:00:00 2001
From: sgolebiewski-intel
Date: Tue, 15 Oct 2024 12:34:04 +0200
Subject: [PATCH 1/7] Add GenAI Use Cases to docs

---
 .../llm_inference_guide/genai-guide.rst       |  54 +---
 .../genai-guide/genai-use-cases.rst           | 296 ++++++++++++++++++
 2 files changed, 297 insertions(+), 53 deletions(-)
 create mode 100644 docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst

diff --git a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst
index f1fd002b48072e..ebd4667d544616 100644
--- a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst
+++ b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst
@@ -9,6 +9,7 @@ Run LLM Inference on OpenVINO with the GenAI Flavor
    :hidden:

    NPU inference of LLMs
+   genai-guide/genai-use-cases

 This guide will show you how to integrate the OpenVINO GenAI flavor into your application, covering
@@ -174,59 +175,6 @@ You can also create your custom streamer for more sophisticated processing:
        pipe.generate("The Sun is yellow because", ov::genai::streamer(custom_streamer), ov::genai::max_new_tokens(100));
    }

-Using GenAI in Chat Scenario
-################################
-
-For chat scenarios where inputs and outputs represent a conversation, maintaining KVCache across inputs
-may prove beneficial. The chat-specific methods **start_chat** and **finish_chat** are used to
-mark a conversation session, as you can see in these simple examples:
-
-.. tab-set::
-
-   .. tab-item:: Python
-      :sync: py
-
-      .. code-block:: python
-
-         import openvino_genai as ov_genai
-         pipe = ov_genai.LLMPipeline(model_path)
-
-         pipe.set_generation_config({'max_new_tokens': 100)
-
-         pipe.start_chat()
-         while True:
-             print('question:')
-             prompt = input()
-             if prompt == 'Stop!':
-                 break
-             print(pipe.generate(prompt))
-         pipe.finish_chat()
-
-
-   .. tab-item:: C++
-      :sync: cpp
-
-      .. code-block:: cpp
-
-         int main(int argc, char* argv[]) {
-             std::string prompt;
-
-             std::string model_path = argv[1];
-             ov::genai::LLMPipeline pipe(model_path, "CPU");
-
-             ov::genai::GenerationConfig config = pipe.get_generation_config();
-             config.max_new_tokens = 100;
-             pipe.set_generation_config(config)
-
-             pipe.start_chat();
-             for (size_t i = 0; i < questions.size(); i++) {
-                 std::cout << "question:\n";
-                 std::getline(std::cin, prompt);
-
-                 std::cout << pipe.generate(prompt) << std::endl;
-             }
-             pipe.finish_chat();
-         }

 Optimizing Generation with Grouped Beam Search
 #######################################################
diff --git a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst
new file mode 100644
index 00000000000000..65cb7df75933d0
--- /dev/null
+++ b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst
@@ -0,0 +1,296 @@
+GenAI Use Cases
+=====================
+
+This article provides several use case scenarios for the inference on
+Generative AI Models. The applications presented in the code samples below
+only require minimal configuration, like setting an inference device. Feel free
+to explore and modify the source code as you need.
+
+
+Using GenAI in Text to Image Conversion
+#######################################
+
+Examples below demonstrate inference on text-to-image models, like Stable Diffusion
+1.5, 2.1, and LCM, with a text prompt as input.
The :ref:`main.cpp ` +sample shows basic usage of the ``Text2ImagePipeline`` pipeline. +:ref:`lora.cpp ` shows how to apply LoRA adapters to the pipeline. + + +.. tab-set:: + + .. tab-item:: C++ + :sync: cpp + + .. tab-set:: + + .. tab-item:: main.cpp + :name: maincpp + + .. code-block:: cpp + + int32_t main(int32_t argc, char* argv[]) try { + OPENVINO_ASSERT(argc == 3, "Usage: ", argv[0], " ''"); + + const std::string models_path = argv[1], prompt = argv[2]; + const std::string device = "CPU"; // GPU, NPU can be used as well. + + ov::genai::Text2ImagePipeline pipe(models_path, device); + ov::Tensor image = pipe.generate(prompt, + ov::genai::width(512), + ov::genai::height(512), + ov::genai::num_inference_steps(20), + ov::genai::num_images_per_prompt(1)); + + // Saves images with a `num_images_per_prompt` name pattern. + imwrite("image_%d.bmp", image, true); + + return EXIT_SUCCESS; + } catch (const std::exception& error) { + try { + std::cerr << error.what() << '\n'; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; + } catch (...) { + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; + } + + .. tab-item:: LoRA.cpp + :name: loracpp + + .. code-block:: cpp + + int32_t main(int32_t argc, char* argv[]) try { + OPENVINO_ASSERT(argc >= 3 && (argc - 3) % 2 == 0, "Usage: ", argv[0], " '' [ ...]]"); + + const std::string models_path = argv[1], prompt = argv[2]; + const std::string device = "CPU"; // GPU, NPU can be used as well. + + ov::genai::AdapterConfig adapter_config; + // Applying Multiple LoRA adapters simultaneously is supported. Parse them all and the corresponding alphas from cmd parameters: + for(size_t i = 0; i < (argc - 3)/2; ++i) { + ov::genai::Adapter adapter(argv[3 + 2*i]); + float alpha = std::atof(argv[3 + 2*i + 1]); + adapter_config.add(adapter, alpha); + } + + // LoRA adapters passed to the constructor will be activated by default in the next generation. + ov::genai::Text2ImagePipeline pipe(models_path, device, ov::genai::adapters(adapter_config)); + + std::cout << "Generating image with LoRA adapters applied, resulting image will be in lora.bmp\n"; + ov::Tensor image = pipe.generate(prompt, + ov::genai::random_generator(std::make_shared(42)), + ov::genai::width(512), + ov::genai::height(896), + ov::genai::num_inference_steps(20)); + imwrite("lora.bmp", image, true); + + std::cout << "Generating image without LoRA adapters applied, resulting image will be in baseline.bmp\n"; + image = pipe.generate(prompt, + ov::genai::adapters(), // Passing adapters as generation overrides set in the constructor; adapters() means no adapters. + ov::genai::random_generator(std::make_shared(42)), + ov::genai::width(512), + ov::genai::height(896), + ov::genai::num_inference_steps(20)); + imwrite("baseline.bmp", image, true); + + return EXIT_SUCCESS; + } catch (const std::exception& error) { + try { + std::cerr << error.what() << '\n'; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; + } catch (...) { + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; + } + + + For more information, refer to the + `C++ sample `__ + +Using GenAI in Speech Recognition +################################# + + +The application, shown in code samples below, performs inference on speech +recognition Whisper Models. The samples include the ``WhisperPipeline`` class +and use audio files in WAV format at a sampling rate of 16 kHz as input. + +.. 
tab-set:: + + .. tab-item:: Python + :sync: cpp + + .. code-block:: python + + import argparse + import openvino_genai + import librosa + + + def read_wav(filepath): + raw_speech, samplerate = librosa.load(filepath, sr=16000) + return raw_speech.tolist() + + + def main(): + parser = argparse.ArgumentParser() + parser.add_argument("model_dir") + parser.add_argument("wav_file_path") + args = parser.parse_args() + + raw_speech = read_wav(args.wav_file_path) + + pipe = openvino_genai.WhisperPipeline(args.model_dir) + + def streamer(word: str) -> bool: + print(word, end="") + return False + + result = pipe.generate( + raw_speech, + max_new_tokens=100, + # 'task' and 'language' parameters are supported for multilingual models only + language="<|en|>", + task="transcribe", + return_timestamps=True, + streamer=streamer, + ) + + print() + + for chunk in result.chunks: + print(f"timestamps: [{chunk.start_ts}, {chunk.end_ts}] text: {chunk.text}") + + + For more information, refer to the + `Python sample `__. + + .. tab-item:: C++ + :sync: cpp + + .. code-block:: cpp + + int main(int argc, char* argv[]) try { + if (3 > argc) { + throw std::runtime_error(std::string{"Usage: "} + argv[0] + " \"\""); + } + + std::string model_path = argv[1]; + std::string wav_file_path = argv[2]; + + ov::genai::RawSpeechInput raw_speech = utils::audio::read_wav(wav_file_path); + + ov::genai::WhisperPipeline pipeline{model_path}; + + ov::genai::WhisperGenerationConfig config{model_path + "/generation_config.json"}; + config.max_new_tokens = 100; + // 'task' and 'language' parameters are supported for multilingual models only + config.language = "<|en|>"; + config.task = "transcribe"; + config.return_timestamps = true; + + auto streamer = [](std::string word) { + std::cout << word; + return false; + }; + + auto result = pipeline.generate(raw_speech, config, streamer); + + std::cout << "\n"; + + for (auto& chunk : *result.chunks) { + std::cout << "timestamps: [" << chunk.start_ts << ", " << chunk.end_ts << "] text: " << chunk.text << "\n"; + } + } catch (const std::exception& error) { + try { + std::cerr << error.what() << '\n'; + } catch (const std::ios_base::failure&) { + } + return EXIT_FAILURE; + } catch (...) { + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) { + } + return EXIT_FAILURE; + } + + + For more information, refer to the + `C++ sample `__. + + +Using GenAI in Chat Scenario +############################ + +For chat scenarios where inputs and outputs represent a conversation, maintaining KVCache across inputs +may prove beneficial. The ``start_chat`` and ``finish_chat`` chat-specific methods are used to +mark a conversation session, as shown in the samples below: + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. code-block:: python + + import openvino_genai as ov_genai + pipe = ov_genai.LLMPipeline(model_path) + + pipe.set_generation_config({'max_new_tokens': 100) + + pipe.start_chat() + while True: + print('question:') + prompt = input() + if prompt == 'Stop!': + break + print(pipe.generate(prompt)) + pipe.finish_chat() + + + For more information, refer to the + `Python sample `__. + + .. tab-item:: C++ + :sync: cpp + + .. 
code-block:: cpp + + int main(int argc, char* argv[]) { + std::string prompt; + + std::string model_path = argv[1]; + ov::genai::LLMPipeline pipe(model_path, "CPU"); + + ov::genai::GenerationConfig config = pipe.get_generation_config(); + config.max_new_tokens = 100; + pipe.set_generation_config(config) + + pipe.start_chat(); + for (size_t i = 0; i < questions.size(); i++) { + std::cout << "question:\n"; + std::getline(std::cin, prompt); + + std::cout << pipe.generate(prompt) << std::endl; + } + pipe.finish_chat(); + } + + + For more information, refer to the + `C++ sample `__ + +Additional Resources +##################### + +* :doc:`Install OpenVINO GenAI <../../../get-started/install-openvino/install-openvino-genai>` +* `OpenVINO GenAI Repo `__ +* `OpenVINO GenAI Samples `__ +* `OpenVINO Tokenizers `__ From ecd2e6c28cd53b08838a422a61445e3b743c0b59 Mon Sep 17 00:00:00 2001 From: Sebastian Golebiewski Date: Tue, 15 Oct 2024 17:09:07 +0200 Subject: [PATCH 2/7] Apply suggestions from code review Co-authored-by: Karol Blaszczak --- .../llm_inference_guide/genai-guide/genai-use-cases.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst index 65cb7df75933d0..ca337880b8eb9c 100644 --- a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst +++ b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst @@ -1,13 +1,13 @@ GenAI Use Cases ===================== -This article provides several use case scenarios for the inference on -Generative AI Models. The applications presented in the code samples below +This article provides several use case scenarios for Generative AI model +inference. The applications presented in the code samples below only require minimal configuration, like setting an inference device. Feel free to explore and modify the source code as you need. -Using GenAI in Text to Image Conversion +Using GenAI for Text-to-Image Generation ####################################### Examples below demonstrate inference on text-to-image models, like Stable Diffusion From 7cb2fa6463a82562eb5401d14a32223e7e15cc66 Mon Sep 17 00:00:00 2001 From: Karol Blaszczak Date: Wed, 16 Oct 2024 09:03:18 +0200 Subject: [PATCH 3/7] Update docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst --- .../llm_inference_guide/genai-guide/genai-use-cases.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst index ca337880b8eb9c..3cbeb4ccb3c783 100644 --- a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst +++ b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst @@ -8,7 +8,7 @@ to explore and modify the source code as you need. Using GenAI for Text-to-Image Generation -####################################### +################################################ Examples below demonstrate inference on text-to-image models, like Stable Diffusion 1.5, 2.1, and LCM, with a text prompt as input. 
The :ref:`main.cpp ` From 08e4a066c128c5e0ce4daca9d6e14488132070ba Mon Sep 17 00:00:00 2001 From: sgolebiewski-intel Date: Wed, 16 Oct 2024 10:09:57 +0200 Subject: [PATCH 4/7] Updating code snippets --- .../genai-guide/genai-use-cases.rst | 133 ++++++++++-------- 1 file changed, 71 insertions(+), 62 deletions(-) diff --git a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst index 3cbeb4ccb3c783..5b97e291774cf7 100644 --- a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst +++ b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst @@ -8,7 +8,7 @@ to explore and modify the source code as you need. Using GenAI for Text-to-Image Generation -################################################ +######################################## Examples below demonstrate inference on text-to-image models, like Stable Diffusion 1.5, 2.1, and LCM, with a text prompt as input. The :ref:`main.cpp ` @@ -28,7 +28,11 @@ sample shows basic usage of the ``Text2ImagePipeline`` pipeline. .. code-block:: cpp - int32_t main(int32_t argc, char* argv[]) try { + #include "openvino/genai/text2image/pipeline.hpp" + + #include "imwrite.hpp" + + int32_t main(int32_t argc, char* argv[]) { OPENVINO_ASSERT(argc == 3, "Usage: ", argv[0], " ''"); const std::string models_path = argv[1], prompt = argv[2]; @@ -45,16 +49,6 @@ sample shows basic usage of the ``Text2ImagePipeline`` pipeline. imwrite("image_%d.bmp", image, true); return EXIT_SUCCESS; - } catch (const std::exception& error) { - try { - std::cerr << error.what() << '\n'; - } catch (const std::ios_base::failure&) {} - return EXIT_FAILURE; - } catch (...) { - try { - std::cerr << "Non-exception object thrown\n"; - } catch (const std::ios_base::failure&) {} - return EXIT_FAILURE; } .. tab-item:: LoRA.cpp @@ -62,7 +56,11 @@ sample shows basic usage of the ``Text2ImagePipeline`` pipeline. .. code-block:: cpp - int32_t main(int32_t argc, char* argv[]) try { + #include "openvino/genai/text2image/pipeline.hpp" + + #include "imwrite.hpp" + + int32_t main(int32_t argc, char* argv[]) { OPENVINO_ASSERT(argc >= 3 && (argc - 3) % 2 == 0, "Usage: ", argv[0], " '' [ ...]]"); const std::string models_path = argv[1], prompt = argv[2]; @@ -97,16 +95,6 @@ sample shows basic usage of the ``Text2ImagePipeline`` pipeline. imwrite("baseline.bmp", image, true); return EXIT_SUCCESS; - } catch (const std::exception& error) { - try { - std::cerr << error.what() << '\n'; - } catch (const std::ios_base::failure&) {} - return EXIT_FAILURE; - } catch (...) { - try { - std::cerr << "Non-exception object thrown\n"; - } catch (const std::ios_base::failure&) {} - return EXIT_FAILURE; } @@ -155,7 +143,7 @@ and use audio files in WAV format at a sampling rate of 16 kHz as input. result = pipe.generate( raw_speech, max_new_tokens=100, - # 'task' and 'language' parameters are supported for multilingual models only + # The 'task' and 'language' parameters are supported for multilingual models only. language="<|en|>", task="transcribe", return_timestamps=True, @@ -176,7 +164,7 @@ and use audio files in WAV format at a sampling rate of 16 kHz as input. .. code-block:: cpp - int main(int argc, char* argv[]) try { + int main(int argc, char* argv[]) { if (3 > argc) { throw std::runtime_error(std::string{"Usage: "} + argv[0] + " \"\""); } @@ -207,18 +195,6 @@ and use audio files in WAV format at a sampling rate of 16 kHz as input. 
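                 // With return_timestamps enabled above, the result also carries timestamped chunks (start_ts, end_ts, text) printed below.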
for (auto& chunk : *result.chunks) { std::cout << "timestamps: [" << chunk.start_ts << ", " << chunk.end_ts << "] text: " << chunk.text << "\n"; } - } catch (const std::exception& error) { - try { - std::cerr << error.what() << '\n'; - } catch (const std::ios_base::failure&) { - } - return EXIT_FAILURE; - } catch (...) { - try { - std::cerr << "Non-exception object thrown\n"; - } catch (const std::ios_base::failure&) { - } - return EXIT_FAILURE; } @@ -240,19 +216,41 @@ mark a conversation session, as shown in the samples below: .. code-block:: python - import openvino_genai as ov_genai - pipe = ov_genai.LLMPipeline(model_path) + import argparse + import openvino_genai + + + def streamer(subword): + print(subword, end='', flush=True) + # The return flag corresponds to whether generation should be stopped or not. + # False means continue generation. + return False - pipe.set_generation_config({'max_new_tokens': 100) - pipe.start_chat() - while True: - print('question:') - prompt = input() - if prompt == 'Stop!': - break - print(pipe.generate(prompt)) - pipe.finish_chat() + def main(): + parser = argparse.ArgumentParser() + parser.add_argument('model_dir') + args = parser.parse_args() + + device = 'CPU' # GPU can be used as well. + pipe = openvino_genai.LLMPipeline(args.model_dir, device) + + config = openvino_genai.GenerationConfig() + config.max_new_tokens = 100 + + pipe.start_chat() + while True: + try: + prompt = input('question:\n') + except EOFError: + break + pipe.generate(prompt, config, streamer) + print('\n----------') + pipe.finish_chat() + + + if '__main__' == __name__: + main() For more information, refer to the @@ -263,24 +261,35 @@ mark a conversation session, as shown in the samples below: .. code-block:: cpp - int main(int argc, char* argv[]) { - std::string prompt; + #include "openvino/genai/llm_pipeline.hpp" - std::string model_path = argv[1]; - ov::genai::LLMPipeline pipe(model_path, "CPU"); + int main(int argc, char* argv[]) { + if (2 != argc) { + throw std::runtime_error(std::string{"Usage: "} + argv[0] + " "); + } + std::string prompt; + std::string model_path = argv[1]; - ov::genai::GenerationConfig config = pipe.get_generation_config(); - config.max_new_tokens = 100; - pipe.set_generation_config(config) + std::string device = "CPU"; // GPU, NPU can be used as well + ov::genai::LLMPipeline pipe(model_path, device); - pipe.start_chat(); - for (size_t i = 0; i < questions.size(); i++) { - std::cout << "question:\n"; - std::getline(std::cin, prompt); + ov::genai::GenerationConfig config; + config.max_new_tokens = 100; + std::function streamer = [](std::string word) { + std::cout << word << std::flush; + // Return flag corresponds whether generation should be stopped. + // false means continue generation. 
+ return false; + }; - std::cout << pipe.generate(prompt) << std::endl; - } - pipe.finish_chat(); + pipe.start_chat(); + std::cout << "question:\n"; + while (std::getline(std::cin, prompt)) { + pipe.generate(prompt, config, streamer); + std::cout << "\n----------\n" + "question:\n"; + } + pipe.finish_chat(); } From aed3f69fed1d54949698a4c0fd1d6a1c6612512c Mon Sep 17 00:00:00 2001 From: sgolebiewski-intel Date: Thu, 24 Oct 2024 09:54:23 +0200 Subject: [PATCH 5/7] Updating code snippets --- .../genai-guide/genai-use-cases.rst | 96 ++++++++++++------- 1 file changed, 63 insertions(+), 33 deletions(-) diff --git a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst index 5b97e291774cf7..ede05cf49fef8c 100644 --- a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst +++ b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst @@ -32,11 +32,11 @@ sample shows basic usage of the ``Text2ImagePipeline`` pipeline. #include "imwrite.hpp" - int32_t main(int32_t argc, char* argv[]) { + int32_t main(int32_t argc, char* argv[]) try { OPENVINO_ASSERT(argc == 3, "Usage: ", argv[0], " ''"); const std::string models_path = argv[1], prompt = argv[2]; - const std::string device = "CPU"; // GPU, NPU can be used as well. + const std::string device = "CPU"; // GPU, NPU can be used as well ov::genai::Text2ImagePipeline pipe(models_path, device); ov::Tensor image = pipe.generate(prompt, @@ -45,10 +45,19 @@ sample shows basic usage of the ``Text2ImagePipeline`` pipeline. ov::genai::num_inference_steps(20), ov::genai::num_images_per_prompt(1)); - // Saves images with a `num_images_per_prompt` name pattern. imwrite("image_%d.bmp", image, true); return EXIT_SUCCESS; + } catch (const std::exception& error) { + try { + std::cerr << error.what() << '\n'; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; + } catch (...) { + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; } .. tab-item:: LoRA.cpp @@ -60,21 +69,19 @@ sample shows basic usage of the ``Text2ImagePipeline`` pipeline. #include "imwrite.hpp" - int32_t main(int32_t argc, char* argv[]) { + int32_t main(int32_t argc, char* argv[]) try { OPENVINO_ASSERT(argc >= 3 && (argc - 3) % 2 == 0, "Usage: ", argv[0], " '' [ ...]]"); const std::string models_path = argv[1], prompt = argv[2]; - const std::string device = "CPU"; // GPU, NPU can be used as well. + const std::string device = "CPU"; // GPU, NPU can be used as well ov::genai::AdapterConfig adapter_config; - // Applying Multiple LoRA adapters simultaneously is supported. Parse them all and the corresponding alphas from cmd parameters: for(size_t i = 0; i < (argc - 3)/2; ++i) { ov::genai::Adapter adapter(argv[3 + 2*i]); float alpha = std::atof(argv[3 + 2*i + 1]); adapter_config.add(adapter, alpha); } - // LoRA adapters passed to the constructor will be activated by default in the next generation. ov::genai::Text2ImagePipeline pipe(models_path, device, ov::genai::adapters(adapter_config)); std::cout << "Generating image with LoRA adapters applied, resulting image will be in lora.bmp\n"; @@ -87,7 +94,7 @@ sample shows basic usage of the ``Text2ImagePipeline`` pipeline. 
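             // The baseline run below reuses the same seed but passes an empty adapter set, so the two images differ only by the LoRA adapters.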
std::cout << "Generating image without LoRA adapters applied, resulting image will be in baseline.bmp\n"; image = pipe.generate(prompt, - ov::genai::adapters(), // Passing adapters as generation overrides set in the constructor; adapters() means no adapters. + ov::genai::adapters(), ov::genai::random_generator(std::make_shared(42)), ov::genai::width(512), ov::genai::height(896), @@ -95,6 +102,16 @@ sample shows basic usage of the ``Text2ImagePipeline`` pipeline. imwrite("baseline.bmp", image, true); return EXIT_SUCCESS; + } catch (const std::exception& error) { + try { + std::cerr << error.what() << '\n'; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; + } catch (...) { + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; } @@ -116,7 +133,6 @@ and use audio files in WAV format at a sampling rate of 16 kHz as input. .. code-block:: python - import argparse import openvino_genai import librosa @@ -126,15 +142,9 @@ and use audio files in WAV format at a sampling rate of 16 kHz as input. return raw_speech.tolist() - def main(): - parser = argparse.ArgumentParser() - parser.add_argument("model_dir") - parser.add_argument("wav_file_path") - args = parser.parse_args() - - raw_speech = read_wav(args.wav_file_path) - - pipe = openvino_genai.WhisperPipeline(args.model_dir) + def infer(model_dir: str, wav_file_path: str): + raw_speech = read_wav(wav_file_path) + pipe = openvino_genai.WhisperPipeline(model_dir) def streamer(word: str) -> bool: print(word, end="") @@ -143,7 +153,6 @@ and use audio files in WAV format at a sampling rate of 16 kHz as input. result = pipe.generate( raw_speech, max_new_tokens=100, - # The 'task' and 'language' parameters are supported for multilingual models only. language="<|en|>", task="transcribe", return_timestamps=True, @@ -151,7 +160,6 @@ and use audio files in WAV format at a sampling rate of 16 kHz as input. ) print() - for chunk in result.chunks: print(f"timestamps: [{chunk.start_ts}, {chunk.end_ts}] text: {chunk.text}") @@ -164,21 +172,24 @@ and use audio files in WAV format at a sampling rate of 16 kHz as input. .. code-block:: cpp - int main(int argc, char* argv[]) { + #include "audio_utils.hpp" + #include "openvino/genai/whisper_pipeline.hpp" + + int main(int argc, char* argv[]) try { if (3 > argc) { throw std::runtime_error(std::string{"Usage: "} + argv[0] + " \"\""); } - std::string model_path = argv[1]; + std::filesystem::path models_path = argv[1]; std::string wav_file_path = argv[2]; + std::string device = "CPU"; // GPU can be used as well - ov::genai::RawSpeechInput raw_speech = utils::audio::read_wav(wav_file_path); + ov::genai::WhisperPipeline pipeline(models_path, device); - ov::genai::WhisperPipeline pipeline{model_path}; + ov::genai::RawSpeechInput raw_speech = utils::audio::read_wav(wav_file_path); - ov::genai::WhisperGenerationConfig config{model_path + "/generation_config.json"}; + ov::genai::WhisperGenerationConfig config(models_path / "generation_config.json"); config.max_new_tokens = 100; - // 'task' and 'language' parameters are supported for multilingual models only config.language = "<|en|>"; config.task = "transcribe"; config.return_timestamps = true; @@ -195,6 +206,19 @@ and use audio files in WAV format at a sampling rate of 16 kHz as input. 
for (auto& chunk : *result.chunks) { std::cout << "timestamps: [" << chunk.start_ts << ", " << chunk.end_ts << "] text: " << chunk.text << "\n"; } + + } catch (const std::exception& error) { + try { + std::cerr << error.what() << '\n'; + } catch (const std::ios_base::failure&) { + } + return EXIT_FAILURE; + } catch (...) { + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) { + } + return EXIT_FAILURE; } @@ -222,8 +246,6 @@ mark a conversation session, as shown in the samples below: def streamer(subword): print(subword, end='', flush=True) - # The return flag corresponds to whether generation should be stopped or not. - # False means continue generation. return False @@ -263,22 +285,20 @@ mark a conversation session, as shown in the samples below: #include "openvino/genai/llm_pipeline.hpp" - int main(int argc, char* argv[]) { + int main(int argc, char* argv[]) try { if (2 != argc) { throw std::runtime_error(std::string{"Usage: "} + argv[0] + " "); } std::string prompt; - std::string model_path = argv[1]; + std::string models_path = argv[1]; std::string device = "CPU"; // GPU, NPU can be used as well - ov::genai::LLMPipeline pipe(model_path, device); + ov::genai::LLMPipeline pipe(models_path, device); ov::genai::GenerationConfig config; config.max_new_tokens = 100; std::function streamer = [](std::string word) { std::cout << word << std::flush; - // Return flag corresponds whether generation should be stopped. - // false means continue generation. return false; }; @@ -290,6 +310,16 @@ mark a conversation session, as shown in the samples below: "question:\n"; } pipe.finish_chat(); + } catch (const std::exception& error) { + try { + std::cerr << error.what() << '\n'; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; + } catch (...) { + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; } From d3a6588f2b6ae29793cde3f6a9194634a2089ade Mon Sep 17 00:00:00 2001 From: sgolebiewski-intel Date: Thu, 24 Oct 2024 14:35:54 +0200 Subject: [PATCH 6/7] Adding Python text2image code samples --- .../genai-guide/genai-use-cases.rst | 123 +++++++++++++++++- 1 file changed, 120 insertions(+), 3 deletions(-) diff --git a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst index ede05cf49fef8c..af1c70d0901e39 100644 --- a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst +++ b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst @@ -18,6 +18,122 @@ sample shows basic usage of the ``Text2ImagePipeline`` pipeline. .. tab-set:: + .. tab-item:: Python + :sync: python + + .. tab-set:: + + .. tab-item:: main.py + :name: mainpy + + .. 
code-block:: python + + import argparse + + import openvino_genai + from PIL import Image + import numpy as np + + class Generator(openvino_genai.Generator): + def __init__(self, seed, mu=0.0, sigma=1.0): + openvino_genai.Generator.__init__(self) + np.random.seed(seed) + self.mu = mu + self.sigma = sigma + + def next(self): + return np.random.normal(self.mu, self.sigma) + + + def main(): + parser = argparse.ArgumentParser() + parser.add_argument('model_dir') + parser.add_argument('prompt') + args = parser.parse_args() + + device = 'CPU' # GPU can be used as well + random_generator = Generator(42) + pipe = openvino_genai.Text2ImagePipeline(args.model_dir, device) + image_tensor = pipe.generate( + args.prompt, + width=512, + height=512, + num_inference_steps=20, + num_images_per_prompt=1, + random_generator=random_generator + ) + + image = Image.fromarray(image_tensor.data[0]) + image.save("image.bmp") + + .. tab-item:: LoRA.py + :name: lorapy + + .. code-block:: python + + import argparse + + import openvino as ov + import openvino_genai + import numpy as np + import sys + + + class Generator(openvino_genai.Generator): + def __init__(self, seed, mu=0.0, sigma=1.0): + openvino_genai.Generator.__init__(self) + np.random.seed(seed) + self.mu = mu + self.sigma = sigma + + def next(self): + return np.random.normal(self.mu, self.sigma) + + + def image_write(path: str, image_tensor: ov.Tensor): + from PIL import Image + image = Image.fromarray(image_tensor.data[0]) + image.save(path) + + + def main(): + parser = argparse.ArgumentParser() + parser.add_argument('models_path') + parser.add_argument('prompt') + args, adapters = parser.parse_known_args() + + prompt = args.prompt + + device = "CPU" # GPU, NPU can be used as well + adapter_config = openvino_genai.AdapterConfig() + + for i in range(int(len(adapters) / 2)): + adapter = openvino_genai.Adapter(adapters[2 * i]) + alpha = float(adapters[2 * i + 1]) + adapter_config.add(adapter, alpha) + + pipe = openvino_genai.Text2ImagePipeline(args.models_path, device, adapters=adapter_config) + print("Generating image with LoRA adapters applied, resulting image will be in lora.bmp") + image = pipe.generate(prompt, + random_generator=Generator(42), + width=512, + height=896, + num_inference_steps=20) + + image_write("lora.bmp", image) + print("Generating image without LoRA adapters applied, resulting image will be in baseline.bmp") + image = pipe.generate(prompt, + adapters=openvino_genai.AdapterConfig(), + random_generator=Generator(42), + width=512, + height=896, + num_inference_steps=20 + ) + image_write("baseline.bmp", image) + + For more information, refer to the + `Python sample `__ + .. tab-item:: C++ :sync: cpp @@ -118,6 +234,10 @@ sample shows basic usage of the ``Text2ImagePipeline`` pipeline. For more information, refer to the `C++ sample `__ + + + + Using GenAI in Speech Recognition ################################# @@ -271,9 +391,6 @@ mark a conversation session, as shown in the samples below: pipe.finish_chat() - if '__main__' == __name__: - main() - For more information, refer to the `Python sample `__. 
From a9902a3da9ca2319a5ac34e938864584e7016de6 Mon Sep 17 00:00:00 2001 From: sgolebiewski-intel Date: Thu, 24 Oct 2024 15:35:45 +0200 Subject: [PATCH 7/7] Removing parser from Python samples --- .../genai-guide/genai-use-cases.rst | 35 +++++-------------- 1 file changed, 8 insertions(+), 27 deletions(-) diff --git a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst index af1c70d0901e39..953784c03fdef0 100644 --- a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst +++ b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst @@ -28,8 +28,6 @@ sample shows basic usage of the ``Text2ImagePipeline`` pipeline. .. code-block:: python - import argparse - import openvino_genai from PIL import Image import numpy as np @@ -45,17 +43,12 @@ sample shows basic usage of the ``Text2ImagePipeline`` pipeline. return np.random.normal(self.mu, self.sigma) - def main(): - parser = argparse.ArgumentParser() - parser.add_argument('model_dir') - parser.add_argument('prompt') - args = parser.parse_args() - + def infer(model_dir: str, prompt: str): device = 'CPU' # GPU can be used as well random_generator = Generator(42) - pipe = openvino_genai.Text2ImagePipeline(args.model_dir, device) + pipe = openvino_genai.Text2ImagePipeline(model_dir, device) image_tensor = pipe.generate( - args.prompt, + prompt, width=512, height=512, num_inference_steps=20, @@ -71,8 +64,6 @@ sample shows basic usage of the ``Text2ImagePipeline`` pipeline. .. code-block:: python - import argparse - import openvino as ov import openvino_genai import numpy as np @@ -96,13 +87,8 @@ sample shows basic usage of the ``Text2ImagePipeline`` pipeline. image.save(path) - def main(): - parser = argparse.ArgumentParser() - parser.add_argument('models_path') - parser.add_argument('prompt') - args, adapters = parser.parse_known_args() - - prompt = args.prompt + def infer(models_path: str, prompt: str): + prompt = "cyberpunk cityscape like Tokyo New York with tall buildings at dusk golden hour cinematic lighting" device = "CPU" # GPU, NPU can be used as well adapter_config = openvino_genai.AdapterConfig() @@ -112,7 +98,7 @@ sample shows basic usage of the ``Text2ImagePipeline`` pipeline. alpha = float(adapters[2 * i + 1]) adapter_config.add(adapter, alpha) - pipe = openvino_genai.Text2ImagePipeline(args.models_path, device, adapters=adapter_config) + pipe = openvino_genai.Text2ImagePipeline(models_path, device, adapters=adapter_config) print("Generating image with LoRA adapters applied, resulting image will be in lora.bmp") image = pipe.generate(prompt, random_generator=Generator(42), @@ -360,7 +346,6 @@ mark a conversation session, as shown in the samples below: .. code-block:: python - import argparse import openvino_genai @@ -369,13 +354,9 @@ mark a conversation session, as shown in the samples below: return False - def main(): - parser = argparse.ArgumentParser() - parser.add_argument('model_dir') - args = parser.parse_args() - + def infer(model_dir: str): device = 'CPU' # GPU can be used as well. - pipe = openvino_genai.LLMPipeline(args.model_dir, device) + pipe = openvino_genai.LLMPipeline(model_dir, device) config = openvino_genai.GenerationConfig() config.max_new_tokens = 100