From e3c3e496a7a70027ff2eaf3541c8672b7d5d6321 Mon Sep 17 00:00:00 2001
From: Karol Blaszczak
Date: Wed, 8 Jan 2025 12:18:18 +0100
Subject: [PATCH] [DOCS] close issue with base ov genai guide 24.6

---
 .../llm-inference-native-ov.rst | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/docs/articles_en/learn-openvino/llm_inference_guide/llm-inference-native-ov.rst b/docs/articles_en/learn-openvino/llm_inference_guide/llm-inference-native-ov.rst
index d33ae05f68f462..cb10e162e77c28 100644
--- a/docs/articles_en/learn-openvino/llm_inference_guide/llm-inference-native-ov.rst
+++ b/docs/articles_en/learn-openvino/llm_inference_guide/llm-inference-native-ov.rst
@@ -98,6 +98,17 @@ Learn more in Loading an LLM with OpenVINO.
 
    optimum-cli export openvino --convert-tokenizer --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 openvino_model
 
+.. note::
+
+   The current version of Optimum converts both the model and the tokenizer. To do so,
+   use the standard export command:
+
+   .. code-block:: console
+
+      optimum-cli export openvino --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 openvino_model
+
+
+
 Full OpenVINO Text Generation Pipeline
 ######################################################################
 
@@ -110,6 +121,7 @@ Use the model and tokenizer converted from the previous step:
 
     import numpy as np
     from openvino import compile_model
+    import openvino_tokenizers  # adds OpenVINO support for tokenizer models
 
     # Compile the tokenizer, model, and detokenizer using OpenVINO. These files are XML representations of the models optimized for OpenVINO
     compiled_tokenizer = compile_model("openvino_tokenizer.xml")
@@ -154,7 +166,7 @@ and appends it to the existing sequence.
     # Generate new tokens iteratively
     for idx in range(prompt_size, prompt_size + new_tokens_size):
         # Get output from the model
-        output = compiled_model(input_dict)["token_ids"]
+        output = compiled_model(input_dict)[0]  # the first model output holds the token ids
         # Update the input_ids with newly generated token
         input_dict["input_ids"][:, idx] = output[:, idx - 1]
         # Update the attention mask to include the new token
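
Note (not part of the patch): for reference, the snippets touched by the hunks
above fit together roughly as in the sketch below. This is a minimal,
illustrative sketch only. It assumes the artifact names produced by the export
step ("openvino_model.xml", "openvino_tokenizer.xml", "openvino_detokenizer.xml"),
that the model's first output holds the generated token ids (as the last hunk
implies), and that the detokenizer output tensor is named "string_output";
"new_tokens_size", "prompt_size", and "input_dict" follow the guide's code.

    import numpy as np
    from openvino import compile_model
    import openvino_tokenizers  # registers tokenizer operations with OpenVINO

    # Compile the tokenizer, model, and detokenizer exported by optimum-cli
    compiled_tokenizer = compile_model("openvino_tokenizer.xml")
    compiled_model = compile_model("openvino_model.xml")
    compiled_detokenizer = compile_model("openvino_detokenizer.xml")

    # Tokenize the prompt
    text_input = ["Quick brown fox jumped"]
    tokenized = compiled_tokenizer(text_input)

    # Pre-allocate space for the prompt plus the new tokens
    new_tokens_size = 10
    prompt_size = tokenized["input_ids"].shape[-1]
    pad = np.zeros((1, new_tokens_size), dtype=tokenized["input_ids"].dtype)
    input_dict = {
        "input_ids": np.hstack([tokenized["input_ids"], pad]),
        "attention_mask": np.hstack([tokenized["attention_mask"], pad]),
    }

    # Generate new tokens iteratively, filling one position per step
    for idx in range(prompt_size, prompt_size + new_tokens_size):
        output = compiled_model(input_dict)[0]  # assumed: first output = token ids
        input_dict["input_ids"][:, idx] = output[:, idx - 1]
        input_dict["attention_mask"][:, idx] = 1

    # Detokenize the full sequence back into text
    print(compiled_detokenizer(input_dict["input_ids"])["string_output"])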