From e3c3e496a7a70027ff2eaf3541c8672b7d5d6321 Mon Sep 17 00:00:00 2001
From: Karol Blaszczak
Date: Wed, 8 Jan 2025 12:18:18 +0100
Subject: [PATCH] [DOCS] close issue with base ov genai guide 24.6

---
 .../llm-inference-native-ov.rst | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/docs/articles_en/learn-openvino/llm_inference_guide/llm-inference-native-ov.rst b/docs/articles_en/learn-openvino/llm_inference_guide/llm-inference-native-ov.rst
index d33ae05f68f462..cb10e162e77c28 100644
--- a/docs/articles_en/learn-openvino/llm_inference_guide/llm-inference-native-ov.rst
+++ b/docs/articles_en/learn-openvino/llm_inference_guide/llm-inference-native-ov.rst
@@ -98,6 +98,17 @@ Learn more in Loading an LLM with OpenVINO.
 
    optimum-cli export openvino --convert-tokenizer --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 openvino_model
 
+.. note::
+
+   The current version of Optimum converts both the model and the tokenizer. To do so,
+   use the standard export command:
+
+   .. code-block:: console
+
+      optimum-cli export openvino --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 openvino_model
+
+
+
 Full OpenVINO Text Generation Pipeline
 ######################################################################
 
@@ -110,6 +121,7 @@ Use the model and tokenizer converted from the previous step:
 
     import numpy as np
     from openvino import compile_model
+    import openvino_tokenizers  # adds OpenVINO support for tokenizer models
 
     # Compile the tokenizer, model, and detokenizer using OpenVINO. These files are XML representations of the models optimized for OpenVINO
     compiled_tokenizer = compile_model("openvino_tokenizer.xml")
@@ -154,7 +166,7 @@ and appends it to the existing sequence.
     # Generate new tokens iteratively
     for idx in range(prompt_size, prompt_size + new_tokens_size):
         # Get output from the model
-        output = compiled_model(input_dict)["token_ids"]
+        output = compiled_model(input_dict)[0]  # the first model output holds the token ids
         # Update the input_ids with newly generated token
         input_dict["input_ids"][:, idx] = output[:, idx - 1]
         # Update the attention mask to include the new token
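
Note (not part of the patch): for reference, the snippets touched by the hunks
above fit together roughly as in the sketch below. This is a minimal,
illustrative sketch only. It assumes the artifact names produced by the export
step ("openvino_model.xml", "openvino_tokenizer.xml", "openvino_detokenizer.xml"),
that the model's first output holds the generated token ids (as the last hunk
implies), and that the detokenizer output tensor is named "string_output";
"new_tokens_size", "prompt_size", and "input_dict" follow the guide's code.

    import numpy as np
    from openvino import compile_model
    import openvino_tokenizers  # registers tokenizer operations with OpenVINO

    # Compile the tokenizer, model, and detokenizer exported by optimum-cli
    compiled_tokenizer = compile_model("openvino_tokenizer.xml")
    compiled_model = compile_model("openvino_model.xml")
    compiled_detokenizer = compile_model("openvino_detokenizer.xml")

    # Tokenize the prompt
    text_input = ["Quick brown fox jumped"]
    tokenized = compiled_tokenizer(text_input)

    # Pre-allocate space for the prompt plus the new tokens
    new_tokens_size = 10
    prompt_size = tokenized["input_ids"].shape[-1]
    pad = np.zeros((1, new_tokens_size), dtype=tokenized["input_ids"].dtype)
    input_dict = {
        "input_ids": np.hstack([tokenized["input_ids"], pad]),
        "attention_mask": np.hstack([tokenized["attention_mask"], pad]),
    }

    # Generate new tokens iteratively, filling one position per step
    for idx in range(prompt_size, prompt_size + new_tokens_size):
        output = compiled_model(input_dict)[0]  # assumed: first output = token ids
        input_dict["input_ids"][:, idx] = output[:, idx - 1]
        input_dict["attention_mask"][:, idx] = 1

    # Detokenize the full sequence back into text
    print(compiled_detokenizer(input_dict["input_ids"])["string_output"])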