From 6aa1117e28d0340e424cfbbdc3b8514680c25011 Mon Sep 17 00:00:00 2001 From: Vitaliy Urusovskij Date: Fri, 2 Feb 2024 12:44:52 -0800 Subject: [PATCH 1/7] Remove `CoreConfiguration()` (#864) --- .../tests/functional/core_config.cpp | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/modules/nvidia_plugin/tests/functional/core_config.cpp b/modules/nvidia_plugin/tests/functional/core_config.cpp index 274bd4894..03a6fb783 100644 --- a/modules/nvidia_plugin/tests/functional/core_config.cpp +++ b/modules/nvidia_plugin/tests/functional/core_config.cpp @@ -2,26 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "functional_test_utils/core_config.hpp" - #include "cuda_test_constants.hpp" #include "shared_test_classes/base/ov_subgraph.hpp" -void CoreConfiguration(LayerTestsUtils::LayerTestsCommon* test) { - std::shared_ptr core = PluginCache::get().ie(); - ov::element::Type hint = ov::element::f32; - for (auto& param : test->GetFunction()->get_parameters()) { - if (param->get_output_element_type(0) == ov::element::f16) { - hint = ov::element::f16; - break; - } - } - // Set inference_precision hint to run fp32 model in fp32 runtime precision as default plugin execution precision - // may vary - std::map config = {{ov::hint::inference_precision.name(), hint.get_type_name()}}; - core->SetConfig(config, ov::test::utils::DEVICE_NVIDIA); -} - namespace ov { namespace test { From a7293b4b904b4b7264490615d40ada43ef923122 Mon Sep 17 00:00:00 2001 From: kumarijy <112030960+kumarijy@users.noreply.github.com> Date: Tue, 6 Feb 2024 09:07:45 -0800 Subject: [PATCH 2/7] [OPENVINO CODE] Added Fill-in-the-middle(FIM) support (#848) * Adding FIM support through deepseek-coder-1.3b-instruct and changed model names at FE * used ALL_CAPS for model name consistency and replaced 7B with 1_3B * fixed frontend modelname * changed version to 0.0.8 * changed FIM tokens format in alignment with HF model deepseek-coder format * added README for adding Fill in the middle mode support and updated OverviewSection * removed int8 from decicoder-1b-openvino model name * added deepseek-coder int-8 model --- modules/openvino_code/README.md | 18 ++++++++ modules/openvino_code/package-lock.json | 4 +- modules/openvino_code/package.json | 45 ++++++++++++++++--- modules/openvino_code/shared/features.ts | 1 + modules/openvino_code/shared/model.ts | 10 +++-- .../OverviewSection/OverviewSection.tsx | 3 +- .../ServerSection/ModelSelect/ModelSelect.tsx | 3 +- 7 files changed, 72 insertions(+), 12 deletions(-) diff --git a/modules/openvino_code/README.md b/modules/openvino_code/README.md index cd5f5dc57..448dbcc35 100644 --- a/modules/openvino_code/README.md +++ b/modules/openvino_code/README.md @@ -8,6 +8,7 @@ OpenVINO Code provides the following features: - Inline Code Completion - Summarization via Docstring +- Fill in the Middle Mode ## Working with Extension @@ -48,6 +49,23 @@ You can select the desired type of quotes in the extension settings. The model can generate docstring in Code Completion mode, but in this case it is impossible to control the result. In the docstring generation mode, various popular templates are available in the settings that will guide the model output. +### Fill in the Middle Mode + + +1. Create a new Python file or open an existing one. +1. Type `def main():` or place the cursor where you'd like middle text to be generated. +1. Press the keyboard shortcut `Ctrl+Alt+Space` (`Cmd+Alt+Space` for macOS) or click the `Generate Code Completion` button located in the side panel. +1. 
You can select the text then generate the related code. +1. You may also right-click on "Generate Inline Code Completion In New Tab" to generate code in a new tab. +1. Use the `Tab` key to accept the entire suggestion or `Ctrl`+`Right Arrow` to accept it word by word. To decline the suggestion, press `Esc`. + +You can customize the length of the generated code by adjusting `Max New Tokens` and `Min New Tokens` parameters in the extension settings. +The number of generated tokens is also influenced by the `Server Request Timeout` setting. + +Fill in the middle mode brings in advanced code completion capabilities supporting fill-in-the-blank task, supporting project-level code completion and infilling tasks. + +To enable fill in the middle mode, check the `Fill In The Middle Mode` checkbox in the extension settings. + ### Monitoring Extension Output To examine the input and output from the code generation API, follow these steps: diff --git a/modules/openvino_code/package-lock.json b/modules/openvino_code/package-lock.json index 89fa7c2e1..0b2521f49 100644 --- a/modules/openvino_code/package-lock.json +++ b/modules/openvino_code/package-lock.json @@ -1,12 +1,12 @@ { "name": "openvino-code-completion", - "version": "0.0.6", + "version": "0.0.8", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "openvino-code-completion", - "version": "0.0.6", + "version": "0.0.8", "license": "https://github.com/openvinotoolkit/openvino_contrib/blob/master/LICENSE", "workspaces": [ "side-panel-ui" diff --git a/modules/openvino_code/package.json b/modules/openvino_code/package.json index ee16aa4b0..d3dd85362 100644 --- a/modules/openvino_code/package.json +++ b/modules/openvino_code/package.json @@ -1,7 +1,7 @@ { "publisher": "OpenVINO", "name": "openvino-code-completion", - "version": "0.0.6", + "version": "0.0.8", "displayName": "OpenVINO Code Completion", "description": "VSCode extension for AI code completion with OpenVINO", "icon": "media/logo.png", @@ -190,11 +190,12 @@ "openvinoCode.model": { "order": 0, "type": "string", - "default": "codet5p-220m-py", + "default": "code-t5", "enum": [ - "codet5p-220m-py", - "decicoder-1b-openvino-int8", - "stablecode-completion-3b-int8" + "code-t5", + "decicoder-1b-openvino", + "stablecode-completion", + "deepseek-coder" ], "description": "Which model to use for code generation." }, @@ -229,6 +230,40 @@ "default": "false", "description": "When checked inline complention will be generated in streaming mode" }, + "openvinoCode.fillInTheMiddleMode": { + "order": 4, + "type": "boolean", + "default": "false", + "description": + "When checked, text before (above) and after (below) the cursor will be used for completion generation. When unckecked, only text before (above) the cursor will be used." + }, + "openvinoCode.startToken": { + "order": 7, + "type": "string", + "default": "< |fim_begin| >", + "description": + "String that is sent to server is in format: `{startToken}{text above cursor}{middleToken}{text below cursor if fillInTheMiddleMode=true}{endToken}`. Leave `startToken`, `middleToken`, or `endToken` empty if there is no special token for those placements." + }, + "openvinoCode.middleToken": { + "order": 8, + "type": "string", + "default": "<|fim▁hole|>", + "description": + "String that is sent to server is in format: `{startToken}{text above cursor}{middleToken}{text below cursor if fillInTheMiddleMode=true}{endToken}`. Leave `startToken`, `middleToken`, or `endToken` empty if there is no special token for those placements." 
+ }, + "openvinoCode.endToken": { + "order": 9, + "type": "string", + "default": "<|fim▁end|>", + "description": + "String that is sent to server is in format: `{startToken}{text above cursor}{middleToken}{text below cursor if fillInTheMiddleMode=true}{endToken}`. Leave `startToken`, `middleToken`, or `endToken` empty if there is no special token for those placements." + }, + "openvinoCode.stopToken": { + "order": 10, + "type": "string", + "default": "<|endoftext|>", + "description": "(Optional) Stop token." + }, "openvinoCode.temperature": { "order": 4, "type": "number", diff --git a/modules/openvino_code/shared/features.ts b/modules/openvino_code/shared/features.ts index d376256b2..48a406c77 100644 --- a/modules/openvino_code/shared/features.ts +++ b/modules/openvino_code/shared/features.ts @@ -1,4 +1,5 @@ export enum Features { CODE_COMPLETION = 'Code Completion', SUMMARIZATION = 'Summarization', + FIM = 'Fill-in-the-middle', } diff --git a/modules/openvino_code/shared/model.ts b/modules/openvino_code/shared/model.ts index 8d642c39a..e08930ecc 100644 --- a/modules/openvino_code/shared/model.ts +++ b/modules/openvino_code/shared/model.ts @@ -4,22 +4,26 @@ enum ModelId { CODE_T5_220M = 'Salesforce/codet5p-220m-py', DECICODER_1B_OPENVINO_INT8 = 'chgk13/decicoder-1b-openvino-int8', STABLECODE_COMPLETION_ALPHA_3B_4K_OPENVINO_INT8 = 'chgk13/stablecode-completion-alpha-3b-4k-openvino-int8', + DEEPSEEK_CODER_1_3B = 'kumarijy/deepseek-coder-1_3b-instruct-openvino-int8', } export enum ModelName { - CODE_T5_220M = 'codet5p-220m-py', - DECICODER_1B_OPENVINO_INT8 = 'decicoder-1b-openvino-int8', - STABLECODE_COMPLETION_ALPHA_3B_4K_OPENVINO_INT8 = 'stablecode-completion-3b-int8', + CODE_T5_220M = 'code-t5', + DECICODER_1B_OPENVINO_INT8 = 'decicoder-1b-openvino', + STABLECODE_COMPLETION_ALPHA_3B_4K_OPENVINO_INT8 = 'stablecode-completion', + DEEPSEEK_CODER_1_3B = 'deepseek-coder', } export const MODEL_NAME_TO_ID_MAP: Record = { [ModelName.CODE_T5_220M]: ModelId.CODE_T5_220M, [ModelName.DECICODER_1B_OPENVINO_INT8]: ModelId.DECICODER_1B_OPENVINO_INT8, [ModelName.STABLECODE_COMPLETION_ALPHA_3B_4K_OPENVINO_INT8]: ModelId.STABLECODE_COMPLETION_ALPHA_3B_4K_OPENVINO_INT8, + [ModelName.DEEPSEEK_CODER_1_3B]: ModelId.DEEPSEEK_CODER_1_3B, }; export const MODEL_SUPPORTED_FEATURES: Record = { [ModelName.CODE_T5_220M]: [Features.CODE_COMPLETION], [ModelName.DECICODER_1B_OPENVINO_INT8]: [Features.CODE_COMPLETION, Features.SUMMARIZATION], [ModelName.STABLECODE_COMPLETION_ALPHA_3B_4K_OPENVINO_INT8]: [Features.CODE_COMPLETION, Features.SUMMARIZATION], + [ModelName.DEEPSEEK_CODER_1_3B]: [Features.CODE_COMPLETION, Features.SUMMARIZATION, Features.FIM], }; diff --git a/modules/openvino_code/side-panel-ui/src/components/sections/OverviewSection/OverviewSection.tsx b/modules/openvino_code/side-panel-ui/src/components/sections/OverviewSection/OverviewSection.tsx index cf699f713..a785538eb 100644 --- a/modules/openvino_code/side-panel-ui/src/components/sections/OverviewSection/OverviewSection.tsx +++ b/modules/openvino_code/side-panel-ui/src/components/sections/OverviewSection/OverviewSection.tsx @@ -7,7 +7,8 @@ export function OverviewSection(): JSX.Element { OpenVINO Code provides the following features:
   • Inline Code Completion
-  • Summarization via docstring
+  • Summarization via docstring
+  • Fill in the Middle Mode
To use OpenVINO Code please start the server. diff --git a/modules/openvino_code/side-panel-ui/src/components/sections/ServerSection/ModelSelect/ModelSelect.tsx b/modules/openvino_code/side-panel-ui/src/components/sections/ServerSection/ModelSelect/ModelSelect.tsx index 16746a7c7..06a1bc9ff 100644 --- a/modules/openvino_code/side-panel-ui/src/components/sections/ServerSection/ModelSelect/ModelSelect.tsx +++ b/modules/openvino_code/side-panel-ui/src/components/sections/ServerSection/ModelSelect/ModelSelect.tsx @@ -7,6 +7,7 @@ const options: SelectOptionProps[] = [ { value: ModelName.CODE_T5_220M }, { value: ModelName.DECICODER_1B_OPENVINO_INT8 }, { value: ModelName.STABLECODE_COMPLETION_ALPHA_3B_4K_OPENVINO_INT8 }, + { value: ModelName.DEEPSEEK_CODER_1_3B }, ]; interface ModelSelectProps { @@ -34,7 +35,7 @@ export const ModelSelect = ({ disabled={disabled} onChange={(value) => onChange(value)} > - {isServerStopped && Supported Featues: {supportedFeatures.join(', ')}} + {isServerStopped && Supported Features: {supportedFeatures.join(', ')}} ); }; From d219e31b311973f1a66d4de76b4b6623adb13944 Mon Sep 17 00:00:00 2001 From: Tomasz Jankowski Date: Wed, 7 Feb 2024 12:19:28 +0100 Subject: [PATCH 3/7] Add ov::Exception wrapper (#869) to access its protected constructor --- modules/nvidia_plugin/src/error.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/modules/nvidia_plugin/src/error.cpp b/modules/nvidia_plugin/src/error.cpp index a3013ecdb..6615149cf 100644 --- a/modules/nvidia_plugin/src/error.cpp +++ b/modules/nvidia_plugin/src/error.cpp @@ -11,6 +11,11 @@ namespace ov { namespace nvidia_gpu { namespace { +class OVExceptionWrapper : public ov::Exception { +public: + OVExceptionWrapper(const std::string& what) : ov::Exception(what) {} +}; + template [[gnu::cold, noreturn]] void throw_exception(const std::string& msg, const std::experimental::source_location& location) { @@ -20,7 +25,7 @@ template [[gnu::cold, noreturn]] void throw_ov_exception(const std::string& msg, const std::experimental::source_location& location) { - throw_exception(msg, location); + throw_exception(msg, location); } [[gnu::cold]] void logError(const std::string& /*msg*/, const std::experimental::source_location& /*location*/) { From 60c3035f2776b4dab7710a3242b304c250631726 Mon Sep 17 00:00:00 2001 From: Artur Paniukov Date: Thu, 8 Feb 2024 17:48:13 +0000 Subject: [PATCH 4/7] [Tokenizers][TF FE] Fix MUSE conversion (#854) * Fix MUSE conversion * Fix MUSE conversion * Add Type Argument To Greedy Decoding * Del PackedString Representation for Sentencepiece * Del includes * Del vars for packed strings * Revert Reshape Translator and Clean Up Unused Code * Add Decomposed Strings Input Back --- .../tokenizer/sentence_piece.cpp | 71 ++++++++----------- .../tokenizer/tensorflow_translators.cpp | 26 +------ 2 files changed, 31 insertions(+), 66 deletions(-) diff --git a/modules/custom_operations/user_ie_extensions/tokenizer/sentence_piece.cpp b/modules/custom_operations/user_ie_extensions/tokenizer/sentence_piece.cpp index f6f75ae95..3804ae48f 100644 --- a/modules/custom_operations/user_ie_extensions/tokenizer/sentence_piece.cpp +++ b/modules/custom_operations/user_ie_extensions/tokenizer/sentence_piece.cpp @@ -91,26 +91,21 @@ SentencepieceTokenizer::SentencepieceTokenizer(const OutputVector& args, const s } void SentencepieceTokenizer::validate_and_infer_types() { - - #if SENTENCE_PIECE_EXTENSION_DECOMPOSED_STRINGS - - FRONT_END_GENERAL_CHECK(get_input_size() == 1 + 3, "SentencepieceTokenizer expects 4 
inputs: sp model and input sentences represented as 3 decomposed tensors (begins, ends, sybols)"); - FRONT_END_GENERAL_CHECK(get_input_element_type(0) == element::u8, "SentencepieceTokenizer accepts sp model as the first input and it should be of type u8 tensor"); - FRONT_END_GENERAL_CHECK(get_input_element_type(1) == element::i32, "SentencepieceTokenizer accepts begins offsets as the second and it should be of type i32 tensor"); - FRONT_END_GENERAL_CHECK(get_input_element_type(2) == element::i32, "SentencepieceTokenizer accepts ends offsets as the third and it should be of type i32 tensor"); - FRONT_END_GENERAL_CHECK(get_input_element_type(3) == element::u8, "SentencepieceTokenizer accepts sentence symbols as the fourth input and it should be of type u8 tensor"); - - #else - - FRONT_END_GENERAL_CHECK(get_input_size() == 2, "SentencepieceTokenizer expects two inputs: sp model and input sentences"); FRONT_END_GENERAL_CHECK(get_input_element_type(0) == element::u8, "SentencepieceTokenizer accepts sp model as the first input and it should be of type u8 tensor"); - FRONT_END_GENERAL_CHECK( - // WA: sometimes f32 appeared as a placeholder for unknown type - get_input_element_type(1) == element::u8 || get_input_element_type(1) == element::string || get_input_element_type(1) == element::f32, - "SentencepieceTokenizer accepts sentences as the second input and it should be of type string tensor"); - - #endif + auto input_size = get_input_size(); + if(input_size == 2) { + FRONT_END_GENERAL_CHECK( + // WA: f32 appeared as a placeholder for unknown type during intermediate conversion steps + get_input_element_type(1) == element::string || get_input_element_type(1) == element::f32, + "SentencepieceTokenizer accepts sentences as the second input and it should be of type string tensor"); + } else if (input_size == 4) { + FRONT_END_GENERAL_CHECK(get_input_element_type(1) == element::i32, "SentencepieceTokenizer accepts begins offsets as the second and it should be of type i32 tensor"); + FRONT_END_GENERAL_CHECK(get_input_element_type(2) == element::i32, "SentencepieceTokenizer accepts ends offsets as the third and it should be of type i32 tensor"); + FRONT_END_GENERAL_CHECK(get_input_element_type(3) == element::u8, "SentencepieceTokenizer accepts sentence symbols as the fourth input and it should be of type u8 tensor"); + } else { + OPENVINO_THROW("Unexpected input format. 
SentencepieceTokenizer accepts one string input or three decomposed string inputs (begins, ends, symbols)"); + }; // The operation SentencepieceTokenizerExtensionOp has three outputs: sparse indices, sparse values // and dense shape @@ -133,17 +128,7 @@ bool SentencepieceTokenizer::evaluate(TensorVector& outputs, const TensorVector& std::vector sparse_values; std::vector sparse_dense_shape; -#if SENTENCE_PIECE_EXTENSION_DECOMPOSED_STRINGS - - auto begin_ids = inputs[1].data(); - auto end_ids = inputs[2].data(); - auto data = inputs[3].data(); - - auto batch_size = shape_size(inputs[1].get_shape()); - -#else - - auto input_element_type = get_input_element_type(1); + auto input_size = get_input_size(); int32_t batch_size; // used in case of string tensors @@ -154,27 +139,31 @@ bool SentencepieceTokenizer::evaluate(TensorVector& outputs, const TensorVector& const int32_t* end_ids; const uint8_t* data; - if(input_element_type == ov::element::string) { - strings = inputs[1].data(); - batch_size = static_cast(ov::shape_size(inputs[1].get_shape())); - } else if(input_element_type == ov::element::u8) { - parse_packed_strings(inputs[1], batch_size, begin_ids, end_ids, data); + if (input_size == 2) { + auto input_element_type = get_input_element_type(1); + if(input_element_type == ov::element::string) { + strings = inputs[1].data(); + batch_size = static_cast(ov::shape_size(inputs[1].get_shape())); + } else { + OPENVINO_THROW("Unexpected input type during inference. SentencepieceTokenizer accepts element::u8 or element::string."); + } } else { - OPENVINO_THROW("Unexpected input type during inference. SentencepieceTokenizer accepts element::u8 or element::string."); - } - -#endif + auto begin_ids = inputs[1].data(); + auto end_ids = inputs[2].data(); + auto data = inputs[3].data(); + batch_size = shape_size(inputs[1].get_shape()); + }; size_t max_token_id = 0; for (size_t batch_ind = 0; batch_ind < batch_size; ++batch_ind) { absl::string_view sentence; - if(input_element_type == ov::element::string) { + if (input_size == 2) { sentence = strings[batch_ind]; - } else if(input_element_type == ov::element::u8) { + } else { auto begin_ind = begin_ids[batch_ind]; auto end_ind = end_ids[batch_ind]; sentence = absl::string_view((const char*)data + begin_ind, end_ind - begin_ind); - } + }; std::vector ids; CHECK_OK(m_sp->SampleEncode(sentence, m_nbest_size, m_alpha, &ids)); diff --git a/modules/custom_operations/user_ie_extensions/tokenizer/tensorflow_translators.cpp b/modules/custom_operations/user_ie_extensions/tokenizer/tensorflow_translators.cpp index 51179dcac..497f53509 100644 --- a/modules/custom_operations/user_ie_extensions/tokenizer/tensorflow_translators.cpp +++ b/modules/custom_operations/user_ie_extensions/tokenizer/tensorflow_translators.cpp @@ -60,7 +60,7 @@ NamedOutputVector translate_sentencepiece_tokenizer(const NodeContext& node) { auto sp_model_const = as_type_ptr(sp_tokenize_op->input_value(0).get_node_shared_ptr()); FRONT_END_GENERAL_CHECK(sp_model_const, "Conversion expects SentencePiece model to be constant."); - // prepare input six inputs + // prepare input auto inputs = sp_tokenize_op->input_value(1); // extract values for nbest_size, alpha, add_bos, add_eos, reverse attributes @@ -70,27 +70,8 @@ NamedOutputVector translate_sentencepiece_tokenizer(const NodeContext& node) { auto add_eos = extract_scalar_const_value(sp_tokenize_op->input_value(5).get_node_shared_ptr(), "add_eos"); auto reverse = extract_scalar_const_value(sp_tokenize_op->input_value(6).get_node_shared_ptr(), 
"reverse"); -#if !USE_STRING_TENSORS - // Override type of input tensor if this is a Parameter - if (auto parameter = std::dynamic_pointer_cast(inputs.get_node_shared_ptr())) { - parameter->set_partial_shape(PartialShape{ Dimension() }); - parameter->set_element_type(element::u8); - parameter->validate_and_infer_types(); - } -#endif - -#if SENTENCE_PIECE_EXTENSION_DECOMPOSED_STRINGS - - OutputVector inputs_vector = OutputVector{ sp_model_const }; - auto unpacked_outputs = std::make_shared(OutputVector{inputs}, "begins_ends")->outputs(); - inputs_vector.insert(inputs_vector.end(), unpacked_outputs.begin(), unpacked_outputs.end()); - -#else - OutputVector inputs_vector = OutputVector{ sp_model_const, inputs }; -#endif - // create a node with custom operation auto sp_tokenizer_ext = std::make_shared(inputs_vector, nbest_size, alpha, add_bos, add_eos, reverse); FRONT_END_GENERAL_CHECK(sp_tokenizer_ext->get_output_size() == 3, @@ -182,7 +163,6 @@ ov::OutputVector translate_lookup_table_find_v2(const ov::frontend::NodeContext& auto wp_tokenizer_inputs = wp_tokenizer->input_values(); wp_tokenizer_inputs.push_back(unk_token_id); - //std::cerr << "Added extra input, total number of inputs is " << wp_tokenizer_inputs.size() << "\n"; auto new_wp_tokenizer = wp_tokenizer->clone_with_new_inputs(wp_tokenizer_inputs); return { post_translate_ragged_tensor_output(new_wp_tokenizer->outputs()) }; @@ -209,7 +189,6 @@ ov::OutputVector translate_reshape(const ov::frontend::NodeContext& node) { auto reshape = std::make_shared(tensor, shape, false); return {reshape}; } - // set_node_name(node.get_name(), reshape); // TODO: requires dependencies from TF FE internals } // Copied and pasted from TF FE and adopted to not use internal TF FE operation classes @@ -232,9 +211,7 @@ ov::OutputVector translate_const(const ov::frontend::NodeContext& node) { const_node = std::make_shared(OutputVector{}); } } else { - //static std::vector tensors; auto tensor = node.get_attribute("value"); - //tensors.push_back(tensor); const_node = std::make_shared(tensor); #if OPENVINO_ELEMENT_STRING_SUPPORTED if (const_node->get_element_type() == element::string) { @@ -246,6 +223,5 @@ ov::OutputVector translate_const(const ov::frontend::NodeContext& node) { } #endif } - //set_node_name(node.get_name(), const_node); // TODO: Provide alternative to internal function set_node_name return {const_node}; } From def3bddee39c6b023e204cb09feba44314823e7a Mon Sep 17 00:00:00 2001 From: Artur Paniukov Date: Thu, 8 Feb 2024 17:48:42 +0000 Subject: [PATCH 5/7] Add Type Argument To Greedy Decoding (#858) Co-authored-by: Ilya Lavrenov --- .../tokenizer/python/openvino_tokenizers/utils.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/modules/custom_operations/user_ie_extensions/tokenizer/python/openvino_tokenizers/utils.py b/modules/custom_operations/user_ie_extensions/tokenizer/python/openvino_tokenizers/utils.py index 1d152c13c..f4ef3fcee 100644 --- a/modules/custom_operations/user_ie_extensions/tokenizer/python/openvino_tokenizers/utils.py +++ b/modules/custom_operations/user_ie_extensions/tokenizer/python/openvino_tokenizers/utils.py @@ -86,9 +86,12 @@ def greedy_decoder(input) -> Model: return token_ids.output(0) -def add_greedy_decoding(text_generation_model: Model, logits_output: str = LOGITS_OUTPUT_NAME) -> Model: +def add_greedy_decoding( + text_generation_model: Model, logits_output: str = LOGITS_OUTPUT_NAME, output_type: Type = Type.i64 +) -> Model: ppp = PrePostProcessor(text_generation_model) 
ppp.output(logits_output).postprocess().custom(greedy_decoder) + ppp.output(logits_output).tensor().set_element_type(output_type) model = ppp.build() model.output(logits_output).tensor.set_names({TOKEN_IDS_OUTPUT_NAME}) return model From 02e75ff662d41557e5dce0fb3cdfaa626fdd69c8 Mon Sep 17 00:00:00 2001 From: Oleg Pipikin Date: Thu, 8 Feb 2024 20:21:17 +0100 Subject: [PATCH 6/7] Remove legacy test utils (#871) --- .../nvidia_plugin/tests/unit/CMakeLists.txt | 4 + .../tests/unit/cuda_multi_graph_ti_test.cpp | 84 +++++++++---------- 2 files changed, 46 insertions(+), 42 deletions(-) diff --git a/modules/nvidia_plugin/tests/unit/CMakeLists.txt b/modules/nvidia_plugin/tests/unit/CMakeLists.txt index 7d06bf257..bf96c6515 100644 --- a/modules/nvidia_plugin/tests/unit/CMakeLists.txt +++ b/modules/nvidia_plugin/tests/unit/CMakeLists.txt @@ -37,6 +37,10 @@ ov_add_test_target( openvino::gmock openvino::ov_models openvino::commonTestUtils + openvino::funcSharedTests + INCLUDES + PRIVATE + "${OpenVINO_SOURCE_DIR}/src/plugins/template/include" ADD_CPPLINT ADD_CLANG_FORMAT LABELS diff --git a/modules/nvidia_plugin/tests/unit/cuda_multi_graph_ti_test.cpp b/modules/nvidia_plugin/tests/unit/cuda_multi_graph_ti_test.cpp index 5c06d28da..5df6e802d 100644 --- a/modules/nvidia_plugin/tests/unit/cuda_multi_graph_ti_test.cpp +++ b/modules/nvidia_plugin/tests/unit/cuda_multi_graph_ti_test.cpp @@ -4,12 +4,21 @@ #include +#include "common_test_utils/node_builders/eltwise.hpp" +#include "common_test_utils/node_builders/gru_cell.hpp" +#include "common_test_utils/test_constants.hpp" #include "cuda_graph_topology_runner.hpp" #include "cuda_simple_execution_delegator.hpp" -#include "ops/parameter.hpp" -#include "ops/result.hpp" -#include "ov_models/builders.hpp" +#include "functional_test_utils/ov_plugin_cache.hpp" +#include "openvino/op/concat.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/parameter.hpp" +#include "openvino/op/result.hpp" +#include "openvino/op/split.hpp" +#include "openvino/op/squeeze.hpp" +#include "openvino/op/unsqueeze.hpp" #include "ov_models/utils/data_utils.hpp" +#include "template/properties.hpp" using namespace ov::nvidia_gpu; using namespace testing; @@ -41,49 +50,38 @@ void generateInput(ov::Tensor& tensor, int to = TO, int from = FROM, int seed = std::generate(ptr, ptr + tensor.get_size(), [&dist, &engine]() { return CalcType{dist(engine)}; }); } -std::vector> calcRefs(std::shared_ptr model, - const std::vector>& inputs) { +ov::TensorVector calcRefs(std::shared_ptr model, const std::vector>& inputs) { auto refModel = model->clone(); - auto referenceInputs = std::vector>(inputs.size()); - auto refInputsTypes = std::vector(inputs.size()); - for (std::size_t i = 0; i < inputs.size(); ++i) { - const auto& input = inputs[i]; - const auto inputSize = input->get_byte_size(); + std::shared_ptr core = ov::test::utils::PluginCache::get().core(); - auto& referenceInput = referenceInputs[i]; - referenceInput.resize(inputSize); - - const auto* buffer = static_cast(input->data()); - std::copy(buffer, buffer + inputSize, referenceInput.data()); - - refInputsTypes[i] = CALC_ELEMENT_TYPE; + auto compiled_model_ref = core->compile_model( + refModel, ov::test::utils::DEVICE_TEMPLATE, {{ov::template_plugin::disable_transformations(true)}}); + auto infer_request_ref = compiled_model_ref.create_infer_request(); + auto params = refModel->get_parameters(); + OPENVINO_ASSERT(params.size() == inputs.size()); + for (int i = 0; i < params.size(); i++) { + 
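        // Feed each recorded input tensor to the reference infer request created on the
        // Template plugin, matching model parameters to input tensors by position.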
infer_request_ref.set_tensor(params[i]->get_default_output(), *inputs[i]); } + infer_request_ref.infer(); - const auto expectedOutputs = ngraph::helpers::interpreterFunction(refModel, referenceInputs, refInputsTypes); - - std::vector> res(expectedOutputs.size()); - for (std::size_t i = 0; i < expectedOutputs.size(); ++i) { - EXPECT_EQ(expectedOutputs[i].first, CALC_ELEMENT_TYPE); - const auto& expOut = expectedOutputs[i].second; - auto& resOut = res[i]; - const auto resOutSize = expOut.size() / sizeof(CalcType); - resOut.resize(resOutSize); - - const auto* buffer = static_cast(static_cast(expOut.data())); - std::copy(buffer, buffer + resOutSize, resOut.data()); + ov::TensorVector outputs; + for (const auto& output : refModel->outputs()) { + outputs.push_back(infer_request_ref.get_tensor(output)); } - return res; + + return outputs; } -void validateOutput(const ov::Tensor& tensor, const std::vector& refVector, float threshold) { +void validateOutput(const ov::Tensor& tensor, const ov::Tensor& ref_tensor, float threshold) { EXPECT_EQ(tensor.get_element_type(), CALC_ELEMENT_TYPE); + EXPECT_EQ(ref_tensor.get_element_type(), CALC_ELEMENT_TYPE); const auto size = tensor.get_size(); - EXPECT_EQ(size, refVector.size()); + EXPECT_EQ(size, ref_tensor.get_size()); const auto* ptr = getConstPtr(tensor); - bool areEqual = std::equal(ptr, ptr + size, refVector.cbegin(), [threshold](auto val1, auto val2) { - return std::abs(val1 - val2) < threshold; - }); + const auto* ref_ptr = getConstPtr(ref_tensor); + bool areEqual = std::equal( + ptr, ptr + size, ptr, [threshold](auto val1, auto val2) { return std::abs(val1 - val2) < threshold; }); EXPECT_TRUE(areEqual); } @@ -121,7 +119,7 @@ class GRUTI { auto squeeze = std::make_shared(bodyParams[0], axis); ov::OutputVector out_vector = {squeeze, bodyParams[1]}; auto gru_cell = - ngraph::builder::makeGRU(out_vector, WRB, hidden_size, {"sigmoid", "tanh"}, {}, {}, clip, false); + ov::test::utils::make_gru(out_vector, WRB, hidden_size, {"sigmoid", "tanh"}, {}, {}, clip, false); auto unsqueeze = std::make_shared(gru_cell->output(0), axis); ov::ResultVector results{std::make_shared(gru_cell->output(0)), std::make_shared(unsqueeze)}; @@ -202,10 +200,12 @@ class SplitConcatAddTI { } auto squeeze = std::make_shared(bodyParams[0], axisConstant); - const auto split = ngraph::builder::makeSplit(squeeze, CALC_ELEMENT_TYPE, 2, 1); + const auto split_axis_op = + std::make_shared(ov::element::i64, ov::Shape{}, std::vector{1}); + const auto split = std::make_shared(squeeze, split_axis_op, 2); const auto concat = std::make_shared(ov::OutputVector{split->output(0), split->output(1)}, 1); - const auto add0 = ngraph::builder::makeEltwise(concat->output(0), bodyParams[1], EltwiseTypes::ADD); + const auto add0 = ov::test::utils::make_eltwise(concat->output(0), bodyParams[1], EltwiseTypes::ADD); auto unsqueeze = std::make_shared(add0->output(0), axisConstant); ov::ResultVector results{std::make_shared(add0->output(0)), @@ -299,13 +299,13 @@ class CudaMultiGraphTest : public Test { void run() { runner_.Run(*inferRequestContext_, deviceMemBlock_); } - void calcRefs() { refOutputs_ = ::calcRefs(model_, inputTensors_); } + void calcRefs() { refOutputTensors_ = ::calcRefs(model_, inputTensors_); } void validate(float threshold = THRESHOLD) { const auto size = outputTensors_.size(); - EXPECT_EQ(size, refOutputs_.size()); + EXPECT_EQ(size, refOutputTensors_.size()); for (std::size_t i = 0; i < size; ++i) { - validateOutput(*outputTensors_[i], refOutputs_[i], THRESHOLD); + 
validateOutput(*outputTensors_[i], refOutputTensors_[i], THRESHOLD); } } @@ -349,6 +349,7 @@ class CudaMultiGraphTest : public Test { SimpleExecutionDelegator simpleExecutionDelegator_{}; std::vector> inputTensors_{populateTensors(model_->inputs())}; std::vector> outputTensors_{populateTensors(model_->outputs())}; + ov::TensorVector refOutputTensors_; std::map inputIndices_{populateInputIndices(model_)}; std::map outputIndices_{populateOutputIndices(model_)}; std::unique_ptr inferRequestContext_ = @@ -363,7 +364,6 @@ class CudaMultiGraphTest : public Test { false); DeviceMemBlock deviceMemBlock_{runner_.GetSubGraph().memoryManager()->mutableTensorsMemoryModel()}; - std::vector> refOutputs_; int currentSeed_ = SEED; }; From 468701aa12c9960198e8184f6baf66cd695db777 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Fri, 9 Feb 2024 11:49:34 +0400 Subject: [PATCH 7/7] Revert "Remove legacy test utils (#871)" (#872) This reverts commit 02e75ff662d41557e5dce0fb3cdfaa626fdd69c8. --- .../nvidia_plugin/tests/unit/CMakeLists.txt | 4 - .../tests/unit/cuda_multi_graph_ti_test.cpp | 84 +++++++++---------- 2 files changed, 42 insertions(+), 46 deletions(-) diff --git a/modules/nvidia_plugin/tests/unit/CMakeLists.txt b/modules/nvidia_plugin/tests/unit/CMakeLists.txt index bf96c6515..7d06bf257 100644 --- a/modules/nvidia_plugin/tests/unit/CMakeLists.txt +++ b/modules/nvidia_plugin/tests/unit/CMakeLists.txt @@ -37,10 +37,6 @@ ov_add_test_target( openvino::gmock openvino::ov_models openvino::commonTestUtils - openvino::funcSharedTests - INCLUDES - PRIVATE - "${OpenVINO_SOURCE_DIR}/src/plugins/template/include" ADD_CPPLINT ADD_CLANG_FORMAT LABELS diff --git a/modules/nvidia_plugin/tests/unit/cuda_multi_graph_ti_test.cpp b/modules/nvidia_plugin/tests/unit/cuda_multi_graph_ti_test.cpp index 5df6e802d..5c06d28da 100644 --- a/modules/nvidia_plugin/tests/unit/cuda_multi_graph_ti_test.cpp +++ b/modules/nvidia_plugin/tests/unit/cuda_multi_graph_ti_test.cpp @@ -4,21 +4,12 @@ #include -#include "common_test_utils/node_builders/eltwise.hpp" -#include "common_test_utils/node_builders/gru_cell.hpp" -#include "common_test_utils/test_constants.hpp" #include "cuda_graph_topology_runner.hpp" #include "cuda_simple_execution_delegator.hpp" -#include "functional_test_utils/ov_plugin_cache.hpp" -#include "openvino/op/concat.hpp" -#include "openvino/op/constant.hpp" -#include "openvino/op/parameter.hpp" -#include "openvino/op/result.hpp" -#include "openvino/op/split.hpp" -#include "openvino/op/squeeze.hpp" -#include "openvino/op/unsqueeze.hpp" +#include "ops/parameter.hpp" +#include "ops/result.hpp" +#include "ov_models/builders.hpp" #include "ov_models/utils/data_utils.hpp" -#include "template/properties.hpp" using namespace ov::nvidia_gpu; using namespace testing; @@ -50,38 +41,49 @@ void generateInput(ov::Tensor& tensor, int to = TO, int from = FROM, int seed = std::generate(ptr, ptr + tensor.get_size(), [&dist, &engine]() { return CalcType{dist(engine)}; }); } -ov::TensorVector calcRefs(std::shared_ptr model, const std::vector>& inputs) { +std::vector> calcRefs(std::shared_ptr model, + const std::vector>& inputs) { auto refModel = model->clone(); - std::shared_ptr core = ov::test::utils::PluginCache::get().core(); + auto referenceInputs = std::vector>(inputs.size()); + auto refInputsTypes = std::vector(inputs.size()); + for (std::size_t i = 0; i < inputs.size(); ++i) { + const auto& input = inputs[i]; + const auto inputSize = input->get_byte_size(); - auto compiled_model_ref = core->compile_model( - refModel, 
ov::test::utils::DEVICE_TEMPLATE, {{ov::template_plugin::disable_transformations(true)}}); - auto infer_request_ref = compiled_model_ref.create_infer_request(); - auto params = refModel->get_parameters(); - OPENVINO_ASSERT(params.size() == inputs.size()); - for (int i = 0; i < params.size(); i++) { - infer_request_ref.set_tensor(params[i]->get_default_output(), *inputs[i]); - } - infer_request_ref.infer(); + auto& referenceInput = referenceInputs[i]; + referenceInput.resize(inputSize); + + const auto* buffer = static_cast(input->data()); + std::copy(buffer, buffer + inputSize, referenceInput.data()); - ov::TensorVector outputs; - for (const auto& output : refModel->outputs()) { - outputs.push_back(infer_request_ref.get_tensor(output)); + refInputsTypes[i] = CALC_ELEMENT_TYPE; } - return outputs; + const auto expectedOutputs = ngraph::helpers::interpreterFunction(refModel, referenceInputs, refInputsTypes); + + std::vector> res(expectedOutputs.size()); + for (std::size_t i = 0; i < expectedOutputs.size(); ++i) { + EXPECT_EQ(expectedOutputs[i].first, CALC_ELEMENT_TYPE); + const auto& expOut = expectedOutputs[i].second; + auto& resOut = res[i]; + const auto resOutSize = expOut.size() / sizeof(CalcType); + resOut.resize(resOutSize); + + const auto* buffer = static_cast(static_cast(expOut.data())); + std::copy(buffer, buffer + resOutSize, resOut.data()); + } + return res; } -void validateOutput(const ov::Tensor& tensor, const ov::Tensor& ref_tensor, float threshold) { +void validateOutput(const ov::Tensor& tensor, const std::vector& refVector, float threshold) { EXPECT_EQ(tensor.get_element_type(), CALC_ELEMENT_TYPE); - EXPECT_EQ(ref_tensor.get_element_type(), CALC_ELEMENT_TYPE); const auto size = tensor.get_size(); - EXPECT_EQ(size, ref_tensor.get_size()); + EXPECT_EQ(size, refVector.size()); const auto* ptr = getConstPtr(tensor); - const auto* ref_ptr = getConstPtr(ref_tensor); - bool areEqual = std::equal( - ptr, ptr + size, ptr, [threshold](auto val1, auto val2) { return std::abs(val1 - val2) < threshold; }); + bool areEqual = std::equal(ptr, ptr + size, refVector.cbegin(), [threshold](auto val1, auto val2) { + return std::abs(val1 - val2) < threshold; + }); EXPECT_TRUE(areEqual); } @@ -119,7 +121,7 @@ class GRUTI { auto squeeze = std::make_shared(bodyParams[0], axis); ov::OutputVector out_vector = {squeeze, bodyParams[1]}; auto gru_cell = - ov::test::utils::make_gru(out_vector, WRB, hidden_size, {"sigmoid", "tanh"}, {}, {}, clip, false); + ngraph::builder::makeGRU(out_vector, WRB, hidden_size, {"sigmoid", "tanh"}, {}, {}, clip, false); auto unsqueeze = std::make_shared(gru_cell->output(0), axis); ov::ResultVector results{std::make_shared(gru_cell->output(0)), std::make_shared(unsqueeze)}; @@ -200,12 +202,10 @@ class SplitConcatAddTI { } auto squeeze = std::make_shared(bodyParams[0], axisConstant); - const auto split_axis_op = - std::make_shared(ov::element::i64, ov::Shape{}, std::vector{1}); - const auto split = std::make_shared(squeeze, split_axis_op, 2); + const auto split = ngraph::builder::makeSplit(squeeze, CALC_ELEMENT_TYPE, 2, 1); const auto concat = std::make_shared(ov::OutputVector{split->output(0), split->output(1)}, 1); - const auto add0 = ov::test::utils::make_eltwise(concat->output(0), bodyParams[1], EltwiseTypes::ADD); + const auto add0 = ngraph::builder::makeEltwise(concat->output(0), bodyParams[1], EltwiseTypes::ADD); auto unsqueeze = std::make_shared(add0->output(0), axisConstant); ov::ResultVector results{std::make_shared(add0->output(0)), @@ -299,13 +299,13 @@ class 
CudaMultiGraphTest : public Test { void run() { runner_.Run(*inferRequestContext_, deviceMemBlock_); } - void calcRefs() { refOutputTensors_ = ::calcRefs(model_, inputTensors_); } + void calcRefs() { refOutputs_ = ::calcRefs(model_, inputTensors_); } void validate(float threshold = THRESHOLD) { const auto size = outputTensors_.size(); - EXPECT_EQ(size, refOutputTensors_.size()); + EXPECT_EQ(size, refOutputs_.size()); for (std::size_t i = 0; i < size; ++i) { - validateOutput(*outputTensors_[i], refOutputTensors_[i], THRESHOLD); + validateOutput(*outputTensors_[i], refOutputs_[i], THRESHOLD); } } @@ -349,7 +349,6 @@ class CudaMultiGraphTest : public Test { SimpleExecutionDelegator simpleExecutionDelegator_{}; std::vector> inputTensors_{populateTensors(model_->inputs())}; std::vector> outputTensors_{populateTensors(model_->outputs())}; - ov::TensorVector refOutputTensors_; std::map inputIndices_{populateInputIndices(model_)}; std::map outputIndices_{populateOutputIndices(model_)}; std::unique_ptr inferRequestContext_ = @@ -364,6 +363,7 @@ class CudaMultiGraphTest : public Test { false); DeviceMemBlock deviceMemBlock_{runner_.GetSubGraph().memoryManager()->mutableTensorsMemoryModel()}; + std::vector> refOutputs_; int currentSeed_ = SEED; };
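Note on patch 2: the new `openvinoCode.startToken`, `openvinoCode.middleToken`, `openvinoCode.endToken`, and `openvinoCode.fillInTheMiddleMode` settings describe the request string as `{startToken}{text above cursor}{middleToken}{text below cursor if fillInTheMiddleMode=true}{endToken}`. The sketch below only illustrates how a client could assemble that string; the function name, interface, and the behaviour with FIM disabled are assumptions, not code from this repository.

```typescript
// Illustrative sketch only; names and the non-FIM branch are assumptions.
interface FimSettings {
  fillInTheMiddleMode: boolean;
  startToken: string;   // e.g. the DeepSeek-style default configured in package.json
  middleToken: string;
  endToken: string;
}

// Builds the prompt in the documented format:
// {startToken}{text above cursor}{middleToken}{text below cursor}{endToken}
function buildFimPrompt(before: string, after: string, s: FimSettings): string {
  if (!s.fillInTheMiddleMode) {
    // With FIM disabled, only the text above the cursor is used
    // (whether the special tokens are still wrapped around it is not specified by the patch).
    return before;
  }
  return `${s.startToken}${before}${s.middleToken}${after}${s.endToken}`;
}
```

Leaving `startToken`, `middleToken`, or `endToken` empty simply contributes nothing to the string, which matches the settings' note that they can be left blank for models without such special tokens.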