From a9bfd0ff0e6683cb4b2c56d8ed1795472b9e0f2e Mon Sep 17 00:00:00 2001 From: Prakash Date: Mon, 29 Jul 2024 20:48:32 +0530 Subject: [PATCH 01/10] [OV JS] Add optical-character-recognition sample script (#25653) ## Details: - added code in script - updated the samples list in readme A few doubts and questions: - do I need to strictly check whether the passed model exists and is correct? For example, does it have a corresponding weights file present in the directory? - do I also need to accept the output location from the user, i.e. where they want to save the output images? Currently I save them in a directory named results and log the save location once the sample finishes - I'm logging bounding boxes along with the annotated text; let me know if I need to change anything there as far as logging details are concerned - should I describe the process step by step using comments like you did in the hello_classification sample ``` //----------------- Step 1. Initialize OpenVINO Runtime Core ----------------- //----------------- Step 2. Read a model ------------------------------------- //----------------- Step 3. Set up input ------------------------------------- ``` Please let me know your feedback @Aliczi @vishniakov-nikolai With regards, Prakash --------- Co-authored-by: Vishniakov Nikolai --- samples/js/node/README.md | 1 + .../optical_character_recognition/README.md | 6 + .../optical-character-recognition.js | 422 ++++++++++++++++++ 3 files changed, 429 insertions(+) create mode 100644 samples/js/node/optical_character_recognition/README.md create mode 100644 samples/js/node/optical_character_recognition/optical-character-recognition.js diff --git a/samples/js/node/README.md b/samples/js/node/README.md index 7375219ccf2c0a..7cde08ffbef0c9 100644 --- a/samples/js/node/README.md +++ b/samples/js/node/README.md @@ -34,6 +34,7 @@ You can run this sample in the browser; no installation is required. [Codesandbox](https://codesandbox.io/) is a free online service with limited resources. For optimal performance and more control, it is recommended to run the sample locally.
- [hello-classification-sample](https://codesandbox.io/p/devbox/openvino-node-hello-classification-sample-djl893) +- optical-character-recognition-sample ## See Also diff --git a/samples/js/node/optical_character_recognition/README.md b/samples/js/node/optical_character_recognition/README.md new file mode 100644 index 00000000000000..130566ca0bcd4c --- /dev/null +++ b/samples/js/node/optical_character_recognition/README.md @@ -0,0 +1,6 @@ +# Optical Character Recognition Node.js Sample + +Run: +```bash +node optical-character-recognition.js *path_to_detection_model_file* *path_to_recognition_model_file* *path_to_img* AUTO +``` \ No newline at end of file diff --git a/samples/js/node/optical_character_recognition/optical-character-recognition.js b/samples/js/node/optical_character_recognition/optical-character-recognition.js new file mode 100644 index 00000000000000..5e371c1975a993 --- /dev/null +++ b/samples/js/node/optical_character_recognition/optical-character-recognition.js @@ -0,0 +1,422 @@ +const { addon: ov } = require('openvino-node'); +const fs = require('node:fs'); +const path = require('node:path'); +const { createCanvas, ImageData } = require('canvas'); +const { cv } = require('opencv-wasm'); +const { + transform, + getImageData, + argMax, + setShape, +} = require('../helpers.js'); + +if (require.main === module) { +// Parsing and validation of input arguments + if (process.argv.length !== 6) + throw new Error( + `Usage: ${process.argv[1]} <path_to_detection_model_file>` + + ' <path_to_recognition_model_file>' + + ' <path_to_img> <device_name>', + ); + + const detModelXMLPath = process.argv[2]; + const recModelXMLPath = process.argv[3]; + const imagePath = process.argv[4]; + const deviceName = process.argv[5]; + + // main() is async, so failures surface as promise rejections + main(detModelXMLPath, recModelXMLPath, imagePath, deviceName) + .catch((error) => console.error('Error occurred', error)); +} + +async function main(detModelXMLPath, recModelXMLPath, imagePath, deviceName) { + // Initialize OpenVINO core and load the detection model + const core = new ov.Core(); + const detModel = await core.readModel(detModelXMLPath); + const detCompiledModel = await core.compileModel(detModel, deviceName); + const detInputLayer = detCompiledModel.input(0); + const detOutputLayer = detCompiledModel.output('boxes'); + + const imageData = await getImageData(imagePath); + const inputImageMat = cv.matFromImageData(imageData); + + // Resize the image to meet network input size + const [, , H, W] = detInputLayer.shape; + const resizedImage = new cv.Mat(); + cv.cvtColor(inputImageMat, inputImageMat, cv.COLOR_RGBA2RGB); + cv.cvtColor(inputImageMat, inputImageMat, cv.COLOR_BGR2RGB); + cv.resize(inputImageMat, resizedImage, new cv.Size(W, H)); + + // Prepare input tensor + const inputImage = transform( + resizedImage.data, + { width: W, height: H }, + [0, 1, 2], + ); + const tensorData = new Float32Array(inputImage); + const tensor = new ov.Tensor(ov.element.f32, detInputLayer.shape, tensorData); + + const detInferRequest = detCompiledModel.createInferRequest(); + + const detResult = await detInferRequest.inferAsync([tensor]); + const boundingBoxesArray = extractBoundingBoxes(detResult[detOutputLayer]); + + const recModel = await core.readModel(recModelXMLPath); + const recModelCompiled = await core.compileModel(recModel, deviceName); + const recInputLayer = recModelCompiled.input(0); + const recOutputLayer = recModelCompiled.output(0); + + // Process each bounding box and run inference on the recognition model + const [, , height, width] = recInputLayer.shape; + // Calculate ratios + const { ratioX, ratioY } = calculateRatios(inputImageMat,
resizedImage); + + // Convert image to grayscale + const grayscaleImage = convertToGrayscale(inputImageMat); + + const annotations = []; + const croppedImages = []; + + for (let i = 0; i < boundingBoxesArray.length; i++) { + const crop = boundingBoxesArray[i]; + const [xMin, yMin, xMax, yMax] = multiplyByRatio(ratioX, ratioY, crop).map( + Math.floor, + ); + const cropRect = new cv.Rect(xMin, yMin, xMax - xMin, yMax - yMin); + const croppedImage = grayscaleImage.roi(cropRect); + + try { + const preprocessedCrop = resizeAndConvertCropToModelInput(croppedImage, [ + width, + height, + ]); + const tensorData = new Float32Array(preprocessedCrop); + const tensor = new ov.Tensor( + ov.element.f32, + Int32Array.from(recInputLayer.shape), + tensorData, + ); + + await inferAsyncProcess( + tensor, + recModelCompiled, + recOutputLayer, + i, + annotations, + ); + + croppedImages.push(cropImage(inputImageMat, xMin, yMin, xMax, yMax)); + } catch(error) { + console.error('Error during preprocessing:', error); + } + + croppedImage.delete(); + } + + grayscaleImage.delete(); + + const boxesWithAnnotations = boundingBoxesArray.map((box, index) => ({ + box, + annotation: annotations[index], + })); + + logBoxesWithAnnotations(boxesWithAnnotations); + + convertResultToImage( + inputImageMat, + resizedImage, + boxesWithAnnotations, + { threshold: 0.3, confLabels: true }, + './assets/results/output_image.jpg', + ); + + croppedImages.forEach((croppedImage, i) => { + const savePath = `./assets/results/cropped_image_${i}.jpg`; + saveImage(croppedImage, savePath); + }); +} + +// Function to extract bounding boxes from the model output +function extractBoundingBoxes(output) { + const { data: boxes } = output; + const foldingCoefficient = 5; + const numberOfBoxes = boxes.length / foldingCoefficient; + + return setShape(boxes, [numberOfBoxes, foldingCoefficient]); +} + +// Function to calculate the ratios for the image +function calculateRatios(originalImage, resizedImage) { + const realY = originalImage.rows; + const realX = originalImage.cols; + const resizedY = resizedImage.rows; + const resizedX = resizedImage.cols; + const ratioX = realX / resizedX; + const ratioY = realY / resizedY; + + return { ratioX, ratioY }; +} + +// Function to convert the image to grayscale +function convertToGrayscale(originalImage) { + const grayscaleImage = new cv.Mat(); + cv.cvtColor(originalImage, grayscaleImage, cv.COLOR_BGR2GRAY); + + return grayscaleImage; +} + +// Function to adjust bounding box coordinates by a given ratio +function multiplyByRatio(ratioX, ratioY, box) { + const scaleShape = (shape, idx) => + idx % 2 ? 
Math.max(shape * ratioY, 10) : shape * ratioX; + + return box.map(scaleShape); +} + +// Function to resize and convert a crop to the recognition model input format +function resizeAndConvertCropToModelInput(crop, netShape) { + const [netWidth, netHeight] = netShape; + + // Resize the crop to the network's input shape + const tempImg = new cv.Mat(); + cv.resize(crop, tempImg, new cv.Size(netWidth, netHeight)); + + // Create the reshaped buffer + const reshapedBuffer = new Uint8Array(netHeight * netWidth); + let index = 0; + + for (let i = 0; i < netHeight; i++) { + for (let j = 0; j < netWidth; j++) { + reshapedBuffer[index++] = tempImg.ucharPtr(i, j)[0]; + } + } + + // Clean up + tempImg.delete(); + + return reshapedBuffer; +} + +// Function to extract recognition results from the model output +function extractRecognitionResults(output) { + const outputData = output.getData(); + const outputShape = output.getShape(); + const [, height, width] = outputShape; + + return setShape(outputData, [height, width]); +} + +// Function to parse annotations from the recognition results +function parseAnnotations(recognitionResults) { + const letters = '~0123456789abcdefghijklmnopqrstuvwxyz'; + const annotation = []; + + for (const row of recognitionResults) { + const letterIndex = argMax(row); + const parsedLetter = letters[letterIndex]; + + // Stop if end character is encountered + if (parsedLetter === letters[0]) break; + annotation.push(parsedLetter); + } + + return annotation.join(''); +} + +// Function to crop the image based on the bounding box coordinates +function cropImage(originalImage, xMin, yMin, xMax, yMax) { + xMin = Math.max(0, xMin); + yMin = Math.max(0, yMin); + xMax = Math.min(originalImage.cols, xMax); + yMax = Math.min(originalImage.rows, yMax); + if (xMin >= xMax || yMin >= yMax) { + throw new Error('Invalid crop coordinates'); + } + const roi = originalImage.roi( + new cv.Rect(xMin, yMin, xMax - xMin, yMax - yMin), + ); + const cropped = new cv.Mat(); + roi.copyTo(cropped); + roi.delete(); + + return cropped; +} + +// Get Text size +function getTextSize(text, fontFace, fontScale) { + const canvas = createCanvas(200, 200); + const ctx = canvas.getContext('2d'); + const adjustedFontScale = fontScale * 35; + ctx.font = `${adjustedFontScale}px ${fontFace}`; + const metrics = ctx.measureText(text); + const width = metrics.width; + const height = + metrics.actualBoundingBoxAscent + metrics.actualBoundingBoxDescent; + + return { width, height }; +} + +/* The convertResultToImage function visualizes object detection + results on an image by drawing bounding boxes around detected + objects and optionally adding labels to them. 
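+   The `options` argument supports `threshold` (minimum detection confidence required to draw a box, 0.3 by default) and `confLabels` (whether to draw the recognized text above each box, true by default).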
*/ + +function convertResultToImage( + bgrImage, + resizedImage, + boxesWithAnnotations, + options, + savePath, +) { + const defaultOptions = { threshold: 0.3, confLabels: true }; + const { threshold, confLabels } = Object.assign(defaultOptions, options); + + const colors = { + red: [255, 0, 0, 255], + green: [0, 255, 0, 255], + white: [255, 255, 255, 255], + }; + const [realY, realX] = [bgrImage.rows, bgrImage.cols]; + const [resizedY, resizedX] = [resizedImage.rows, resizedImage.cols]; + const [ratioX, ratioY] = [realX / resizedX, realY / resizedY]; + + const rgbImage = new cv.Mat(); + cv.cvtColor(bgrImage, rgbImage, cv.COLOR_BGR2RGB); + + boxesWithAnnotations.forEach(({ box, annotation }) => { + const conf = box[box.length - 1]; + + if (conf < threshold) return; + + const [xMin, yMin, xMax, yMax] = multiplyByRatio(ratioX, ratioY, box); + + cv.rectangle( + rgbImage, + new cv.Point(xMin, yMin), + new cv.Point(xMax, yMax), + colors.green, + 3, + ); + + if (!confLabels) return; + + const text = `${annotation}`; + const fontScale = 0.8; + const thickness = 1; + const { width: textW, height: textH } = getTextSize( + text, + 'Arial', + fontScale, + ); + const imageCopy = rgbImage.clone(); + + cv.rectangle( + imageCopy, + new cv.Point(xMin, yMin - textH - 10), + new cv.Point(xMin + textW, yMin - 10), + colors.white, + cv.FILLED, + ); + cv.addWeighted(imageCopy, 0.4, rgbImage, 0.6, 0, rgbImage); + cv.putText( + rgbImage, + text, + new cv.Point(xMin, yMin - 10), + cv.FONT_HERSHEY_SIMPLEX, + fontScale, + colors.red, + thickness, + cv.LINE_AA, + ); + + imageCopy.delete(); + }); + + const saveDir = path.dirname(savePath); + if (!fs.existsSync(saveDir)) { + fs.mkdirSync(saveDir, { recursive: true }); + } + + try { + saveImage(rgbImage, savePath); + } catch(e) { + console.log(`Error occurred while saving ----> ${e}`); + } + + return rgbImage; +} + +// Infer async helper function + +async function inferAsyncProcess( + tensor, + recModelCompiled, + recOutputLayer, + i, + annotations, +) { + // Create infer request + const inferRequest = recModelCompiled.createInferRequest(); + + // Define the completion callback function + function completionCallback(outputTensor, i, annotations) { + const recognitionResults = extractRecognitionResults(outputTensor); + const annotation = parseAnnotations(recognitionResults); + annotations.push(annotation); + } + + // Start inference in asynchronous mode + try { + const result = await inferRequest.inferAsync([tensor]); + completionCallback(result[recOutputLayer], i, annotations); + } catch(error) { + console.error('Error during inference:', error); + } +} + +// Log bounding boxes with annotations +function logBoxesWithAnnotations(boxesWithAnnotations) { + boxesWithAnnotations.forEach((item, i) => { + const { box, annotation } = item; + console.log(`Box ${i}: [${box}], Annotation: ${annotation}`); + }); +} + +function saveImage(rgbImage, savePath) { + const canvas = createCanvas(rgbImage.cols, rgbImage.rows); + const ctx = canvas.getContext('2d'); + const componentsPerPixel = + rgbImage.data.length / (rgbImage.cols * rgbImage.rows); + const imgDataArr = []; + + if (componentsPerPixel === 1) { + for (const val of rgbImage.data) { + imgDataArr.push(val, val, val, 255); + } + } else if (componentsPerPixel === 3) { + for (let i = 0; i < rgbImage.data.length; i++) { + imgDataArr.push(rgbImage.data[i]); + // ImageData expects RGBA order, so append an opaque alpha after each RGB triplet + if (i % 3 === 2) imgDataArr.push(255); + } + } + + const imageData = new ImageData( + new Uint8ClampedArray(imgDataArr), + rgbImage.cols, + rgbImage.rows, + ); +
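  // Render the assembled RGBA buffer onto the canvas before JPEG encoding. +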
ctx.putImageData(imageData, 0, 0); + + const dataURL = canvas.toDataURL('image/jpeg'); + const base64Data = dataURL.replace(/^data:image\/jpeg;base64,/, ''); + const imageBuffer = Buffer.from(base64Data, 'base64'); + + const saveDir = path.dirname(savePath); + if (!fs.existsSync(saveDir)) { + fs.mkdirSync(saveDir, { recursive: true }); + } + + fs.writeFileSync(savePath, imageBuffer); + console.log('Image saved successfully!', savePath); +} From 131c944a258dfd625cf87667d764cb4103e73d81 Mon Sep 17 00:00:00 2001 From: Ilya Albrecht Date: Mon, 29 Jul 2024 08:33:05 -0700 Subject: [PATCH 02/10] [GPU] Use array for tracking memory usage instead of map (#25269) ### Details: - Any additional locking and synchronization on memory allocation might have a negative impact on MT execution. - `std::map` has very slow access and requires a lock on every access. We can use `std::array` instead to hold a compile-time-known number of buckets. - The `array` container has lower access latency and memory overhead. - We might be able to remove the mutex lock on stat collection. --- .../include/intel_gpu/runtime/engine.hpp | 5 +- .../include/intel_gpu/runtime/memory_caps.hpp | 1 + src/plugins/intel_gpu/src/runtime/engine.cpp | 63 ++++++++----------- 3 files changed, 30 insertions(+), 39 deletions(-) diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/engine.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/engine.hpp index 320e2b466de5a4..7e77ceb6785cb5 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/engine.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/engine.hpp @@ -167,10 +167,9 @@ class engine { /// Create engine for given @p device and @p configuration engine(const device::ptr device); const device::ptr _device; - mutable std::mutex _mutex; - std::map<allocation_type, std::atomic<uint64_t>> _memory_usage_map; - std::map<allocation_type, std::atomic<uint64_t>> _peak_memory_usage_map; + std::array<std::atomic<uint64_t>, static_cast<size_t>(allocation_type::max_value)> _memory_usage_data{}; + std::array<std::atomic<uint64_t>, static_cast<size_t>(allocation_type::max_value)> _peak_memory_usage_data{}; }; } // namespace cldnn diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/memory_caps.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/memory_caps.hpp index 306a23fe1c3aaa..0a8da995d9af02 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/memory_caps.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/memory_caps.hpp @@ -18,6 +18,7 @@ enum class allocation_type { usm_host, // Accessible by host and device. Not Migratable usm_shared, // Accessible by host and device. Migratable. usm_device, // Accessible only by device. Not migratable. + max_value, // Used for data array size.
Shall be last }; inline std::ostream& operator<<(std::ostream& out, const allocation_type& alloc_type) { diff --git a/src/plugins/intel_gpu/src/runtime/engine.cpp b/src/plugins/intel_gpu/src/runtime/engine.cpp index ec0beef6a8aa31..73da14f6e16f47 100644 --- a/src/plugins/intel_gpu/src/runtime/engine.cpp +++ b/src/plugins/intel_gpu/src/runtime/engine.cpp @@ -197,65 +197,56 @@ memory_ptr engine::share_surface(const layout& layout, shared_surface surf, uint #endif // _WIN32 uint64_t engine::get_max_used_device_memory() const { - std::lock_guard<std::mutex> guard(_mutex); uint64_t total_peak_memory_usage {0}; - for (auto const& m : _peak_memory_usage_map) { - total_peak_memory_usage += m.second.load(); + for (auto const& m : _peak_memory_usage_data) { + total_peak_memory_usage += m.load(); } return total_peak_memory_usage; } uint64_t engine::get_max_used_device_memory(allocation_type type) const { - std::lock_guard<std::mutex> guard(_mutex); - uint64_t peak_memory_usage {0}; - auto iter = _peak_memory_usage_map.find(type); - if (iter != _peak_memory_usage_map.end()) { - peak_memory_usage = iter->second.load(); - } - return peak_memory_usage; + return _peak_memory_usage_data[static_cast<size_t>(type)].load(); } uint64_t engine::get_used_device_memory(allocation_type type) const { - std::lock_guard<std::mutex> guard(_mutex); - uint64_t memory_usage {0}; - auto iter = _memory_usage_map.find(type); - if (iter != _memory_usage_map.end()) { - memory_usage = iter->second.load(); - } - return memory_usage; + return _memory_usage_data[static_cast<size_t>(type)].load(); } std::map<std::string, uint64_t> engine::get_memory_statistics() const { - std::lock_guard<std::mutex> guard(_mutex); std::map<std::string, uint64_t> statistics; - for (auto const& m : _memory_usage_map) { - std::ostringstream oss; - oss << m.first; - statistics[oss.str()] = m.second.load(); - } + const auto add_stat = [&](allocation_type type) { + auto idx = static_cast<size_t>(type); + auto value = _memory_usage_data[idx].load(); + if (value != 0) { + std::ostringstream oss; + oss << type; + statistics[oss.str()] = value; + } + }; + + add_stat(allocation_type::unknown); + add_stat(allocation_type::cl_mem); + add_stat(allocation_type::usm_host); + add_stat(allocation_type::usm_shared); + add_stat(allocation_type::usm_device); return statistics; } void engine::add_memory_used(uint64_t bytes, allocation_type type) { - std::lock_guard<std::mutex> guard(_mutex); - if (!_memory_usage_map.count(type) && !_peak_memory_usage_map.count(type)) { - _memory_usage_map[type] = 0; - _peak_memory_usage_map[type] = 0; - } - _memory_usage_map[type] += bytes; - if (_memory_usage_map[type] > _peak_memory_usage_map[type]) { - _peak_memory_usage_map[type] = _memory_usage_map[type].load(); + auto idx = static_cast<size_t>(type); + const auto new_val = _memory_usage_data[idx].fetch_add(bytes) + bytes; + // Make sure actual maximum value is stored + while (new_val > _peak_memory_usage_data[idx]) { + _peak_memory_usage_data[idx] = new_val; } } void engine::subtract_memory_used(uint64_t bytes, allocation_type type) { - std::lock_guard<std::mutex> guard(_mutex); - auto iter = _memory_usage_map.find(type); - if (iter != _memory_usage_map.end()) { - _memory_usage_map[type] -= bytes; - } else { + auto idx = static_cast<size_t>(type); + if (_memory_usage_data[idx].load() < bytes) { throw std::runtime_error("Attempt to free unallocated memory"); } + _memory_usage_data[idx] -= bytes; } std::shared_ptr<engine> engine::create(engine_types engine_type, runtime_types runtime_type, const device::ptr device) { From a2077e3f22f78db22199af2fd385f27462c32c6b Mon Sep 17 00:00:00 2001 From: Kelvin Choi Date: Tue, 30 Jul 2024 08:21:05 +0900
Subject: [PATCH 03/10] [GPU] Fix gemm_tiled_opt kernel to support B_VEC_SIZE > 1 for static (#25631) ### Details: - *Fix gemm_tiled_opt to support B_VEC_SIZE>1 at static shape* ### Tickets: - *139840* --- .../kernel_selector/cl_kernels/gemm_tiled_opt.cl | 14 ++++++++++++-- .../kernels/gemm/gemm_kernel_tiled_opt.cpp | 4 +++- .../tests/unit/test_cases/gemm_gpu_test.cpp | 15 +++++++++++++-- 3 files changed, 28 insertions(+), 5 deletions(-) diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/gemm_tiled_opt.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/gemm_tiled_opt.cl index fa30466de60c8c..5ad89020ebf62a 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/gemm_tiled_opt.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/gemm_tiled_opt.cl @@ -786,6 +786,12 @@ KERNEL(gemm_tiled_opt)( ACCUMULATOR_TYPE_VEC dequantized = TO_ACCUMULATOR_TYPE(ALPHA) * c_tile[write_id]; #endif // BIAS_TERM + #if TRANSPOSE_OUTPUT == TRANSPOSE_X_LAST + const uint x_pitch = 1; + #else + const uint x_pitch = output_x_pitch; + #endif + #if HAS_FUSED_OPS #if FUSED_OPS_CAN_USE_PRELOAD FUSED_OPS_CALC_VEC; @@ -793,9 +799,13 @@ KERNEL(gemm_tiled_opt)( FUSED_OPS_VEC; #endif // FUSED_OPS_CAN_USE_PRELOAD OUTPUT_TYPE_VEC res = FUSED_OPS_RESULT_VEC; - BLOCK_WRITE_C(d_ptr, 0, res); + unroll_for (uint n_elem = 0; n_elem < B_VEC_SIZE; ++n_elem) { + BLOCK_WRITEN(OUTPUT_TYPE, 1, d_ptr, SIMD_WIDTH * n_elem * output_x_pitch, res[n_elem]); + } #else // HAS_FUSED_OPS - BLOCK_WRITE_C(d_ptr, 0, dequantized); + unroll_for (uint n_elem = 0; n_elem < B_VEC_SIZE; ++n_elem) { + BLOCK_WRITEN(OUTPUT_TYPE, 1, d_ptr, SIMD_WIDTH * n_elem * output_x_pitch, dequantized[n_elem]); + } #endif // HAS_FUSED_OPS #endif // TILE_N_NOT_DIVISIBLE || B_VEC_SIZE == 1 #endif // IS_DYNAMIC diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/gemm/gemm_kernel_tiled_opt.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/gemm/gemm_kernel_tiled_opt.cpp index e59f424e5d6af7..43f32f3e8e18b0 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/gemm/gemm_kernel_tiled_opt.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/gemm/gemm_kernel_tiled_opt.cpp @@ -96,7 +96,9 @@ GemmKernelTiledOpt::GemmTuningData GemmKernelTiledOpt::SetTuningParams(const gem tuning_data.tile_m_size = tuning_data.simd_size; } // Increasing tile_n_size has performance improvement when m_size and n_size are not shallow and n_size is aligned at 32. 
- if (m_size >= 128 && n_size >= 128 && (n_size % 32 == 0) && tuning_data.simd_size == 16 && params.fused_ops.empty()) + // TODO: Support TILE_K_LEFTOVER true case at static shape + if (m_size >= 128 && n_size >= 128 && (n_size % 32 == 0) && tuning_data.simd_size == 16 && + (k_size % tuning_data.tile_k_size == 0) && params.fused_ops.empty()) tuning_data.tile_n_size = 32; GPU_DEBUG_LOG << params.layerID << ": m_size: " << m_size << ", n_size: " << n_size << ", k_size: " << k_size << std::endl; diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/gemm_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/gemm_gpu_test.cpp index 2e4bebe8f3274d..86e054ec93292d 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/gemm_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/gemm_gpu_test.cpp @@ -1316,7 +1316,7 @@ class gemm_gpu_tests: public ::testing::Test { } } - void test_transpose_matmul_f16(size_t num_dims, bool is_input_dynamic, bool is_caching_test, std::vector BMKN, std::vector input0_order, std::vector input1_order) { + void test_transpose_matmul_f16(size_t num_dims, bool is_input_dynamic, bool is_caching_test, std::vector BMKN, std::vector input0_order, std::vector input1_order, const double abs_error = 0.0001) { tests::random_generator rg; rg.set_seed(GET_SUITE_NAME); @@ -1411,7 +1411,6 @@ class gemm_gpu_tests: public ::testing::Test { ASSERT_EQ(output_ptr.size(), ref_out_data.size()); - const auto abs_error = 0.0001; for (uint32_t i = 0; i < ref_out_data.size(); ++i) { ASSERT_NEAR(output_ptr[i], ref_out_data[i], abs_error) << "at " << i; } @@ -1487,10 +1486,22 @@ TEST_F(gemm_gpu_tests, transpose_matmul_static_3d_f16) { this->test_transpose_matmul_f16(3, false, false, /*BMKN*/{19, 37, 23, 29}, /*input0_order*/{0, 2, 1}, /*input1_order*/{1, 2, 0}); } +TEST_F(gemm_gpu_tests, transpose_matmul_static_3d_f16_n32) { + this->test_transpose_matmul_f16(3, false, false, /*BMKN*/{1, 256, 32, 128}, /*input0_order*/{0, 1, 2}, /*input1_order*/{0, 2, 1}, 0.1); +} + TEST_F(gemm_gpu_tests, transpose_matmul_static_3d_f32) { this->test_transpose_matmul_f32(3, false, false, /*BMKN*/{19, 37, 23, 29}, /*input0_order*/{0, 2, 1}, /*input1_order*/{1, 2, 0}); } +TEST_F(gemm_gpu_tests, transpose_matmul_static_3d_f32_n32) { + this->test_transpose_matmul_f32(3, false, false, /*BMKN*/{2, 128, 16, 256}, /*input0_order*/{0, 1, 2}, /*input1_order*/{0, 2, 1}); +} + +TEST_F(gemm_gpu_tests, transpose_matmul_static_3d_f32_n32_k_remainder) { + this->test_transpose_matmul_f32(3, false, false, /*BMKN*/{2, 128, 17, 256}, /*input0_order*/{0, 1, 2}, /*input1_order*/{0, 2, 1}); +} + TEST_F(gemm_gpu_tests, transpose_matmul_dynamic_4d_f16_unaligned) { this->test_transpose_matmul_f16(4, true, false, /*BMKN*/{19, 37, 23, 29}, /*input0_order*/{0, 2, 3, 1}, /*input1_order*/{1, 2, 3, 0}); } From 7851b8e353d08eea88744084eee42009c7be8cde Mon Sep 17 00:00:00 2001 From: Luwei Zhou Date: Tue, 30 Jul 2024 13:14:22 +0800 Subject: [PATCH 04/10] [CPU] Migrate to onednn 3.5 (#25153) ### Details: OV: - *1. Parse impl_type based one onednn updated verbose, brg_conv-> brgconv, avx10_1_512->avx512* - *2. onednn brgemm_t->brgemm_desc_t* - *3. onednn create_brgemm_matmul_copy_b() API needs to specify stride and transpose explicitly* - *4. jit generator constructor changes in onednn* - *5. onednn upgrades xbyak version to v7.05. Needs to upgrade the thirdparty/xbyak to same version to avoid exceptions caused by static build symbol collision* - *6. debugging information about impl_type* - *7. 
Remove dnnl_memory_set_data_handle_no_pads_proc() invocation* ONEDNN 3.5 clean up: - *1. Remove dnnl_memory_set_data_handle_no_pads_proc() and remove the legacy commit* - *2. Separate arm commits with X86 for maintainability* - *3. squash work* - *4. revert https://github.com/oneapi-src/oneDNN/commit/cf5a4fedb42bdf54e06269208945459c699890c2 because of perf regression* ONEDNN branch: backup without squashing: origin/luwei/v3.5_for_ie_master_bak after squashing: https://github.com/openvinotoolkit/oneDNN/tree/v3.5_for_ie_master_squash ### Tickets: - *[CVS 12434](https://jira.devtools.intel.com/browse/CVS-124341)* --------- Co-authored-by: Zhang Yi3 --- src/plugins/intel_cpu/src/cpu_memory.cpp | 13 +++--------- .../x64/jit_brgemm_copy_b_emitter.cpp | 5 ++++- .../snippets/x64/kernel_executors/brgemm.cpp | 2 +- src/plugins/intel_cpu/src/graph.cpp | 20 +++++++++++++++++-- src/plugins/intel_cpu/src/node.cpp | 13 +++++++++++- src/plugins/intel_cpu/src/nodes/conv.cpp | 13 +++++++++++- .../src/nodes/kernels/x64/brgemm_kernel.cpp | 16 ++++++--------- .../src/nodes/kernels/x64/jit_kernel_base.cpp | 2 +- src/plugins/intel_cpu/src/nodes/matmul.cpp | 13 +++++++++++- src/plugins/intel_cpu/src/nodes/mha.cpp | 6 ++++-- src/plugins/intel_cpu/src/nodes/pooling.cpp | 13 +++++++++++- src/plugins/intel_cpu/src/nodes/rnn.cpp | 5 +++++ .../intel_cpu/src/onednn/iml_type_mapper.cpp | 9 +++++++-- src/plugins/intel_cpu/thirdparty/onednn | 2 +- thirdparty/xbyak | 2 +- 15 files changed, 99 insertions(+), 35 deletions(-) diff --git a/src/plugins/intel_cpu/src/cpu_memory.cpp b/src/plugins/intel_cpu/src/cpu_memory.cpp index 515fe92845702c..ab454382f57d73 100644 --- a/src/plugins/intel_cpu/src/cpu_memory.cpp +++ b/src/plugins/intel_cpu/src/cpu_memory.cpp @@ -145,7 +145,7 @@ void Memory::redefineDesc(MemoryDescPtr desc) { void Memory::update() { if (dnnlMemHandle.isInit()) { auto prim = dnnlMemHandle.getPrim(); - prim.set_data_handle_no_pads_proc(m_mgrHandle->getRawPtr()); + prim.set_data_handle(m_mgrHandle->getRawPtr()); } } @@ -177,12 +177,8 @@ dnnl::memory Memory::DnnlMemPrimHandle::getPrim() const { // // ======================== auto data = m_memObjPtr->getDataNoThrow(); - auto pads_zeroing = m_memObjPtr->m_padsZeroing; if (data != nullptr) { - if (pads_zeroing) - m_prim.set_data_handle(data); - else - m_prim.set_data_handle_no_pads_proc(data); + m_prim.set_data_handle(data); } } return m_prim; @@ -498,10 +494,7 @@ StaticMemory::StaticMemory(const dnnl::engine& eng, MemoryDescPtr desc, const vo m_prim = dnnl::memory(dnnl_desc->getDnnlDesc(), m_eng, DNNL_MEMORY_NONE); // // ======================== - if (pads_zeroing) - m_prim.set_data_handle(m_pMemMngr->getRawPtr()); - else - m_prim.set_data_handle_no_pads_proc(m_pMemMngr->getRawPtr()); + m_prim.set_data_handle(m_pMemMngr->getRawPtr()); } catch (const std::exception& exc) { dnnlErrorCtx = exc.what(); diff --git a/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_brgemm_copy_b_emitter.cpp b/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_brgemm_copy_b_emitter.cpp index a31c4a18c68824..1cee6528ec9592 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_brgemm_copy_b_emitter.cpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_brgemm_copy_b_emitter.cpp @@ -85,9 +85,10 @@ void jit_brgemm_copy_b_emitter::init_brgemm_copy(std::unique_ptr(DnnlExtensionUtils::ElementTypeToDataType(src_dt)); brgCopyKernelConf.wei_dt = static_cast(DnnlExtensionUtils::ElementTypeToDataType(wei_dt)); + brgCopyKernelConf.orig_wei_dt = brgCopyKernelConf.wei_dt; 
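+    // onednn 3.5 also carries the original (pre-conversion) weights type in orig_wei_dt; it equals wei_dt here since no down-conversion is requested.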
brgCopyKernelConf.wei_n_blk = static_cast(N_blk); brgCopyKernelConf.wei_tag = dnnl_abcd; // What's about other ranks? - brgCopyKernelConf.copy_B_wei_stride = 0; + brgCopyKernelConf.transposed_B = false; brgCopyKernelConf.LDB = static_cast(LDB); brgCopyKernelConf.N = static_cast(N); brgCopyKernelConf.N_tail = static_cast(N_tail); @@ -97,6 +98,8 @@ void jit_brgemm_copy_b_emitter::init_brgemm_copy(std::unique_ptr(brgCopyKernelConf.src_dt)); brgCopyKernelConf.tr_b_dt_sz = DnnlExtensionUtils::sizeOfDataType(static_cast(brgCopyKernelConf.src_dt)); + brgCopyKernelConf.copy_B_wei_stride = brgCopyKernelConf.N * brgCopyKernelConf.b_dt_sz; + brgCopyKernelConf.req_wei_vnni_downconvert = false; diff --git a/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm.cpp b/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm.cpp index e538c3baef28bb..45c0923f4b4dbd 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm.cpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm.cpp @@ -134,7 +134,7 @@ std::shared_ptr BrgemmKernelExecutor::compile_kernel(const if (config.is_empty()) return compiled_kernel; - cpu::x64::brgemm_t desc; + cpu::x64::brgemm_desc_t desc; auto status = brgemm_desc_init(&desc, config.get_isa(), cpu::x64::brgemm_strd, config.get_dt_in0(), config.get_dt_in1(), false, false, cpu::x64::brgemm_row_major, 1.f, diff --git a/src/plugins/intel_cpu/src/graph.cpp b/src/plugins/intel_cpu/src/graph.cpp index 490c15fceb2ec4..4f3bf381d20720 100644 --- a/src/plugins/intel_cpu/src/graph.cpp +++ b/src/plugins/intel_cpu/src/graph.cpp @@ -392,7 +392,23 @@ void Graph::InitDescriptors() { OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, node->profiling.initSupportedPrimitiveDescriptors); DEBUG_LOG("Init supported primitive descriptors for node: ", node->getName()); node->initSupportedPrimitiveDescriptors(); - +#ifdef CPU_DEBUG_CAPS + { + const auto& SPDs = node->getSupportedPrimitiveDescriptors(); + for (size_t i = 0; i < SPDs.size(); i++) { + DEBUG_LOG("#", + node->getExecIndex(), + " ", + node->getName(), + " Before filter, SupportedPrimitiveDescriptors [", + i, + "/", + SPDs.size(), + "]: \n", + SPDs[i]); + } + } +#endif OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, node->profiling.filterSupportedPrimitiveDescriptors); DEBUG_LOG("Filter supported primitive descriptors for node: ", node->getName()); node->filterSupportedPrimitiveDescriptors(); @@ -404,7 +420,7 @@ void Graph::InitDescriptors() { node->getExecIndex(), " ", node->getName(), - " SupportedPrimitiveDescriptors [", + " After filter, SupportedPrimitiveDescriptors [", i, "/", SPDs.size(), diff --git a/src/plugins/intel_cpu/src/node.cpp b/src/plugins/intel_cpu/src/node.cpp index 41c3011f8707ec..326b3e907dcc8f 100644 --- a/src/plugins/intel_cpu/src/node.cpp +++ b/src/plugins/intel_cpu/src/node.cpp @@ -671,10 +671,21 @@ void Node::initSupportedPrimitiveDescriptors() { * since custom implementations can be not available at all, so a fallback to the default ones must happen * To achive the fallback, it is necessary to create a supported primitive descriptor for each implementation * since oneDNN primitive is mutating while iterating */ - +#ifdef CPU_DEBUG_CAPS + { + if (!customImplPriorities.empty()) { + DEBUG_LOG("#", getName(), " customImplPriorities [", 0 , "/", customImplPriorities.size(), + "]: ", impl_type_to_string(customImplPriorities[0])); + } + } +#endif for (auto& desc : descs) { auto first_desc = 
dnnl::primitive_desc(DnnlExtensionUtils::clone_primitive_desc(desc.get())); const bool first_match = customImplPriorities.empty(); + DEBUG_LOG("#", getName(), + ", itpd.impl_info_str(): ", desc.impl_info_str(), + ", parsed imp_type: ", impl_type_to_string(parse_impl_name(desc.impl_info_str())), + ", first_match: ", first_match ? "true" : "false"); DnnlExtensionUtils::for_each_implementation(desc, first_match, [&](impl_desc_type implType) { diff --git a/src/plugins/intel_cpu/src/nodes/conv.cpp b/src/plugins/intel_cpu/src/nodes/conv.cpp index e22a36af852a14..2422e2d3bb041c 100644 --- a/src/plugins/intel_cpu/src/nodes/conv.cpp +++ b/src/plugins/intel_cpu/src/nodes/conv.cpp @@ -805,7 +805,14 @@ void Convolution::initSupportedPrimitiveDescriptors() { supportedPrimitiveDescriptors.emplace_back(config, impl_type); }; - +#ifdef CPU_DEBUG_CAPS + { + if (!customImplPriorities.empty()) { + DEBUG_LOG("#", getName(), " customImplPriorities [", 0 , "/", customImplPriorities.size(), + "]: ", impl_type_to_string(customImplPriorities[0])); + } + } +#endif for (size_t dIdx = 0; dIdx < descs.size(); dIdx++) { auto& desc = descs[dIdx]; auto first_desc = dnnl::primitive_desc(DnnlExtensionUtils::clone_primitive_desc(desc.get())); @@ -816,6 +823,10 @@ void Convolution::initSupportedPrimitiveDescriptors() { }; const bool first_match = customImplPriorities.empty(); + DEBUG_LOG("#", getName(), + ", itpd.impl_info_str(): ", desc.impl_info_str(), + ", parsed imp_type: ", impl_type_to_string(parse_impl_name(desc.impl_info_str())), + ", first_match: ", first_match ? "true" : "false"); DnnlExtensionUtils::for_each_implementation(desc, first_match, [&](impl_desc_type implType) { diff --git a/src/plugins/intel_cpu/src/nodes/kernels/x64/brgemm_kernel.cpp b/src/plugins/intel_cpu/src/nodes/kernels/x64/brgemm_kernel.cpp index 86f80b33a8c875..e729fac66dd257 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/x64/brgemm_kernel.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/x64/brgemm_kernel.cpp @@ -104,12 +104,7 @@ BrgemmKernel::BrgemmKernel(size_t M, if (brgemmCtx0.is_with_amx || inType == ov::element::bf16 || b_transposed) { size_t b_stride = 0; - // must set actual stride when stride is not K/N - if (b_transposed) { - b_stride = ldb == K ? 0 : ldb * inType.size(); - } else { - b_stride = ldb == N ? 0 : ldb * inType.size(); - } + b_stride = ldb * inType.size(); // K should use the original K init_brgemm_copy_b(brgCopyBKernel, N, @@ -136,7 +131,7 @@ const size_t BrgemmKernel::get_scratch_b_size() const { void BrgemmKernel::init_brgemm(brgemmCtx& ctx, std::unique_ptr& brgKernel, bool use_amx) { - brgemm_t brgDesc; + brgemm_desc_t brgDesc; const bool is_int8 = one_of(ctx.dt_in0, data_type::u8, data_type::s8) && one_of(ctx.dt_in1, data_type::u8, data_type::s8); @@ -245,10 +240,12 @@ void BrgemmKernel::init_brgemm_copy_b( brgemm_matmul_conf_t brgCopyKernelConf; brgCopyKernelConf.src_dt = dt_in0; brgCopyKernelConf.wei_dt = dt_in1; + brgCopyKernelConf.orig_wei_dt = dt_in1; brgCopyKernelConf.wei_n_blk = N_blk; - // B could come from strided tensor, must use copy_B_wei_stride if set. - brgCopyKernelConf.wei_tag = copy_B_wei_stride != 0 ? transpose ? dnnl_adbc : dnnl_acbd : transpose ? dnnl_ba : dnnl_ab; + brgCopyKernelConf.wei_tag = transpose ? 
dnnl_ba : dnnl_ab; brgCopyKernelConf.copy_B_wei_stride = copy_B_wei_stride; + brgCopyKernelConf.transposed_B = transpose; + // LDB here is for the target tensor, not source tensor brgCopyKernelConf.LDB = LDB; brgCopyKernelConf.N = N; @@ -274,7 +271,6 @@ void BrgemmKernel::init_brgemm_copy_b( brgCopyKernelConf.has_zero_point_a = false; brgCopyKernelConf.has_zero_point_b = false; brgCopyKernelConf.src_zp_type = dnnl::impl::cpu::x64::none; - auto ret = create_brgemm_matmul_copy_b(brgCopyKernel, &brgCopyKernelConf); if (ret != dnnl::impl::status_t::dnnl_success) THROW_ERROR("cannot create_brgemm_matmul_copy_b kernel"); diff --git a/src/plugins/intel_cpu/src/nodes/kernels/x64/jit_kernel_base.cpp b/src/plugins/intel_cpu/src/nodes/kernels/x64/jit_kernel_base.cpp index f62d6e6c351a52..8fd3a966e13887 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/x64/jit_kernel_base.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/x64/jit_kernel_base.cpp @@ -11,7 +11,7 @@ namespace intel_cpu { namespace kernel { JitKernelBase::JitKernelBase(const char* name, x64::cpu_isa_t isa) - : x64::jit_generator(name, nullptr, x64::MAX_CODE_SIZE, true, isa), m_isa(isa) { + : x64::jit_generator(name, isa), m_isa(isa) { vlen = x64::isa_max_vlen(isa); } diff --git a/src/plugins/intel_cpu/src/nodes/matmul.cpp b/src/plugins/intel_cpu/src/nodes/matmul.cpp index 4355af1ea12993..2841e6f100afb7 100644 --- a/src/plugins/intel_cpu/src/nodes/matmul.cpp +++ b/src/plugins/intel_cpu/src/nodes/matmul.cpp @@ -484,10 +484,21 @@ void MatMul::initSupportedPrimitiveDescriptors() { supportedPrimitiveDescriptors.emplace_back(config, impl_type); }; - +#ifdef CPU_DEBUG_CAPS + { + if (!customImplPriorities.empty()) { + DEBUG_LOG("#", getName(), " customImplPriorities [", 0 , "/", customImplPriorities.size(), + "]: ", impl_type_to_string(customImplPriorities[0])); + } + } +#endif for (auto& desc : descs) { auto first_desc = dnnl::primitive_desc(DnnlExtensionUtils::clone_primitive_desc(desc.get())); const bool first_match = customImplPriorities.empty(); + DEBUG_LOG("#", getName(), + ", itpd.impl_info_str(): ", desc.impl_info_str(), + ", parsed imp_type: ", impl_type_to_string(parse_impl_name(desc.impl_info_str())), + ", first_match: ", first_match ? 
"true" : "false"); DnnlExtensionUtils::for_each_implementation(desc, first_match, [&](impl_desc_type implType) { diff --git a/src/plugins/intel_cpu/src/nodes/mha.cpp b/src/plugins/intel_cpu/src/nodes/mha.cpp index 3e87225fdbdba5..0a7fb66fc8897b 100644 --- a/src/plugins/intel_cpu/src/nodes/mha.cpp +++ b/src/plugins/intel_cpu/src/nodes/mha.cpp @@ -801,7 +801,7 @@ void MHA::initSupportedPrimitiveDescriptors() { void MHA::init_brgemm(brgemmCtx& ctx, std::unique_ptr& brgKernel, bool use_amx) { #ifdef OPENVINO_ARCH_X86_64 - brgemm_t brgDesc; + brgemm_desc_t brgDesc; brgemm_strides_t strides {static_cast(ctx.M * ctx.K), static_cast(ctx.K * ctx.N)}; const bool is_int8 = one_of(ctx.dt_in0, data_type::u8, data_type::s8) && one_of(ctx.dt_in1, data_type::u8, data_type::s8); @@ -859,9 +859,10 @@ void MHA::init_brgemm_copy_b(std::unique_ptr& brgCop brgemm_matmul_conf_t brgCopyKernelConf; brgCopyKernelConf.src_dt = dt_in0; brgCopyKernelConf.wei_dt = dt_in1; + brgCopyKernelConf.orig_wei_dt = dt_in1; brgCopyKernelConf.wei_n_blk = N_blk; brgCopyKernelConf.wei_tag = dnnl_abcd; - brgCopyKernelConf.copy_B_wei_stride = 0; + brgCopyKernelConf.transposed_B = false; brgCopyKernelConf.LDB = LDB; brgCopyKernelConf.N = N; brgCopyKernelConf.N_tail = N_tail; @@ -872,6 +873,7 @@ void MHA::init_brgemm_copy_b(std::unique_ptr& brgCop brgCopyKernelConf.b_dt_sz = DnnlExtensionUtils::sizeOfDataType(static_cast(brgCopyKernelConf.src_dt)); brgCopyKernelConf.tr_b_dt_sz = DnnlExtensionUtils::sizeOfDataType(static_cast(brgCopyKernelConf.src_dt)); brgCopyKernelConf.req_wei_vnni_downconvert = false; + brgCopyKernelConf.copy_B_wei_stride = brgCopyKernelConf.N * brgCopyKernelConf.b_dt_sz; if (is_with_amx) { brgCopyKernelConf.isa = avx512_core_amx; diff --git a/src/plugins/intel_cpu/src/nodes/pooling.cpp b/src/plugins/intel_cpu/src/nodes/pooling.cpp index d412cdaecda192..299ba4d15f4b6a 100644 --- a/src/plugins/intel_cpu/src/nodes/pooling.cpp +++ b/src/plugins/intel_cpu/src/nodes/pooling.cpp @@ -647,10 +647,21 @@ void Pooling::initSupportedPrimitiveDescriptors() { supportedPrimitiveDescriptors.emplace_back(config, impl_type); }; - +#ifdef CPU_DEBUG_CAPS + { + if (!customImplPriorities.empty()) { + DEBUG_LOG("#", getName(), " customImplPriorities [", 0 , "/", customImplPriorities.size(), + "]: ", impl_type_to_string(customImplPriorities[0])); + } + } +#endif for (auto& desc : descs) { auto first_desc = dnnl::primitive_desc(DnnlExtensionUtils::clone_primitive_desc(desc.get())); const bool first_match = customImplPriorities.empty(); + DEBUG_LOG("#", getName(), + ", itpd.impl_info_str(): ", desc.impl_info_str(), + ", parsed imp_type: ", impl_type_to_string(parse_impl_name(desc.impl_info_str())), + ", first_match: ", first_match ? 
"true" : "false"); DnnlExtensionUtils::for_each_implementation(desc, first_match, [&](impl_desc_type implType) { diff --git a/src/plugins/intel_cpu/src/nodes/rnn.cpp b/src/plugins/intel_cpu/src/nodes/rnn.cpp index 4558b9c7749b00..7b0e46ce8e5ce7 100644 --- a/src/plugins/intel_cpu/src/nodes/rnn.cpp +++ b/src/plugins/intel_cpu/src/nodes/rnn.cpp @@ -1170,6 +1170,11 @@ void RNN::prepareParams() { OPENVINO_THROW("Primitive descriptor was not found for node ", getName(), "."); } +#ifdef CPU_DEBUG_CAPS + auto pd = execPtr->getPrimitiveDesc(); + DEBUG_LOG("verbose##", getName(), "##", DnnlExtensionUtils::query_pd_info(pd), "\n"); +#endif + if (!primArgs.count(DNNL_ARG_WEIGHTS_LAYER) || !prevExecPtr || !execPtr->getWeightDesc()->isCompatible(*(prevExecPtr->getWeightDesc()))) { prepareMemory(execPtr->getWeightDesc(), 0); diff --git a/src/plugins/intel_cpu/src/onednn/iml_type_mapper.cpp b/src/plugins/intel_cpu/src/onednn/iml_type_mapper.cpp index 3ec7596c784ef3..d7a1e5979ddad9 100644 --- a/src/plugins/intel_cpu/src/onednn/iml_type_mapper.cpp +++ b/src/plugins/intel_cpu/src/onednn/iml_type_mapper.cpp @@ -13,10 +13,15 @@ namespace intel_cpu { impl_desc_type parse_impl_name(std::string impl_desc_name) { impl_desc_type res = impl_desc_type::unknown; -#define REPLACE_WORD(_wrd, _sub) auto pos = impl_desc_name.find(#_wrd); \ - if (pos != std::string::npos) impl_desc_name.replace(pos, std::string(#_wrd).length(), #_sub); +#define REPLACE_WORD(_wrd, _sub) { auto pos = impl_desc_name.find(#_wrd); \ + if (pos != std::string::npos) impl_desc_name.replace(pos, std::string(#_wrd).length(), #_sub); } + // Replace the ONEDNN pd name with OV definition. + REPLACE_WORD(brg_conv, brgconv); + REPLACE_WORD(avx10_1_512, avx512); + REPLACE_WORD(brg_matmul, brgemm); REPLACE_WORD(simple, ref); + #undef REPLACE_WORD #define SEARCH_WORD(_wrd) if (impl_desc_name.find(#_wrd) != std::string::npos) \ diff --git a/src/plugins/intel_cpu/thirdparty/onednn b/src/plugins/intel_cpu/thirdparty/onednn index f1cf31a2fa0979..cebd91ce7f2256 160000 --- a/src/plugins/intel_cpu/thirdparty/onednn +++ b/src/plugins/intel_cpu/thirdparty/onednn @@ -1 +1 @@ -Subproject commit f1cf31a2fa097932b8d74e88bf4bd941382504e4 +Subproject commit cebd91ce7f22567790f45d84e0b59b937dcc6e10 diff --git a/thirdparty/xbyak b/thirdparty/xbyak index 58642e0cdd5cbe..2ce465bbca46e9 160000 --- a/thirdparty/xbyak +++ b/thirdparty/xbyak @@ -1 +1 @@ -Subproject commit 58642e0cdd5cbe12f5d6e05069ddddbc0f5d5383 +Subproject commit 2ce465bbca46e92dde9c44bbe7940fd7f70e3b97 From aa33dc52a7b960386a0156fa2d3d402db73e6dfb Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 30 Jul 2024 10:33:51 +0200 Subject: [PATCH 05/10] Bump pyyaml from 6.0 to 6.0.1 in /tests (#24695) Bumps [pyyaml](https://github.com/yaml/pyyaml) from 6.0 to 6.0.1.
Changelog (sourced from pyyaml's changelog): 6.0.1 (2023-07-18). Commits: listed in the pyyaml repository.

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=pyyaml&package-manager=pip&previous-version=6.0&new-version=6.0.1)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

You can trigger a rebase of this PR by commenting `@dependabot rebase`.
> **Note** > Automatic rebases have been disabled on this pull request as it has been open for over 30 days. Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Ilya Lavrenov --- tests/e2e_tests/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/e2e_tests/requirements.txt b/tests/e2e_tests/requirements.txt index 2d380c682819aa..3b5773145d4469 100644 --- a/tests/e2e_tests/requirements.txt +++ b/tests/e2e_tests/requirements.txt @@ -32,7 +32,7 @@ pytest-timeout==2.2.0 # for common utils, e2e_tests openvino-dev distro==1.9.0 -pyyaml==6.0 +pyyaml==6.0.1 jsonschema==4.22.0 # filelock==3.9.0 omegaconf>=2.1,<2.4 From 5b7d9bb7fa1672eb42019dfe5d5b9b4aab43afc5 Mon Sep 17 00:00:00 2001 From: Maxim Vafin Date: Tue, 30 Jul 2024 11:00:59 +0200 Subject: [PATCH 06/10] [PT FE] Update hf tests and add LLM tests (#25758) ### Details: - *item1* - *...* ### Tickets: - *ticket-id* --- .../workflows/job_pytorch_models_tests.yml | 2 +- .../pytorch/hf_transformers_models | 656 +++++++----------- .../pytorch/test_hf_transformers.py | 328 +++++---- tests/model_hub_tests/pytorch/test_llm.py | 224 ++++++ tests/model_hub_tests/pytorch/test_timm.py | 4 +- 5 files changed, 665 insertions(+), 549 deletions(-) create mode 100644 tests/model_hub_tests/pytorch/test_llm.py diff --git a/.github/workflows/job_pytorch_models_tests.yml b/.github/workflows/job_pytorch_models_tests.yml index b910d9242647b1..381b0a51eb49df 100644 --- a/.github/workflows/job_pytorch_models_tests.yml +++ b/.github/workflows/job_pytorch_models_tests.yml @@ -183,7 +183,7 @@ jobs: uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 if: ${{ !cancelled() }} with: - name: test-results-torch-models-${{ inputs.model_scope == 'precommit' }} + name: test-results-torch-models-${{ inputs.model_scope }} path: | ${{ env.INSTALL_TEST_DIR }}/TEST-torch* if-no-files-found: 'error' diff --git a/tests/model_hub_tests/pytorch/hf_transformers_models b/tests/model_hub_tests/pytorch/hf_transformers_models index f79f32b6d93ee8..d52215b8b4e81e 100644 --- a/tests/model_hub_tests/pytorch/hf_transformers_models +++ b/tests/model_hub_tests/pytorch/hf_transformers_models @@ -1,420 +1,236 @@ -# List of models obtained from huggingface_hub api as one model per architecture with highest likes and downloads -abcp4/mymodel-test,mymodel,skip,Load problem -abeja/gpt-neox-japanese-2.7b,gpt_neox_japanese -acl-submission-anonym/EAM-spectral,examuse,skip,Load problem -adalbertojunior/modular-test,modular,skip,Load problem -adept/persimmon-8b-base,persimmon -aerner/lm-v2,open-llama -afonsosamarques/ardt-vanilla-combo_train_hopper_v2-2508_1336-33,decision_transformer,xfail,Tracing problem -aihijo/gec-zh-gector-bert-large,gector,skip,Load problem -albert-base-v2,albert -AlekseyKorshuk/test_reward_model,reward_model,skip,Load problem -alibaba-damo/mgp-str-base,mgp-str,xfail,Compile error: unsupported Einsum -allenai/hvila-block-layoutlm-finetuned-docbank,hierarchical_model,skip,Load problem -allenai/longformer-base-4096,longformer -ameya772/sentence-t5-base-atis-fine-tuned,T5,skip,Load problem -andreasmadsen/efficient_mlm_m0.40,roberta-prelayernorm -anton-l/emformer-base-librispeech,emformer,skip,Load problem -anugunj/omnivore-swinL-in21k,omnivore,skip,Load problem -apple/mobilevitv2-1.0-imagenet1k-256,mobilevitv2,xfail,Unsupported op aten::col2im -ArthurZ/jukebox_prior_0,jukebox_prior,skip,Load problem -ArthurZ/jukebox-vqvae,jukebox_vqvae,skip,Load 
problem
-asapp/sew-d-base-plus-400k-ft-ls100h,sew-d
-ashishpatel26/span-marker-bert-base-fewnerd-coarse-super,span-marker,skip,Load problem
-asi/albert-act-tiny,albert_act,skip,Load problem
-BAAI/AltCLIP,altclip
-BAAI/AquilaCode-py,aquila
-bana513/opennmt-translator-en-hu,opennmt-translator,skip,Load problem
-benjamin/wtp-bert-mini,bert-char,skip,Load problem
-benjamin/wtp-canine-s-1l,la-canine,skip,Load problem
-beomi/exKcBERT-kowiki,exbert,skip,Load problem
-bert-base-uncased,bert
-bigscience/bloom-560m,bloom
-Bono93/basic-cnn-example,simplecnn,skip,Load problem
-caidas/swin2SR-realworld-sr-x4-64-bsrgan-psnr,swin2sr
-camembert-base,camembert
-CarlCochet/trajectory-transformer-ant-expert-v2,trajectory_transformer,xfail,Tracing problem
-CEBaB/bert-base-uncased.CEBaB.causalm.ambiance.2-class.exclusive.seed_42,bert_causalm,skip,Load problem
-CEBaB/gpt2.CEBaB.causalm.None__None.2-class.exclusive.seed_42,gpt2_causalm,skip,Load problem
-CEBaB/lstm.CEBaB.causalm.ambiance__food.2-class.exclusive.seed_42,lstm_causalm,skip,Load problem
-CEBaB/roberta-base.CEBaB.causalm.None__None.2-class.exclusive.seed_42,roberta_causalm,skip,Load problem
-cestwc/lava-small-gigaword,lava,skip,Load problem
-chlab/efficientnet_61_planet_detection,efficientnet_61_planet_detection,skip,Load problem
-CIDAS/clipseg-rd64-refined,clipseg
-clefourrier/graphormer-base-pcqm4mv2,graphormer,xfail,Tracing problem
-cliang1453/deberta-v3-base-rte-teacher-stage1,ted-deberta-v2,skip,Load problem
-clip-italian/clip-italian,vision-text-dual-encoder,xfail,Tracing problem
-Clyde013/deqbert-base,deqbert,skip,Load problem
-connor-henderson/fastspeech2_conformer,fastspeech2_conformer,skip,Load problem
-connor-henderson/fastspeech2_conformer_with_hifigan,fastspeech2_conformer_with_hifigan,skip,Load problem
-csarron/meter-vqa2-ft,meter,skip,Load problem
-ctrl,ctrl
-cwkeam/mctc-large,mctc,skip,Load problem
-dandelin/vilt-b32-finetuned-vqa,vilt,xfail,Accuracy due to random
-dangkhoadl/custom_CNN_1D,cnn,skip,Load problem
-declare-lab/segue-w2v2-base,segue,skip,Load problem
-deepesh0x/autotrain-mlsec-1013333726,julien,skip,Load problem
-Dewa/Dog_Model_From_Scratch_v2,simple_image_classification,skip,Load problem
-dg845/univnet-dev,univnet,skip,Load problem
-distilbert-base-uncased,distilbert
-DiViorg/REC-Transformer,rec_transformer,skip,Load problem
-dreamerlin/chatbind-7b-delta,llava_image_bind_select,skip,Load problem
-edugp/data2vec-nlp-base,data2vec,skip,Load problem
-EleutherAI/enformer-official-rough,enformer,skip,Load problem
-EleutherAI/gpt-neo-125m,gpt_neo
-EleutherAI/pythia-6.9b,gpt_neox
-facebook/bart-large-mnli,bart
-facebook/blenderbot-400M-distill,blenderbot
-facebook/blenderbot_small-90M,blenderbot-small
-facebook/convnextv2-tiny-22k-384,convnextv2
-facebook/detr-resnet-50,detr
-facebook/dinov2-base,dinov2
-facebook/dpr-question_encoder-single-nq-base,dpr
-facebook/encodec_24khz,encodec
-facebook/esm2_t6_8M_UR50D,esm
-facebook/flava-full,flava,xfail,Tracing problem
-facebook/flava-image-codebook,flava_image_codebook,skip,Load problem
-facebook/levit-128S,levit
-facebook/m2m100_418M,m2m_100
-facebook/mask2former-swin-base-coco-panoptic,mask2former
-facebook/maskformer-swin-base-coco,maskformer
-facebook/mbart-large-50-many-to-many-mmt,mbart
-facebook/mms-lid-126,wav2vec2
-facebook/mms-tts-eng,vits,xfail,Accuracy failed: results cannot be broadcasted
-facebook/musicgen-small,musicgen
-facebook/opt-125m,opt
-facebook/rag-token-nq,rag,skip,Load problem
-facebook/sam-vit-large,sam,xfail,No node with name original_sizes
-facebook/timesformer-base-finetuned-k400,timesformer
-facebook/vit-mae-base,vit_mae
-facebook/wmt19-ru-en,fsmt,xfail,Tracing problem
-facebook/xglm-7.5B,xglm
-facebook/xlm-roberta-xl,xlm-roberta-xl
-facebook/xmod-base,xmod
-flax-community/ft5-cnn-dm,f_t5
-fnlp/elasticbert-base,elasticbert,skip,Load problem
-FranzStrauss/ponet-base-uncased,ponet,skip,Load problem
-funnel-transformer/small,funnel
-fusing/latent-diffusion-text2im-large,ldmbert,skip,Load problem
-Geor111y/flair-ner-addresses-extractor,flair,skip,Load problem
-gia-project/gia,gia,skip,Load problem
-gokuls/bert_12_layer_model_v1,hybridbert,skip,Load problem
-google/bigbird-roberta-base,big_bird
-google/bigbird-pegasus-large-arxiv,bigbird-pegasus
-google/bit-50,bit
-google/canine-s,canine,xfail,aten::slice: Parameter axis 3 out of the tensor rank range
-google/efficientnet-b2,efficientnet,xfail,Compile error: AvgPool: Kernel after dilation has size (dim: 1408) larger than the data shape after padding (dim: 9) at axis 0.
-google/electra-base-discriminator,electra
-google/flan-t5-base,t5
-google/fnet-base,fnet,xfail,Unsupported op aten::fft_fftn aten::real
-google/mobilebert-uncased,mobilebert
-google/mobilenet_v1_0.75_192,mobilenet_v1
-google/mt5-base,mt5
-google/owlv2-base-patch16-ensemble,owlv2
-google/owlvit-base-patch32,owlvit
-google/pix2struct-docvqa-base,pix2struct
-google/realm-orqa-nq-openqa,realm,skip,Load problem
-google/reformer-crime-and-punishment,reformer,xfail,Tracing problem
-google/tapas-large-finetuned-wtq,tapas
-google/vit-hybrid-base-bit-384,vit-hybrid,skip,Load problem
-google/vivit-b-16x2-kinetics400,vivit
-Goutham-Vignesh/ContributionSentClassification-scibert,scibert
-gpt2,gpt2
-Graphcore/groupbert-base-uncased,groupbert,skip,Load problem
-haoranzhao419/saffu-100M-0.1,saffu-100M-0.1,skip,Load problem
-Helsinki-NLP/opus-mt-fr-en,marian
-#hf-internal-testing/random-nllb-moe-2-experts,nllb-moe,skip,Load problem
-hf-internal-testing/tiny-random-CodeGenModel,codegen
-hf-internal-testing/tiny-random-convnext,convnext
-hf-internal-testing/tiny-random-CvtModel,cvt
-hf-internal-testing/tiny-random-Data2VecAudioModel,data2vec-audio,skip,Load problem
-hf-internal-testing/tiny-random-Data2VecTextModel,data2vec-text
-hf-internal-testing/tiny-random-Data2VecVisionModel,data2vec-vision
-hf-internal-testing/tiny-random-DeiTModel,deit
-hf-internal-testing/tiny-random-DonutSwinModel,donut-swin
-hf-internal-testing/tiny-random-EfficientFormerForImageClassification,efficientformer
-hf-internal-testing/tiny-random-flaubert,flaubert
-hf-internal-testing/tiny-random-FocalNetModel,focalnet
-hf-internal-testing/tiny-random-GPTBigCodeForCausalLM,gpt_bigcode
-hf-internal-testing/tiny-random-GPTJModel,gptj
-hf-internal-testing/tiny-random-groupvit,groupvit
-hf-internal-testing/tiny-random-IBertModel,ibert
-hf-internal-testing/tiny-random-ImageGPTModel,imagegpt
-hf-internal-testing/tiny-random-LiltModel,lilt
-hf-internal-testing/tiny-random-LongT5Model,longt5,skip,Load problem
-hf-internal-testing/tiny-random-MobileNetV2Model,mobilenet_v2
-hf-internal-testing/tiny-random-mobilevit,mobilevit
-hf-internal-testing/tiny-random-MPNetModel,mpnet
-hf-internal-testing/tiny-random-MptForCausalLM,mpt
-hf-internal-testing/tiny-random-NllbMoeForConditionalGeneration,nllb_moe,skip,Load problem
-hf-internal-testing/tiny-random-NystromformerModel,nystromformer
-hf-internal-testing/tiny-random-RegNetModel,regnet
-hf-internal-testing/tiny-random-RemBertModel,rembert
-hf-internal-testing/tiny-random-RoFormerModel,roformer
-hf-internal-testing/tiny-random-SegformerModel,segformer
-hf-internal-testing/tiny-random-SEWModel,sew,skip,Load problem
-hf-internal-testing/tiny-random-Speech2TextModel,speech_to_text,skip,Load problem
-hf-internal-testing/tiny-random-speech-encoder-decoder,speech-encoder-decoder,skip,Load problem
-hf-internal-testing/tiny-random-SplinterModel,splinter
-hf-internal-testing/tiny-random-SqueezeBertModel,squeezebert
-hf-internal-testing/tiny-random-SwinModel,swin
-hf-internal-testing/tiny-random-vision_perceiver_conv,perceiver
-hf-internal-testing/tiny-random-ViTMSNModel,vit_msn
-hf-internal-testing/tiny-random-wav2vec2-conformer,wav2vec2-conformer
-hf-internal-testing/tiny-random-XLMModel,xlm
-hfl/vle-base,vle,skip,Load problem
-HJHGJGHHG/GAU-Base-Full,gau,skip,Load problem
-huggingface/autoformer-tourism-monthly,autoformer,skip,Load problem
-huggingface/informer-tourism-monthly,informer,skip,Load problem
-huggingface/time-series-transformer-tourism-monthly,time_series_transformer,skip,Load problem
-HuggingFaceM4/tiny-random-idefics,idefics,xfail,Unsupported op aten::any aten::einsum prim::TupleConstruct prim::TupleUnpack
-HuggingFaceM4/tiny-random-vllama-clip,vllama,skip,Load problem
-HuggingFaceM4/tiny-random-vopt-clip,vopt,skip,Load problem
-HuggingFaceH4/zephyr-7b-beta,mistral
-HuiHuang/gpt3-damo-base-zh,gpt3,skip,Load problem
-hustvl/yolos-tiny,yolos
-iakarshu/tilt_base,tilt_base_configuration,skip,Load problem
-ibm/MoLM-350M-4B,moduleformer,skip,Load problem
-IDEA-CCNL/Randeng-Deltalm-362M-En-Zh,Deltalm,skip,Load problem
-Inderpreet01/seaformer-semantic-segmentation-large,seaformer,skip,Load problem
-Intel/dpt-hybrid-midas,dpt
-# Intel/tvp-base,tvp,skip,Load problem # takes too long
-isemmanuelolowe/code-embedder,instruct-codebert,skip,Load problem
-isemmanuelolowe/instruct-codet5-5,instruct-codet5,skip,Load problem
-jaketae/fastspeech2-ljspeech,fastspeech2,skip,Load problem
-jambran/depression-classification,DepressionDetection,skip,Load problem
-Jellywibble/dalio-reward-charlie-v1,reward-model,skip,Load problem
-JonasGeiping/crammed-bert-legacy,crammedBERT,skip,Load problem
-jonatasgrosman/wav2vec2-large-xlsr-53-english,wav2vec2
-Joqsan/test-my-fnet,my_fnet,skip,Load problem
-jozhang97/deta-swin-large,deta,skip,Load problem
-jploski/retnet-mini-shakespeare,retnet,skip,Load problem
-juhi7ag/idea-model2,,skip,Load problem
-junnyu/autobert-small-light,autobert,skip,Load problem
-junnyu/chinese_GAU-alpha-char_L-24_H-768,gau_alpha,skip,Load problem
-junnyu/flash_small_wwm_cluecorpussmall,flash,skip,Load problem
-junnyu/flashquad_small_wwm_cluecorpussmall,flash_quad,skip,Load problem
-kakaobrain/align-base,align,xfail,Compile error: AvgPool: Kernel after dilation has size (dim: 640) larger than the data shape after padding (dim: 9) at axis 0.
-KBLab/megatron-bert-large-swedish-cased-110k,megatron-bert
-kiddothe2b/hierarchical-transformer-base-4096-v2,hat,skip,Load problem
-k-l-lambda/clip-text-generator,clip_text_generator,skip,Load problem
-k-l-lambda/stable-diffusion-v1-4-inv-embed,inv_word_embed,skip,Load problem
-krasserm/perceiver-ar-clm-base,perceiver-ar-causal-language-model,skip,Load problem
-krasserm/perceiver-ar-sam-giant-midi,perceiver-ar-symbolic-audio-model,skip,Load problem
-krasserm/perceiver-io-img-clf,perceiver-io-image-classifier,skip,Load problem
-krasserm/perceiver-io-mlm,perceiver-io-masked-language-model,skip,Load problem
-krasserm/perceiver-io-optical-flow,perceiver-io-optical-flow,skip,Load problem
-krasserm/perceiver-io-txt-clf-imdb,perceiver-io-text-classifier,skip,Load problem
-ksmcg/fan_small_12_p16_224,fan,skip,Load problem
-laion/clap-htsat-unfused:audio_model,clap
-laion/clap-htsat-unfused:audio_projection,clap
-Langboat/ReGPT-125M-200G,re_gpt,skip,Load problem
-lengyue233/content-vec-best,hubert
-Lewislou/cellseg_sribd,cell_sribd,skip,Load problem
-liamcripwell/o-conbart,context-bart,skip,Load problem
-liamcripwell/pgdyn-plan,context-roberta,skip,Load problem
-linhdo/graphdoc,graphdoc,skip,Load problem
-LinkSoul/Chinese-LLaVA-Baichuan,llava,skip,Load problem
-LinkSoul/LLaSM-Cllama2,llaaa,skip,Load problem
-lintang/pile-t5-base-flan,umt5
-liuhaotian/LLaVA-Lightning-MPT-7B-preview,llava_mpt,skip,Load problem
-liya0121/my_finetune_0121,progen,skip,Load problem
-lucadiliello/BLEURT-20,bleurt,skip,Load problem
-lum-ai/metal,metal,skip,Load problem
-luodian/OTTER-MPT1B-RPJama-Init,otter,skip,Load problem
-luoruipu1/valley-13b-v1-delta,Valley,skip,Load problem
-luoruipu1/Valley2-7b,valley,skip,Load problem
-Lutech-AI/I-SPIn,I-SPIn,skip,Load problem
-MAGAer13/mplug-owl-llama-7b,mplug-owl,skip,Load problem
-manu/contrastive_zeroner,contrastive_zeroner,skip,Load problem
-manu/lilt-infoxlm-base,liltrobertalike,skip,Load problem
-manu/mplt_untrained,mplt,skip,Load problem
-matheusntg/character-bert-pt-normal,character_bert,skip,Load problem
-MBZUAI/swiftformer-xs,swiftformer
-MCG-NJU/videomae-base-finetuned-kinetics,videomae
-M-CLIP/XLM-Roberta-Large-Vit-B-32,M-CLIP,skip,Load problem
-medhabi/distilbert-base-uncased-score-pred,text-to-rating,skip,Load problem
-meta-llama/Llama-2-7b-hf,llama,skip,Load problem
-microsoft/beit-base-patch16-224-pt22k-ft22k,beit
-microsoft/biogpt,biogpt
-microsoft/conditional-detr-resnet-50,conditional_detr
-microsoft/deberta-base,deberta
-microsoft/git-large-coco,git,xfail,Tracing error: Please check correctness of provided example_input (but eval was correct)
-microsoft/kosmos-2-patch14-224,kosmos-2
-microsoft/layoutlm-base-uncased,layoutlm
-microsoft/layoutlmv2-base-uncased,layoutlmv2,xfail,Tracing error: Please check correctness of provided example_input (but eval was correct)
-microsoft/layoutlmv3-base,layoutlmv3
-microsoft/markuplm-base,markuplm
-microsoft/prophetnet-large-uncased-squad-qg,prophetnet
-microsoft/resnet-50,resnet
-microsoft/speecht5_hifigan,hifigan,skip,Load problem
-microsoft/speecht5_tts,speecht5,xfail,Unsupported op aten::bernoulli
-microsoft/swinv2-tiny-patch4-window8-256,swinv2
-microsoft/table-transformer-detection,table-transformer
-microsoft/unispeech-1350-en-17h-ky-ft-1h,unispeech
-microsoft/unispeech-sat-base-100h-libri-ft,unispeech-sat
-microsoft/wavlm-large,wavlm,skip,Load problem
-microsoft/xclip-base-patch32,xclip
-microsoft/xprophetnet-large-wiki100-cased,xlm-prophetnet
-miguelvictor/python-fromzero-lstmlm,lstmlm,skip,Load problem
-mingzi151/test-hf-wav2vec2bert,wav2vec2bert,skip,Load problem
-MIT/ast-finetuned-audioset-10-10-0.4593,audio-spectrogram-transformer
-Mizuiro-sakura/luke-japanese-large-sentiment-analysis-wrime,luke
-mlml-chip/thyme2_colon_e2e,cnlpt,skip,Load problem
-mnaylor/mega-base-wikitext,mega,xfail,Tracing error: Please check correctness of provided example_input (but eval was correct)
-mohitsha/tiny-random-testing-bert2gpt2,encoder-decoder
-MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli,deberta-v2
-MoritzLaurer/ernie-m-large-mnli-xnli,ernie_m
-mrm8488/prunebert-base-uncased-finepruned-topK-squadv2,masked_bert,skip,Load problem
-muditb/headline_classifier,BertModel,skip,Load problem
-nanashi161382/clip-text-deprojector,clip_text_deprojector_model,skip,Load problem
-nateraw/vit-age-classifier,vit
-naver-clova-ocr/bros-base-uncased,bros
-navervision/CompoDiff-Aesthetic,CompoDiff,skip,Load problem
-navervision/KELIP,kelip,skip,Load problem
-NCAI/NCAI-BERT,lean_albert,skip,Load problem
-nglaura/skimformer,skimformer,skip,Load problem
-nguyenvulebinh/robustspeech-asr,robustspeech,skip,Load problem
-nguyenvulebinh/voice-filter,voicefilter,skip,Load problem
-NiCy/seg-ment-tation,seg-ment-tation,skip,Load problem
-nielsr/audio-spectogram-transformer-finetuned-audioset-10-10-0.4593,audio-spectogram-transformer,skip,Load problem
-nielsr/convnext-tiny-maskrcnn,maskrcnn,skip,Load problem
-nielsr/H3-125m,h3,skip,Load problem
-nielsr/layoutreader-readingbank,layoutreader,skip,Load problem
-nielsr/pix2seq-base,pix2seq,skip,Load problem
-nielsr/tapex-large,tapex,skip,Load problem
-nielsr/udop-large,udop,skip,Load problem
-nielsr/vitmatte-small-composition-1k,vitmatte,skip,Load problem
-nllg/poetry-bygpt5-small-en,bygpt5,skip,Load problem
-nlpconnect/vit-gpt2-image-captioning,vision-encoder-decoder
-OATML-Markslab/Tranception_Small,tranception,skip,Load problem
-OFA-Sys/chinese-clip-vit-base-patch16,chinese_clip
-openai/clip-vit-large-patch14,clip
-openai/jukebox-1b-lyrics,jukebox,skip,Load problem
-openai/whisper-medium,whisper,skip,Load problem
-openai-gpt,openai-gpt
-OpenAssistant/oasst-rm-2-pythia-6.9b-epoch-1,gpt_neox_reward_model,skip,Load problem
-openmmlab/upernet-convnext-small,upernet
-openMUSE/clip-vit-large-patch14-text-enc,clip_text_model,skip,Load problem
-katuni4ka/opt-125m-gptq,opt
-PatrickHaller/ngme-llama-264M,ngme,skip,Load problem
-patrickvonplaten/bert2gpt2-cnn_dailymail-fp16,encoder_decoder,skip,Load problem
-paulhindemith/test-zeroshot,test-zeroshot,skip,Load problem
-PGT/orig-nystromformer-s-artificial-balanced-max500-490000-0,graph_nystromformer,skip,Load problem
-pie/example-ner-spanclf-conll03,TransformerSpanClassificationModel,skip,Load problem
-pie/example-re-textclf-tacred,TransformerTextClassificationModel,skip,Load problem
-pleisto/yuren-baichuan-7b,multimodal_llama
-predictia/europe_reanalysis_downscaler_convbaseline,convbilinear,skip,Load problem
-predictia/europe_reanalysis_downscaler_convswin2sr,conv_swin2sr,skip,Load problem
-pszemraj/led-large-book-summary,led
-pszemraj/pegasus-x-large-book-summary,pegasus_x
-qmeeus/whisper-small-ner-combined,whisper_for_slu,skip,Load problem
-raman-ai/pcqv2-tokengt-lap16,tokengt,skip,Load problem
-range3/pegasus-gpt2-medium,pegasusgpt2,skip,Load problem
-regisss/bridgetower-newyorker-a100-8x,bridgetower
-rinna/japanese-cloob-vit-b-16,cloob,skip,Load problem
-Rocketknight1/tiny-random-falcon-7b,falcon
-RUCAIBox/mass-base-uncased,mass,skip,Load problem
-RWKV/rwkv-4-169m-pile,rwkv
-sahasrarjn/interbert,BERT,skip,Load problem
-saibo/genkalm-medium-gpt2,genkalm,skip,Load problem
-sail/poolformer_m36,poolformer
-SajjadAyoubi/clip-fa-vision,clip_vision_model
-Salesforce/blip2-flan-t5-xl:vision_model,blip-2
-Salesforce/blip2-flan-t5-xl:qformer,blip-2
-Salesforce/blip2-flan-t5-xl:language_projection,blip-2
-Salesforce/blip-image-captioning-large,blip
-Salesforce/instructblip-vicuna-7b,instructblip,skip,Load problem
-SamLowe/roberta-base-go_emotions,roberta
-sanchit-gandhi/enhanced_direct_s2st_en_to_es,speech-to-speech,skip,Load problem
-sciki/finetune_tinybert,finetune-tinybert,skip,Load problem
-sebastian-hofstaetter/colbert-distilbert-margin_mse-T2-msmarco,ColBERT,skip,Load problem
-sebastian-hofstaetter/distilbert-cat-margin_mse-T2-msmarco,BERT_Cat,skip,Load problem
-sebastian-hofstaetter/idcm-distilbert-msmarco_doc,IDCM,skip,Load problem
-SenseTime/deformable-detr,deformable_detr,xfail,Tracing error: Please check correctness of provided example_input (but eval was correct)
-shahules786/Reward-model-gptneox-410M,rm_gptneox_config,skip,Load problem
-shauray/Llava-Llama-2-7B-hf,llavallama,skip,Load problem
-shauray/ViTPose,vitpose,skip,Load problem
-sheonhan/ict-imagenet-256,ict,skip,Load problem
-shibing624/text2vec-base-chinese-paraphrase,ernie
-shikhartuli/flexibert-mini,flexibert,skip,Load problem
-shikras/shikra-7b-delta-v1-0708,shikra,skip,Load problem
-shi-labs/dinat-mini-in1k-224,dinat,xfail,Accuracy validation failed
-shi-labs/nat-mini-in1k-224,nat,xfail,Accuracy validation failed
-shi-labs/oneformer_ade20k_swin_large,oneformer,xfail,Different number of outputs between framework and OpenVINO
-shuqi/seed-encoder,seed_encoder,skip,Load problem
-sijunhe/nezha-cn-base,nezha
-sjiang1/codecse,roberta_for_cl,skip,Load problem
-slh/fcnet-base-cased,fcnet,skip,Load problem
-snoop2head/Deep-Shallow-Ko2En,transformer,skip,Load problem
-Solomonik/SeqTokenModelMultiple,SeqToken,skip,Load problem
-solotimes/lavibe_base,donut,skip,Load problem
-songlab/gpn-brassicales,ConvNet,skip,Load problem
-speechbrain/m-ctc-t-large,mctct
-Splend1dchan/wav2vec2-large-lv60_t5lephone-small_lna_bs64,speechmix,skip,Load problem
-stefan-it/bort-full,bort
-SteveZhan/my-resnet50d,resnet_steve,skip,Load problem
-suno/bark,bark,skip,Load problem
-surajnair/r3m-50,r3m,skip,Load problem
-susnato/clvp_dev,clvp,skip,Load problem
-susnato/phi-1_5_dev,phi
-Tanrei/GPTSAN-japanese,gptsan-japanese,xfail,Unsupported op aten::index_put_ prim::TupleConstruct
-tau/bart-large-sled-govreport,tau/sled,skip,Load problem
-taufeeque/best-cb-model,codebook,skip,Load problem
-Team-PIXEL/pixel-base,pixel,skip,Load problem
-tensorpro/clip_vip_pretrained_base_16,clip_vip,skip,Load problem
-thomwolf/vqgan_imagenet_f16_1024,vqgan_model,skip,Load problem
-thu-ml/zh-clip-vit-roberta-large-patch14,zhclip,skip,Load problem
-tifa-benchmark/promptcap-coco-vqa,ofa,skip,Load problem
-tli8hf/robertabase_snli,transformerfornli,skip,Load problem
-# transfo-xl/transfo-xl-wt103,transfo-xl - deprecated by transformers due to security vulnerability, not inferable in latest transformers
-transZ/BART_shared_clean,shared_bart,skip,Load problem
-transZ/BART_shared_v2,shared_bart_v2,skip,Load problem
-transZ/misecom,misecom,skip,Load problem
-transZ/parex,parex,skip,Load problem
-transZ/phrext,phrext,skip,Load problem
-transZ/reword,reword,skip,Load problem
-transZ/roberta_texid,roberta_texid,skip,Load problem
-transZ/tforge_v1.9,Transformer_Forge,skip,Load problem
-trl-internal-testing/tiny-random-BigBirdPegasusForConditionalGeneration,bigbird_pegasus
-trl-internal-testing/tiny-random-BlenderbotSmallForConditionalGeneration,blenderbot-small,skip,Load problem
-trl-internal-testing/tiny-random-MvpForConditionalGeneration,mvp
-trl-internal-testing/tiny-random-SwitchTransformersForConditionalGeneration,switch_transformers,skip,Load problem
-tuner007/pegasus_paraphrase,pegasus
-turing-motors/heron-chat-blip-ja-stablelm-base-7b-v0,video_blip,skip,Load problem
-turing-motors/heron-chat-git-ELYZA-fast-7b-v0,git_llama,skip,Load problem
-uclanlp/plbart-base,plbart
-uclanlp/visualbert-vqa-coco-pre,visual_bert
-ummagumm-a/samolet_room_classifier,AirModelHF,skip,Load problem
-ummagumm-a/samolet-room-classifier,gru,skip,Load problem
-UNCANNY69/Misinfo-BERT-LSTM,BertLSTMForSequenceClassification,skip,Load problem
-UNCANNY69/Miss-BERT-CNN,BertCNNForSequenceClassification,skip,Load problem
-unc-nlp/lxmert-base-uncased,lxmert,skip,Load problem
-uw-madison/mra-base-512-4,mra
-uw-madison/yoso-4096,yoso
-valhalla/cogview-gpt2-test,cog_view,skip,Load problem
-valhalla/s2t_mustc_multilinguial_medium,speech_to_text_transformer,skip,Load problem
-vblagoje/greaselm-csqa,greaselm,skip,Load problem
-vinvino02/glpn-nyu,glpn
-Visual-Attention-Network/van-base,van
-visualjoyce/transformers4vl-uniter-base,uniter,skip,Load problem
-visualjoyce/transformers4vl-vilbert-mt,vilbert,skip,Load problem
-vumichien/nonsemantic-speech-trillsson3,trillsson_efficientnet,skip,Load problem
-vumichien/trillsson3-ft-keyword-spotting-12,trillsson_efficient,skip,Load problem
-wangruiai2023/nougat,nougat,skip,Load problem
-weiweishi/roc-bert-base-zh,roc_bert
-WENGSYX/CoNN_Parity,conn,skip,Load problem
-xlm-roberta-base,xlm-roberta
-xlnet-base-cased,xlnet
-ybelkada/focusondepth,focusondepth,skip,Load problem
-ybelkada/random-tiny-BertGenerationModel,bert-generation
-YituTech/conv-bert-base,convbert
-yjernite/retribert-base-uncased,retribert,xfail,Unsupported op aten::cross_entropy_loss
-ylacombe/hf-seamless-m4t-medium,seamless_m4t,skip,Load problem
-youzanai/clip-product-title-chinese,clip_chinese_model,skip,Load problem
-Yova/SmallCapOPT7M,smallcap,skip,Load problem
-yusufani/trclip-vitl14-e10,trclip,skip,Load problem
-yysung53/dpr,text_similarity,skip,Load problem
-Zetatech/pvt-tiny-224,pvt
-ZinengTang/tvlt-base,tvlt,xfail,Conversion is failed for aten::cat: Argument element types are inconsistent
-zuppif/resnetd-18,resnetd,skip,Load problem
+# List of models
+albert,albert/albert-base-v2
+align,kakaobrain/align-base,xfail,Compile error: AvgPool: Kernel after dilation has size (dim: 640) larger than the data shape after padding (dim: 9) at axis 0.
+altclip,BAAI/AltCLIP
+aquila,BAAI/AquilaCode-py
+audio-spectrogram-transformer,MIT/ast-finetuned-audioset-10-10-0.4593
+autoformer,huggingface/autoformer-tourism-monthly,xfail,Load error: mat1 and mat2 shapes cannot be multiplied
+bark,suno/bark,xfail,Load error: got an unexpected keyword argument 'input_ids'
+bart,facebook/bart-large-mnli
+beit,microsoft/beit-base-patch16-224-pt22k-ft22k
+bert,sentence-transformers/all-MiniLM-L6-v2
+bert-generation,google/bert_for_seq_generation_L-24_bbc_encoder
+big_bird,google/bigbird-roberta-base
+bigbird_pegasus,google/bigbird-pegasus-large-arxiv
+biogpt,microsoft/biogpt
+bit,google/bit-50
+blenderbot,facebook/blenderbot-400M-distill
+blenderbot-small,facebook/blenderbot_small-90M
+blip,Salesforce/blip-image-captioning-large
+blip_2_qformer,ZinengTang/qformer
+blip_2_vision_model,ZinengTang/clip_last_layer_removed
+blip-2,Salesforce/blip2-opt-2.7b:language_projection
+blip-2,Salesforce/blip2-opt-2.7b:qformer
+blip-2,Salesforce/blip2-opt-2.7b:vision_model
+bloom,bigscience/bloom-560m
+bort,stefan-it/bort-full
+bridgetower,regisss/bridgetower-newyorker-a100-8x
+bros,naver-clova-ocr/bros-base-uncased
+camembert,Jean-Baptiste/camembert-ner
+canine,google/canine-c,xfail,aten::slice: Parameter axis 3 out of the tensor rank range
+chinese_clip,OFA-Sys/chinese-clip-vit-base-patch16
+clap,laion/clap-htsat-unfused:audio_model
+clap,laion/clap-htsat-unfused:audio_projection
+clip,openai/clip-vit-large-patch14
+clip_text_model,maze/CLIP-ViT-bigG-14
+clip_vision_model,SajjadAyoubi/clip-fa-vision
+clipseg,CIDAS/clipseg-rd64-refined
+clvp,susnato/clvp_dev
+codegen,Salesforce/codegen-350M-mono
+conditional_detr,microsoft/conditional-detr-resnet-50
+convbert,YituTech/conv-bert-base
+convnext,facebook/convnext-large-224
+convnextv2,Pavarissy/ConvNextV2-large-DogBreed
+ctrl,Salesforce/ctrl
+cvt,microsoft/cvt-13
+data2vec-audio,m-a-p/music2vec-v1
+data2vec-text,facebook/data2vec-text-base
+data2vec-vision,facebook/data2vec-vision-large-ft1k
+deberta,microsoft/deberta-base
+deberta-v2,MoritzLaurer/mDeBERTa-v3-base-xnli-multilingual-nli-2mil7
+decision_transformer,edbeeching/decision-transformer-gym-hopper-expert
+deformable_detr,SenseTime/deformable-detr,xfail,Trace error: op->outputs().size() == 1 INTERNAL ASSERT FAILED
+deit,facebook/deit-base-distilled-patch16-224
+deta,jozhang97/deta-swin-large,xfail,ValueError: operands could not be broadcast together with shapes
+detr,facebook/detr-resnet-50
+dinov2,facebook/dinov2-base
+distilbert,distilbert/distilbert-base-uncased-finetuned-sst-2-english
+donut-swin,pasusarla/donut_encoder
+dpr,facebook/dpr-question_encoder-single-nq-base
+dpt,Intel/dpt-large
+efficientformer,DunnBC22/efficientformer-l3-300-Brain_Tumors_Image_Classification
+efficientnet,google/efficientnet-b7,xfail,Compile error: AvgPool: Kernel after dilation has size (dim: 2560) larger than the data shape after padding (dim: 19) at axis 0.
+electra,google/electra-base-discriminator
+encodec,facebook/encodec_24khz
+encoder_decoder,patrickvonplaten/bert2gpt2-cnn_dailymail-fp16,xfail,Load error: You have to specify either input_ids or inputs_embeds
+encoder-decoder,raynardj/wenyanwen-chinese-translate-to-ancient
+ernie,nghuyong/ernie-3.0-base-zh
+ernie_m,MoritzLaurer/ernie-m-large-mnli-xnli
+esm,InstaDeepAI/nucleotide-transformer-2.5b-multi-species
+f_t5,flax-community/ft5-cnn-dm
+fastspeech2_conformer,espnet/fastspeech2_conformer,xfail,Compile error: CPU plug-in doesn't support Tile operation with dynamic rank. Operation name: aten::repeat_interleave/Tile
+fastspeech2_conformer_with_hifigan,espnet/fastspeech2_conformer_with_hifigan,xfail,Compile error: CPU plug-in doesn't support Tile operation with dynamic rank. Operation name: __module.model/aten::repeat_interleave/Tile
+flaubert,lincoln/flaubert-mlsum-topic-classification
+flava,facebook/flava-full
+flava_image_codebook,facebook/flava-image-codebook
+fnet,google/fnet-base,xfail,Unsupported op aten::fft_fftn aten::real
+focalnet,microsoft/focalnet-small
+fsmt,facebook/wmt19-en-ru,xfail,Trace error: Only tensors lists tuples of tensors or dictionary of tensors can be output from traced functions
+funnel,funnel-transformer/small
+git,microsoft/git-large-coco,xfail,Trace error: We don't have an op for aten::full but it isn't a special case
+glpn,vinvino02/glpn-nyu
+gpt_neo,EleutherAI/gpt-neo-2.7B
+gpt2,openai-community/gpt2
+gptj,databricks/dolly-v1-6b
+gptsan-japanese,Tanrei/GPTSAN-japanese,xfail,Unsupported op aten::index_put_ prim::TupleConstruct prim::TupleUnpack
+graphormer,clefourrier/graphormer-base-pcqm4mv2,xfail,Load error: GraphormerForGraphClassification.forward() missing 6 required positional arguments: 'input_edges' 'attn_bias' 'in_degree' 'out_degree' 'spatial_pos' and 'attn_edge_type'
+grounding-dino,IDEA-Research/grounding-dino-base,xfail,Trace error: op->outputs().size() == 1 INTERNAL ASSERT FAILED
+groupvit,nvidia/groupvit-gcc-yfcc
+hiera,namangarg110/hiera_base_224
+hifigan,microsoft/speecht5_hifigan,xfail,Load error: The size of tensor a (100) must match the size of tensor b (80) at non-singleton dimension 1
+hubert,facebook/hubert-large-ls960-ft
+hybridbert,gokuls/bert_12_layer_model_v1
+ibert,DunnBC22/ibert-roberta-base-Abusive_Or_Threatening_Speech
+idefics,HuggingFaceM4/tiny-random-idefics,xfail,aten::einsum Different input dimensions indicated by the same labels for Einsum must be compatible
+imagegpt,hf-internal-testing/tiny-random-ImageGPTModel
+informer,huggingface/informer-tourism-monthly,xfail,Load error: mat1 and mat2 shapes cannot be multiplied
+instructblip,Salesforce/instructblip-vicuna-7b
+jukebox,openai/jukebox-1b-lyrics,xfail,Load error: Module [JukeboxModel] is missing the required "forward" function
+jukebox_vqvae,ArthurZ/jukebox-vqvae,skip,Load error: The checkpoint you are trying to load has model type `jukebox_vqvae` but Transformers does not recognize this architecture
+kobert,weonjae0211/ds-kobert
+kosmos-2,microsoft/kosmos-2-patch14-224
+LanguageBindDepth,LanguageBind/LanguageBind_Depth
+LanguageBindImage,LanguageBind/LanguageBind_Image
+LanguageBindThermal,LanguageBind/LanguageBind_Thermal
+layoutlm,impira/layoutlm-document-qa
+layoutlmv3,microsoft/layoutlmv3-base
+led,pszemraj/led-large-book-summary
+levit,facebook/levit-128S,xfail,Trace error: Cannot insert a Tensor that requires grad as a constant
+lilt,nielsr/lilt-xlm-roberta-base
+llama_with_landmark,Leooyii/Landmark_512_Slimpajama_1B
+longformer,allenai/longformer-base-4096
+longt5,pszemraj/long-t5-tglobal-base-16384-book-summary,xfail,Compile error: unsupported Einsum
+luke,oshizo/sbert-jsnli-luke-japanese-base-lite
+lxmert,unc-nlp/lxmert-base-uncased
+m2m_100,facebook/nllb-200-distilled-600M
+mamba,dominguesm/mambarim-110m,xfail,Trace error: Tracer cannot infer type of tensor
+marian,Helsinki-NLP/opus-mt-zh-en
+markuplm,microsoft/markuplm-base
+mask2former,facebook/mask2former-swin-large-coco-panoptic
+maskformer,facebook/maskformer-swin-large-ade
+mbart,facebook/mbart-large-50-many-to-many-mmt
+mctct,speechbrain/m-ctc-t-large
+mega,Bingsu/mega-150m-arch,xfail,Trace error: Cannot insert a Tensor that requires grad as a constant
+megatron-bert,UFNLP/gatortron-base
+mgp-str,alibaba-damo/mgp-str-base,xfail,Compile error: unsupported Einsum
+mobilebert,google/mobilebert-uncased
+mobilenet_v1,google/mobilenet_v1_0.75_192
+mobilenet_v2,google/mobilenet_v2_1.0_224
+mobilevit,apple/mobilevit-small
+mobilevitv2,apple/mobilevitv2-1.0-imagenet1k-256,xfail,Col2Im: Static shape inference lacks constant data on port 1
+mpnet,sentence-transformers/all-mpnet-base-v2
+mpt,team-lucid/mptk-1b
+mra,uw-madison/mra-base-512-4
+mt5,csebuetnlp/mT5_multilingual_XLSum
+musicgen,facebook/musicgen-large
+musicgen_melody,ylacombe/musicgen-melody
+mvp,RUCAIBox/mtl-data-to-text
+nezha,sijunhe/nezha-cn-base
+nllb_moe,hf-tiny-model-private/tiny-random-NllbMoeForConditionalGeneration,xfail,Trace error: Tracer cannot infer type of Seq2SeqMoEOutput
+nystromformer,uw-madison/nystromformer-4096
+olmo,DrNicefellow/Microscopic-Olmo-2B-1.1k-steps
+oneformer,shi-labs/oneformer_ade20k_swin_tiny,xfail,Different number of outputs between framework and OpenVINO
+openai-gpt,openai-community/openai-gpt
+open-llama,aerner/lm-v2
+owlv2,google/owlv2-base-patch16-ensemble
+owlvit,google/owlvit-base-patch32
+patchtsmixer,ibm-granite/granite-timeseries-patchtsmixer,xfail,Only tensors lists tuples of tensors or dictionary of tensors can be output from traced functions
+patchtst,ibm/patchtst-etth1-pretrain,xfail,Conversion is failed for: aten::unfold
+pegasus,tuner007/pegasus_paraphrase
+pegasus_x,pszemraj/pegasus-x-large-book-summary
+perceiver,hf-internal-testing/tiny-random-vision_perceiver_conv
+phi,susnato/phi-1_5_dev
+phi3,MaziyarPanahi/calme-2.1-phi3-4b
+pix2struct,google/deplot
+pix2struct_vision_model,UiPath/pix2struct-vision-base
+plbart,uclanlp/plbart-base
+poolformer,sail/poolformer_m36
+prophetnet,microsoft/prophetnet-large-uncased-squad-qg
+pvt,Zetatech/pvt-tiny-224
+rag,facebook/rag-token-nq,xfail,Load error: Make sure that `context_input_ids` are passed if no `retriever` is set.
+realm,google/realm-orqa-nq-openqa,xfail,Load error: 'NoneType' object is not callable
+reformer,google/reformer-enwik8,xfail,Load error: index out of range in self
+regnet,facebook/regnet-x-002
+rembert,google/rembert
+resnet,microsoft/resnet-50
+roberta,deepset/roberta-base-squad2
+roberta-prelayernorm,andreasmadsen/efficient_mlm_m0.40
+roc_bert,weiweishi/roc-bert-base-zh
+roformer,junnyu/roformer_chinese_sim_char_small
+rt_detr,rafaelpadilla/porting_rt_detr,xfail,Trace error: Only tensors lists tuples of tensors or dictionary of tensors can be output from traced functions
+rwkv,RWKV/rwkv-4-169m-pile
+sam,facebook/sam-vit-huge
+scibert,Goutham-Vignesh/ContributionSentClassification-scibert
+seamless_m4t,facebook/hf-seamless-m4t-large
+segformer,mattmdjaga/segformer_b2_clothes
+seg-ment-tation,NiCy/seg-ment-tation
+sew,anton-l/sew-mid-100k-ft-keyword-spotting
+sew-d,asapp/sew-d-base-plus-400k-ft-ls100h
+siglip,google/siglip-base-patch16-224
+siglip_vision_model,bczhou/TinyLLaVA-3.1B-SigLIP
+speech_to_text,facebook/s2t-small-librispeech-asr
+speech_to_text_2,pirxus/s2t2_decoder_base
+speech-encoder-decoder,facebook/wav2vec2-xls-r-2b-22-to-16
+speecht5,microsoft/speecht5_tts,xfail,Unsupported op aten::bernoulli
+splinter,tau/splinter-base-qass
+squeezebert,typeform/squeezebert-mnli
+stablelm,pansophic/rocket-3B
+starcoder2,cognitivecomputations/dolphincoder-starcoder2-7b
+superpoint,stevenbucaille/superpoint,xfail,Unsupported prim::TupleConstruct prim::TupleUnpack
+swiftformer,MBZUAI/swiftformer-xs
+swin,microsoft/swin-tiny-patch4-window7-224
+swin2sr,caidas/swin2SR-classical-sr-x2-64
+swinv2,microsoft/swinv2-tiny-patch4-window8-256
+switch_transformers,google/switch-base-8,xfail,Unsupported aten::index_put_ aten::logsumexp prim::TupleConstruct prim::TupleUnpack
+t5,google/flan-t5-base
+table-transformer,microsoft/table-transformer-detection
+tapas,google/tapas-base-finetuned-wtq
+tapex,nielsr/tapex-large
+time_series_transformer,huggingface/time-series-transformer-tourism-monthly
+timesformer,facebook/timesformer-base-finetuned-k400
+transfo-xl,transfo-xl/transfo-xl-wt103,xfail,Load error: type_as() missing 1 required positional arguments: "other"
+tvlt,ZinengTang/tvlt-base,xfail,Load error: Matching task requires labels
+tvp,Intel/tvp-base,xfail,Load error: 'NoneType' object has no attribute 'dtype'
+udop,nielsr/udop-large,xfail,Trace error: We don't have an op for aten::full_like but it isn't a special case
+umt5,EleutherAI/pile-t5-large
+unispeech,microsoft/unispeech-1350-en-17h-ky-ft-1h
+unispeech-sat,microsoft/unispeech-sat-base-100h-libri-ft
+univnet,dg845/univnet-dev,xfail,Load error: "normal_kernel_cpu" not implemented for 'Long'
+upernet,openmmlab/upernet-convnext-small
+van,Visual-Attention-Network/van-tiny
+videomae,MCG-NJU/videomae-base-finetuned-kinetics
+vilt,dandelin/vilt-b32-finetuned-vqa,xfail,Accuracy validation failed
+vision-encoder-decoder,nlpconnect/vit-gpt2-image-captioning
+visual_bert,uclanlp/visualbert-vqa-coco-pre
+vit,google/vit-base-patch16-224
+vit_mae,facebook/vit-mae-base
+vit_msn,facebook/vit-msn-large-7
+vit-hybrid,google/vit-hybrid-base-bit-384
+vitmatte,nielsr/vitmatte-small-composition-1k,xfail,Unsupported aten::FloatImplicit aten::__contains__ aten::__isnot__ aten::append prim::Uninitialized prim::unchecked_cast
+vits,facebook/mms-tts-eng,xfail,operands could not be broadcast together with shapes
+vivit,google/vivit-b-16x2-kinetics400
+wav2vec2,jonatasgrosman/wav2vec2-large-xlsr-53-english
+wav2vec2_base,gelbanna/test,xfail,Load error: expected scalar type Long but found Float
+wav2vec2-conformer,codenamewei/speech-to-text
+wavlm,microsoft/wavlm-large
+whisper,openai/whisper-large-v3
+xclip,microsoft/xclip-base-patch32
+xglm,facebook/xglm-564M
+xlm,FacebookAI/xlm-mlm-100-1280
+xlm-prophetnet,microsoft/xprophetnet-large-wiki100-cased
+xlm-roberta,FacebookAI/xlm-roberta-base
+xlm-roberta-xl,facebook/xlm-roberta-xl
+xlnet,xlnet/xlnet-base-cased
+xmod,facebook/xmod-base
+yolos,hustvl/yolos-tiny
+yoso,MrAnderson/yoso-4096-full-trivia
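The new manifest keeps one model per line in a `type,name[,mark,reason]` layout, where `mark` is an optional `skip` or `xfail` and `reason` is free text that may itself contain commas; lines starting with `#` are comments. The tests below consume this file through `get_models_list` from `models_hub_common.utils`. As a minimal sketch of how such a file can be parsed — `parse_models_list` is a hypothetical stand-in, not the real helper, whose exact behavior may differ:

```python
# Hypothetical parser for the type,name[,mark,reason] manifest layout.
# The real implementation is models_hub_common.utils.get_models_list.
def parse_models_list(path):
    cases = []
    with open(path) as f:
        for raw in f:
            line = raw.strip()
            if not line or line.startswith("#"):  # skip blanks and comment lines
                continue
            # split at most 3 times so a comma-bearing reason stays intact
            parts = line.split(",", 3)
            parts += [None] * (4 - len(parts))  # pad missing mark/reason
            model_type, name, mark, reason = parts
            cases.append((model_type, name, mark, reason))
    return cases

# Example row this would yield:
# ("vilt", "dandelin/vilt-b32-finetuned-vqa", "xfail", "Accuracy validation failed")
```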
diff --git a/tests/model_hub_tests/pytorch/test_hf_transformers.py b/tests/model_hub_tests/pytorch/test_hf_transformers.py
index 5e3f19ad945399..9898e3a2af8dfc 100644
--- a/tests/model_hub_tests/pytorch/test_hf_transformers.py
+++ b/tests/model_hub_tests/pytorch/test_hf_transformers.py
@@ -3,55 +3,26 @@
 import os
+from datasets import Audio, load_dataset
+from huggingface_hub import hf_hub_download, model_info
+from huggingface_hub.utils import HfHubHTTPError, LocalEntryNotFoundError
+from PIL import Image
 import pytest
 import torch
-from huggingface_hub import model_info
-from huggingface_hub.utils import HfHubHTTPError
-from models_hub_common.constants import hf_hub_cache_dir
-from models_hub_common.utils import cleanup_dir, retry
 import transformers
-from transformers import AutoConfig, AutoModel, AutoProcessor, AutoTokenizer, AutoFeatureExtractor, AutoModelForTextToWaveform, \
-    CLIPFeatureExtractor, XCLIPVisionModel, T5Tokenizer, VisionEncoderDecoderModel, ViTImageProcessor, BlipProcessor, BlipForConditionalGeneration, \
-    SpeechT5Processor, SpeechT5ForTextToSpeech, LayoutLMv2Processor, Pix2StructForConditionalGeneration, RetriBertTokenizer, VivitImageProcessor
-
-from torch_utils import TestTorchConvertModel, process_pytest_marks
-
-
-def is_gptq_model(config):
-    config_dict = config.to_dict() if not isinstance(config, dict) else config
-    quantization_config = config_dict.get("quantization_config", None)
-    return quantization_config and quantization_config["quant_method"] == "gptq"
-
-
-def patch_gptq():
-    orig_cuda_check = torch.cuda.is_available
-    orig_post_init_model = None
-    torch.set_default_dtype(torch.float32)
-    torch.cuda.is_available = lambda: True
+from transformers import (
+    AutoConfig, AutoFeatureExtractor, AutoImageProcessor, AutoModel,
+    AutoModelForTextToWaveform, AutoProcessor, AutoTokenizer,
+    BlipForConditionalGeneration, BlipProcessor, CLIPFeatureExtractor,
+    FlavaImageModel, LayoutLMv2Processor, Pix2StructForConditionalGeneration,
+    RetriBertTokenizer, SpeechT5ForTextToSpeech, SpeechT5Processor,
+    T5Tokenizer, ViTImageProcessor, VisionEncoderDecoderModel,
+    VivitImageProcessor, XCLIPVisionModel
+)
-
-    from optimum.gptq import GPTQQuantizer
-
-    orig_post_init_model = GPTQQuantizer.post_init_model
-
-    def post_init_model(self, model):
-        from auto_gptq import exllama_set_max_input_length
-
-        class StoreAttr(object):
-            pass
-
-        model.quantize_config = StoreAttr()
-        model.quantize_config.desc_act = self.desc_act
-        if self.desc_act and not self.disable_exllama and self.max_input_length is not None:
-            model = exllama_set_max_input_length(model, self.max_input_length)
-        return model
-
-    GPTQQuantizer.post_init_model = post_init_model
-    return orig_cuda_check, orig_post_init_model
-
-
-def unpatch_gptq(orig_cuda_check, orig_post_init_model):
-    from optimum.gptq import GPTQQuantizer
-    torch.cuda.is_available = orig_cuda_check
-    GPTQQuantizer.post_init_model = orig_post_init_model
+from models_hub_common.constants import hf_hub_cache_dir
+from models_hub_common.utils import cleanup_dir, get_models_list, retry
+from torch_utils import TestTorchConvertModel


 def flattenize_tuples(list_input):
@@ -72,42 +43,22 @@ def flattenize_outputs(outputs):
     return dict((k, v.numpy(force=True)) for k, v in outputs.items())


-def filter_example(model, example):
-    try:
-        import inspect
-        if isinstance(example, dict):
-            model_params = inspect.signature(model.forward).parameters
-            names_set = {p for p in model_params}
-            new_example = dict()
-            for k, v in example:
-                if k in names_set:
-                    new_example[k] = v
-            return new_example
-    except:
-        return example
-
-
 # To make tests reproducible we seed the random generator
 torch.manual_seed(0)


 class TestTransformersModel(TestTorchConvertModel):
     def setup_class(self):
-        from PIL import Image
         import requests

         self.infer_timeout = 1800

         url = "http://images.cocodataset.org/val2017/000000039769.jpg"
         self.image = Image.open(requests.get(url, stream=True).raw)
-        self.cuda_available, self.gptq_postinit = None, None

-    @retry(3, exceptions=(HfHubHTTPError,), delay=1)
+    @retry(3, exceptions=(HfHubHTTPError, LocalEntryNotFoundError), delay=1)
     def load_model(self, name, type):
-        name_suffix = ''
-        if name.find(':') != -1:
-            name_suffix = name[name.find(':') + 1:]
-            name = name[:name.find(':')]
+        name, _, name_suffix = name.partition(':')
         mi = model_info(name)
         auto_processor = None
@@ -117,12 +68,7 @@ def load_model(self, name, type):
             config = AutoConfig.from_pretrained(name)
         except Exception:
             config = {}
-        is_gptq = is_gptq_model(config)
         model_kwargs = {"torchscript": True}
-        if is_gptq:
-            self.cuda_available, self.gptq_postinit = patch_gptq()
-            model_kwargs["torch_dtype"] = torch.float32
-            self.ov_config = {"DYNAMIC_QUANTIZATION_GROUP_SIZE": "0"}
         if "bart" in mi.tags:
             model_kwargs["attn_implementation"] = "eager"
         try:
@@ -137,10 +83,9 @@ def load_model(self, name, type):
             example = dict(encoded_input)
         elif 'xclip' in mi.tags:
             model = XCLIPVisionModel.from_pretrained(name, **model_kwargs)
-            # needs video as input
-            example = {'pixel_values': torch.randn(*(16, 3, 224, 224), dtype=torch.float32)}
+            example = {'pixel_values': torch.randn(16, 3, 224, 224)}
         elif 'audio-spectrogram-transformer' in mi.tags:
-            example = {'input_values': torch.randn(*(1, 1024, 128), dtype=torch.float32)}
+            example = {'input_values': torch.randn(1, 1024, 128)}
         elif 'mega' in mi.tags:
             model = AutoModel.from_pretrained(name, **model_kwargs)
             model.config.output_attentions = True
@@ -150,33 +95,35 @@ def load_model(self, name, type):
         elif 'bros' in mi.tags:
             processor = AutoProcessor.from_pretrained(name)
             encoding = processor("to the moon!", return_tensors="pt")
-            bbox = torch.randn([1, 6, 8], dtype=torch.float32)
-            example = dict(input_ids=encoding["input_ids"], bbox=bbox, attention_mask=encoding["attention_mask"])
+            bbox = torch.randn([1, 6, 8])
+            example = dict(
+                input_ids=encoding["input_ids"], bbox=bbox, attention_mask=encoding["attention_mask"])
         elif 'upernet' in mi.tags:
             processor = AutoProcessor.from_pretrained(name)
             example = dict(processor(images=self.image, return_tensors="pt"))
         elif 'deformable_detr' in mi.tags or 'oneformer' in mi.tags:
             processor = AutoProcessor.from_pretrained(name)
-            example = dict(processor(images=self.image, task_inputs=["semantic"], return_tensors="pt"))
+            example = dict(processor(images=self.image, task_inputs=[
+                           "semantic"], return_tensors="pt"))
         elif 'clap' in mi.tags:
             example_inputs_map = {
-                'audio_model': {'input_features': torch.randn([1, 1, 1001, 64], dtype=torch.float32)},
-                'audio_projection': {'hidden_states': torch.randn([1, 768], dtype=torch.float32)},
+                'audio_model': {'input_features': torch.randn([1, 1, 1001, 64])},
+                'audio_projection': {'hidden_states': torch.randn([1, 768])},
             }
             example = example_inputs_map[name_suffix]
         elif 'git' in mi.tags:
             processor = AutoProcessor.from_pretrained(name)
-            example = {'pixel_values': torch.randn(*(1, 3, 224, 224), dtype=torch.float32),
+            example = {'pixel_values': torch.randn(1, 3, 224, 224),
                        'input_ids': torch.randint(1, 100, size=(1, 13), dtype=torch.int64)}
         elif 'blip-2' in mi.tags:
             processor = AutoProcessor.from_pretrained(name)
             example = dict(processor(images=self.image, return_tensors="pt"))
             example_inputs_map = {
-                'vision_model' : {'pixel_values': torch.randn([1, 3, 224, 224], dtype=torch.float32)},
-                'qformer': {'query_embeds' : torch.randn([1, 32, 768], dtype=torch.float32),
-                            'encoder_hidden_states' : torch.randn([1, 257, 1408], dtype=torch.float32),
-                            'encoder_attention_mask' : torch.ones([1, 257], dtype=torch.int64)},
-                'language_projection': {'input' : torch.randn([1, 32, 768], dtype=torch.float32)},
+                'vision_model': {'pixel_values': torch.randn([1, 3, 224, 224])},
+                'qformer': {'query_embeds': torch.randn([1, 32, 768]),
+                            'encoder_hidden_states': torch.randn([1, 257, 1408]),
+                            'encoder_attention_mask': torch.ones([1, 257])},
+                'language_projection': {'input': torch.randn([1, 32, 768])},
             }
             example = example_inputs_map[name_suffix]
         elif "t5" in mi.tags:
@@ -198,10 +145,11 @@ def load_model(self, name, type):
             example = dict(encoded_input)
             example["decoder_input_ids"] = torch.randint(0, 1000, [1, 20])
-            example["decoder_attention_mask"] = torch.ones([1, 20], dtype=torch.int64)
+            example["decoder_attention_mask"] = torch.ones(
+                [1, 20], dtype=torch.int64)
         elif 'idefics' in mi.tags:
             processor = AutoProcessor.from_pretrained(name)
-
+
             prompts = [[
                 "User: What is in this image?",
                 "https://upload.wikimedia.org/wikipedia/commons/8/86/Id%C3%A9fix.JPG",
@@ -216,47 +164,59 @@ def load_model(self, name, type):
                 "\nAssistant:",
             ]]

-            inputs = processor(prompts, add_end_of_utterance_token=False, return_tensors="pt")
+            inputs = processor(
+                prompts, add_end_of_utterance_token=False, return_tensors="pt")
             example = dict(inputs)
         elif 'blip' in mi.tags and 'text2text-generation' in mi.tags:
             processor = BlipProcessor.from_pretrained(name)
-            model = BlipForConditionalGeneration.from_pretrained(name, **model_kwargs)
+            model = BlipForConditionalGeneration.from_pretrained(
+                name, **model_kwargs)
             text = "a photography of"
             inputs = processor(self.image, text, return_tensors="pt")
             example = dict(inputs)
         elif 'speecht5' in mi.tags:
-            from datasets import load_dataset
-
             processor = SpeechT5Processor.from_pretrained(name)
-            model = SpeechT5ForTextToSpeech.from_pretrained(name, **model_kwargs)
+            model = SpeechT5ForTextToSpeech.from_pretrained(
+                name, **model_kwargs)

-            inputs = processor(text="Hello, my dog is cute.", return_tensors="pt")
+            inputs = processor(text="Hello, my dog is cute.",
+                               return_tensors="pt")
             # load xvector containing speaker's voice characteristics from a dataset
-            embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
-            speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
+            embeddings_dataset = load_dataset(
+                "Matthijs/cmu-arctic-xvectors", split="validation")
+            speaker_embeddings = torch.tensor(
+                embeddings_dataset[7306]["xvector"]).unsqueeze(0)

             example = dict(inputs)
             example['speaker_embeddings'] = speaker_embeddings
-            example['decoder_input_values'] = torch.randn([1, 20, model.config.num_mel_bins])
+            example['decoder_input_values'] = torch.randn(
+                [1, 20, model.config.num_mel_bins])
         elif 'layoutlmv2' in mi.tags:
             processor = LayoutLMv2Processor.from_pretrained(name)

             question = "What's the content of this image?"
-            encoding = processor(self.image, question, max_length=512, truncation=True, return_tensors="pt")
+            encoding = processor(
+                self.image, question, max_length=512, truncation=True, return_tensors="pt")
             example = dict(encoding)
         elif 'pix2struct' in mi.tags:
-            model = Pix2StructForConditionalGeneration.from_pretrained(name, **model_kwargs)
+            model = Pix2StructForConditionalGeneration.from_pretrained(
+                name, **model_kwargs)
             processor = AutoProcessor.from_pretrained(name)

             import requests
-            from PIL import Image

             image_url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/ai2d-demo.jpg"
             image = Image.open(requests.get(image_url, stream=True).raw)

             question = "What does the label 15 represent? (1) lava (2) core (3) tunnel (4) ash cloud"
-            inputs = processor(images=image, text=question, return_tensors="pt")
+            inputs = processor(images=image, text=question,
+                               return_tensors="pt")
             example = dict(inputs)
             example["decoder_input_ids"] = torch.randint(0, 1000, [1, 20])
-            example["decoder_attention_mask"] = torch.ones([1, 20], dtype=torch.int64)
+            example["decoder_attention_mask"] = torch.ones(
+                [1, 20], dtype=torch.int64)
+        elif "pix2struct_vision_model" in mi.tags:
+            image_processor = AutoProcessor.from_pretrained("google/pix2struct-textcaps-base")
+            inputs = image_processor(images=self.image, return_tensors="pt")
+            example = dict(inputs)
         elif "mms-lid" in name:
             processor = AutoFeatureExtractor.from_pretrained(name)
             input_values = processor(torch.randn(16000).numpy(),
@@ -275,10 +235,11 @@ def load_model(self, name, type):
             encoded_input = processor(images=self.image, return_tensors="pt")
             example = (encoded_input.pixel_values,)
         elif "flava" in mi.tags:
-            processor = AutoProcessor.from_pretrained(name)
-            encoded_input = processor(text=["a photo of a cat", "a photo of a dog"],
-                                      images=[self.image, self.image],
-                                      return_tensors="pt")
+            model = FlavaImageModel.from_pretrained(name, **model_kwargs)
+            feature_extractor = AutoFeatureExtractor.from_pretrained(name)
+
+            encoded_input = feature_extractor(images=[self.image],
+                                              return_tensors="pt")
             example = dict(encoded_input)
         elif "vivit" in mi.tags:
             frames = list(torch.randint(
@@ -311,10 +272,11 @@ def load_model(self, name, type):
             text = "some example text in the English language"
             inputs = tokenizer(text, return_tensors="pt")
             example = dict(inputs)
-        elif 'musicgen' in mi.tags:
+        elif 'musicgen' in mi.tags or "musicgen_melody" in mi.tags:
             processor = AutoProcessor.from_pretrained(name)
-            model = AutoModelForTextToWaveform.from_pretrained(name, **model_kwargs)
-
+            model = AutoModelForTextToWaveform.from_pretrained(
+                name, **model_kwargs)
+
             inputs = processor(
                 text=["80s pop track with bassy drums and synth"],
                 padding=True,
@@ -325,12 +287,92 @@ def load_model(self, name, type):
             pad_token_id = model.generation_config.pad_token_id
             example["decoder_input_ids"] = torch.ones(
                 (inputs.input_ids.shape[0] * model.decoder.num_codebooks, 1), dtype=torch.long) * pad_token_id
-        elif 'kosmos-2' in mi.tags:
+        elif 'kosmos-2' in mi.tags or 'instructblip' in mi.tags:
             processor = AutoProcessor.from_pretrained(name)

             prompt = "An image of"
-            inputs = processor(text=prompt, images=self.image, return_tensors="pt")
+            inputs = processor(
+                text=prompt, images=self.image, return_tensors="pt")
             example = dict(inputs)
+        elif 'vitmatte' in mi.tags:
+            processor = AutoImageProcessor.from_pretrained(name)
+            filepath = hf_hub_download(repo_id="hf-internal-testing/image-matting-fixtures",
+                                       filename="image.png",
+                                       repo_type="dataset")
+            image = Image.open(filepath).convert("RGB")
+            filepath = hf_hub_download(repo_id="hf-internal-testing/image-matting-fixtures",
+                                       filename="trimap.png",
+                                       repo_type="dataset")
+            trimap = Image.open(filepath).convert("L")
+            inputs = processor(images=image, trimaps=trimap,
+                               return_tensors="pt")
+            example = dict(inputs)
+        elif 'sam' in mi.tags:
+            processor = AutoProcessor.from_pretrained(name)
+            input_points = [[[450, 600]]]
+            inputs = processor(self.image,
+                               input_points=input_points,
+                               return_tensors="pt")
+            example = dict(inputs)
+            if "original_sizes" in example:
+                del example["original_sizes"]
+            if "reshaped_input_sizes" in example:
+                del example["reshaped_input_sizes"]
+        elif 'udop' in mi.tags:
+            processor = AutoProcessor.from_pretrained(name, apply_ocr=False)
+            dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
+            example = dataset[0]
+            image = example["image"]
+            words = example["tokens"]
+            boxes = example["bboxes"]
+            inputs = processor(image, words, boxes=boxes, return_tensors="pt")
+            decoder_input_ids = torch.tensor([[config.decoder_start_token_id]])
+            example = dict(decoder_input_ids=decoder_input_ids,
+                           decoder_attention_mask=torch.tensor([[True]]), **inputs)
+        elif 'clvp' in mi.tags:
+            text = "This is an example text."
+            ds = load_dataset("hf-internal-testing/librispeech_asr_dummy",
+                              "clean", split="validation")
+            ds = ds.cast_column("audio", Audio(sampling_rate=22050))
+            sorted_audio = ds.sort("id").select(range(1))[:1]["audio"][0]
+            _, audio, sr = sorted_audio.values()
+            processor = AutoProcessor.from_pretrained(name)
+            inputs = processor(raw_speech=audio, sampling_rate=sr,
+                               text=text, return_tensors="pt")
+            example = dict(inputs)
+        elif "decision_transformer" in mi.tags:
+            states = torch.randn(1, 1, config.state_dim)
+            actions = torch.zeros((1, 1, config.act_dim), dtype=torch.float32)
+            rewards = torch.zeros(1, 1, dtype=torch.float32)
+            target_return = torch.randn(1, 1, 1)
+            timesteps = torch.tensor(0, dtype=torch.long).reshape(1, 1)
+            attention_mask = torch.zeros(1, 1, dtype=torch.float32)
+            example = dict(states=states,
+                           actions=actions,
+                           rewards=rewards,
+                           returns_to_go=target_return,
+                           timesteps=timesteps,
+                           attention_mask=attention_mask)
+        elif "time_series_transformer" in mi.tags or "informer" in mi.tags or "autoformer" in mi.tags:
+            file = hf_hub_download(repo_id="hf-internal-testing/tourism-monthly-batch",
+                                   filename="train-batch.pt", repo_type="dataset")
+            batch = torch.load(file)
+            example = dict(past_values=batch["past_values"],
+                           past_time_features=batch["past_time_features"],
+                           past_observed_mask=batch["past_observed_mask"],
+                           static_categorical_features=batch["static_categorical_features"],
+                           static_real_features=batch["static_real_features"],
+                           future_time_features=batch["future_time_features"]
+                           )
+        elif "seg-ment-tation" in mi.tags:
+            image_processor = AutoImageProcessor.from_pretrained(name)
+            inputs = image_processor(images=self.image, return_tensors="pt")
+            example = dict(inputs)
+        elif "lxmert" in mi.tags:
+            example = {"input_ids": torch.randint(1, 1000, [1, 20]),
+                       "attention_mask": torch.ones([1, 20], dtype=torch.bool),
+                       "visual_feats": torch.randn(1, 10, config.visual_feat_dim),
+                       "visual_pos": torch.randn(1, 10, config.visual_pos_dim)}
         else:
             try:
                 if auto_model == "AutoModelForCausalLM":
@@ -362,14 +404,12 @@ def load_model(self, name, type):
                     example = dict(input_ids=inputs.input_ids,
                                    decoder_input_ids=decoder_inputs.input_ids)
                 elif auto_model == "AutoModelForSpeechSeq2Seq":
-                    from datasets import load_dataset
                     processor = AutoProcessor.from_pretrained(name)
                     inputs = processor(torch.randn(1000).numpy(),
                                        sampling_rate=16000,
                                        return_tensors="pt")
                     example = dict(inputs)
                 elif auto_model == "AutoModelForCTC":
-                    from datasets import load_dataset
                     processor = AutoProcessor.from_pretrained(name)
                     input_values = processor(torch.randn(1000).numpy(),
                                              return_tensors="pt")
@@ -382,8 +422,8 @@ def load_model(self, name, type):
                     queries = ["What is the name of the first actor?",
                               "How many movies has George Clooney played in?",
                               "What is the total number of movies?", ]
-                    answer_coordinates = [[(0, 0)], [(2, 1)], [
-                        (0, 1), (1, 1), (2, 1)]]
+                    answer_coordinates = [[(0, 0)], [(2, 1)],
+                                          [(0, 1), (1, 1), (2, 1)]]
                     answer_text = [["Brad Pitt"], ["69"], ["209"]]
                     table = pd.DataFrame.from_dict(data)
                     encoded_input = tokenizer(table=table, queries=queries, answer_coordinates=answer_coordinates,
@@ -407,14 +447,44 @@ def load_model(self, name, type):
                 model = self.load_model_with_default_class(name, **model_kwargs)
             if hasattr(model, "set_default_language"):
                 model.set_default_language("en_XX")
+        if hasattr(model, "config") and hasattr(model.config, "return_loss"):
+            model.config.return_loss = False
         if name_suffix != '':
             model = model._modules[name_suffix]
         if example is None:
             if "encodec" in mi.tags:
                 example = (torch.randn(1, 1, 100),)
+            elif len({"blip_2_vision_model", "vit-hybrid", "siglip_vision_model", "flava_image_codebook", "superpoint", "donut-swin", "hiera"}.intersection(mi.tags)):
+                image_size = getattr(model.config, "image_size", 384)
+                if not isinstance(image_size, (list, tuple)):
+                    image_size = [image_size, image_size]
+                example = {"pixel_values": torch.randn(1, 3, image_size[0],
+                                                       image_size[1])}
+            elif len({"LanguageBindDepth", "LanguageBindImage", "LanguageBindThermal"}.intersection(mi.tags)):
+                image_size = getattr(model.config.vision_config,
+                                     "image_size", 384)
+                example = {"input_ids": torch.randint(0, 1000, [1, 20]),
+                           "pixel_values": torch.randn(1, 3, image_size, image_size)}
+            elif len({"speech-encoder-decoder", "wav2vec2", "unispeech", "wavlm", "data2vec-audio", "sew"}.intersection(mi.tags)):
+                example = {"input_values": torch.rand(1, 1000)}
+            elif "blip_2_qformer" in mi.tags:
+                example = {"query_embeds": torch.randn(1, 20, model.config.hidden_size),
+                           "attention_mask": torch.ones([1, 20]),
+                           "encoder_hidden_states": torch.randn(1, 20, model.config.encoder_hidden_size),
+                           "encoder_attention_mask": torch.ones([1, 20])}
+            elif "patchtsmixer" in mi.tags or "patchtst" in mi.tags:
+                example = {"past_values": torch.rand(1, model.config.context_length,
+                                                     model.config.num_input_channels)}
             else:
                 example = (torch.randint(1, 1000, [1, 100]),)
-        self.example = filter_example(model, example)
+        if len({"seamless_m4t", "whisper", "speech_to_text", "speech-encoder-decoder"}.intersection(mi.tags)):
+            example["decoder_input_ids"] = torch.randint(0, 1000, [1, 20])
+            example["decoder_attention_mask"] = torch.ones(
+                [1, 20], dtype=torch.int64)
+
+        if "hybridbert" in mi.tags and "token_type_ids" in example:
+            del example["token_type_ids"]
+        self.example = example
         if "vit_mae" in mi.tags:
             # vit-mae by default will generate random noise
             self.example["noise"] = torch.rand(1, 192)
@@ -429,20 +499,19 @@ def load_model(self, name, type):
     def teardown_method(self):
         # remove all downloaded files from cache
         cleanup_dir(hf_hub_cache_dir)
-        # restore after gptq patching
-        if self.cuda_available is not None:
-            unpatch_gptq(self.cuda_available, self.gptq_postinit)
-            self.cuda_available, self.gptq_postinit = None, None
+        super().teardown_method()

     @staticmethod
     def load_model_with_default_class(name, **kwargs):
         try:
             mi = model_info(name)
-            assert len({"owlv2", "owlvit", "vit_mae"}.intersection(mi.tags)) == 0, "TBD: support default classes of these models"
-            assert "architectures" in mi.config and len(mi.config["architectures"]) == 1
+            assert len({"owlv2", "owlvit", "vit_mae"}.intersection(
+                mi.tags)) == 0, "TBD: support default classes of these models"
+            assert "architectures" in mi.config and len(
+                mi.config["architectures"]) == 1
             class_name = mi.config["architectures"][0]
-            model_class = transformers.__getattr__(class_name)
+            model_class = getattr(transformers, class_name)
             return model_class.from_pretrained(name, **kwargs)
         except:
             return AutoModel.from_pretrained(name, **kwargs)
@@ -453,14 +522,19 @@ def load_model_with_default_class(name, **kwargs):
         ("google/tapas-large-finetuned-wtq", "tapas"),
         ("gpt2", "gpt2"),
         ("openai/clip-vit-large-patch14", "clip"),
-        ("katuni4ka/opt-125m-gptq", "opt"),
     ])
     @pytest.mark.precommit
     def test_convert_model_precommit(self, name, type, ie_device):
         self.run(model_name=name, model_link=type, ie_device=ie_device)

-    @pytest.mark.parametrize("name",
-                             process_pytest_marks(os.path.join(os.path.dirname(__file__), "hf_transformers_models")))
+    @pytest.mark.parametrize("type,name,mark,reason",
+                             get_models_list(os.path.join(os.path.dirname(__file__), "hf_transformers_models")))
     @pytest.mark.nightly
-    def test_convert_model_all_models(self, name, ie_device):
+    def test_convert_model_all_models(self, name, type, mark, reason, ie_device):
+        valid_marks = ['skip', 'xfail']
+        assert mark is None or mark in valid_marks, f"Invalid case for {name}"
+        if mark == 'skip':
+            pytest.skip(reason)
+        elif mark == 'xfail':
+            pytest.xfail(reason)
         self.run(model_name=name, model_link=None, ie_device=ie_device)
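For reference, this is how the `mark`/`reason` columns from the manifest drive the nightly run above: a minimal, self-contained sketch (the rows are taken from the list earlier in this patch; `run_conversion` is a placeholder for the real `self.run` conversion-and-compare step):

```python
import pytest

CASES = [
    # (type, name, mark, reason) rows as produced from hf_transformers_models
    ("bert", "sentence-transformers/all-MiniLM-L6-v2", None, None),
    ("vilt", "dandelin/vilt-b32-finetuned-vqa", "xfail", "Accuracy validation failed"),
]

def run_conversion(name):
    # placeholder for the real convert-and-compare step
    assert name

@pytest.mark.parametrize("type,name,mark,reason", CASES)
def test_convert(type, name, mark, reason):
    # skip aborts the test immediately; xfail records an expected failure
    if mark == "skip":
        pytest.skip(reason)
    elif mark == "xfail":
        pytest.xfail(reason)
    run_conversion(name)
```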
diff --git a/tests/model_hub_tests/pytorch/test_llm.py b/tests/model_hub_tests/pytorch/test_llm.py
new file mode 100644
index 00000000000000..43975500455967
--- /dev/null
+++ b/tests/model_hub_tests/pytorch/test_llm.py
@@ -0,0 +1,224 @@
+# Copyright (C) 2018-2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+import copy
+import inspect
+
+import numpy as np
+import pytest
+import torch
+from huggingface_hub.utils import HfHubHTTPError, LocalEntryNotFoundError
+from transformers import AutoConfig, AutoTokenizer, AutoModelForCausalLM
+
+from models_hub_common.utils import retry
+from openvino.frontend.pytorch.patch_model import __make_16bit_traceable as patch
+from openvino.frontend.pytorch.patch_model import unpatch_model as unpatch
+from torch_utils import TestTorchConvertModel
+
+
+def is_gptq_model(config):
+    config_dict = config.to_dict() if not isinstance(config, dict) else config
+    quantization_config = config_dict.get("quantization_config", None)
+    return quantization_config and quantization_config["quant_method"] == "gptq"
+
+
+def patch_gptq():
+    orig_cuda_is_available = torch.cuda.is_available
+    orig_cuda_is_bf16_supported = torch.cuda.is_bf16_supported
+    orig_cuda_get_device_capability = torch.cuda.get_device_capability
+    orig_post_init_model = None
+    torch.set_default_dtype(torch.float32)
+    torch.cuda.is_available = lambda: True
+    torch.cuda.is_bf16_supported = lambda: False
+    torch.cuda.get_device_capability = lambda n: (9, 1)
+
+    from optimum.gptq import GPTQQuantizer
+
+    orig_post_init_model = GPTQQuantizer.post_init_model
+
+    def post_init_model(self, model):
+        from auto_gptq import exllama_set_max_input_length
+
+        class StoreAttr(object):
+            pass
+
+        model.quantize_config = StoreAttr()
+        model.quantize_config.desc_act = self.desc_act
+        if self.desc_act and not self.disable_exllama and self.max_input_length is not None:
+            model = exllama_set_max_input_length(model, self.max_input_length)
+        return model
+
+    GPTQQuantizer.post_init_model = post_init_model
+    return (orig_cuda_is_available, orig_cuda_is_bf16_supported, orig_cuda_get_device_capability), orig_post_init_model
+
+
+def unpatch_gptq(orig_cuda_check, orig_post_init_model):
+    from optimum.gptq import GPTQQuantizer
+    torch.cuda.is_available, torch.cuda.is_bf16_supported, torch.cuda.get_device_capability = orig_cuda_check
+    GPTQQuantizer.post_init_model = orig_post_init_model
+
+
+def to_numpy(t):
+    if t.dtype in [torch.bfloat16, torch.float16]:
+        return t.to(torch.float32).numpy(force=True)
+    return t.numpy(force=True)
+
+
+def flattenize_tuples(list_input):
+    unpacked_pt_res = []
+    for r in list_input:
+        if isinstance(r, (tuple, list)):
+            unpacked_pt_res.extend(flattenize_tuples(r))
+        else:
+            unpacked_pt_res.append(r)
+    return unpacked_pt_res
+
+
+def flattenize_outputs(outputs):
+    if not isinstance(outputs, dict):
+        outputs = flattenize_tuples(outputs)
+        return [to_numpy(i) for i in outputs]
+    else:
+        return dict((k, to_numpy(v)) for k, v in outputs.items())
+
+
+# To make tests reproducible we seed the random generator
+torch.manual_seed(0)
+
+
+class TestLLMModel(TestTorchConvertModel):
+    def setup_class(self):
+        self.infer_timeout = 1800
+        self.cuda_available, self.gptq_postinit = None, None
+
+    @retry(3, exceptions=(HfHubHTTPError, LocalEntryNotFoundError), delay=1)
+    def load_model(self, name, type):
+        model = None
+        example = None
+        try:
+            config = AutoConfig.from_pretrained(name, trust_remote_code=True)
+        except Exception:
+            config = {}
+        model_kwargs = {"torchscript": True, "trust_remote_code": True}
+        is_gptq = is_gptq_model(config)
+        if is_gptq:
+            self.cuda_available, self.gptq_postinit = patch_gptq()
+            model_kwargs["torch_dtype"] = torch.float32
+            self.ov_config = {"DYNAMIC_QUANTIZATION_GROUP_SIZE": "0"}
+        else:
+            model_kwargs["torch_dtype"] = "auto"
+            pass
+
+        t = AutoTokenizer.from_pretrained(name, trust_remote_code=True)
+        self.model = AutoModelForCausalLM.from_pretrained(name, **model_kwargs)
+        if is_gptq:
+            model = self.model
+        else:
+            assert self.model.config.torch_dtype in [
+                torch.float16, torch.bfloat16]
+            model = copy.deepcopy(self.model).float()
+
+        example = t("Some input text to verify that model works.",
+                    return_tensors='pt').__dict__['data']
+        if type != "gptj":
+            pkv, am = self.get_pkv(model, t)
+            example["past_key_values"] = pkv
+            example["attention_mask"] = torch.cat(
+                [example["attention_mask"], am], -1)
+        if type not in ["opt", "falcon"]:
+            ids = torch.cumsum(example["attention_mask"] != 0, dim=1) - 1
+            example["position_ids"] = ids[:, -
+                                          example["input_ids"].shape[1]:]
+        self.example = example
+        return model
+
+    def get_inputs_info(self, model_obj):
+        return list(inspect.signature(getattr(model_obj, "forward", model_obj.__call__)).parameters)
+
+    def prepare_inputs(self, inputs_info):
+        inputs = getattr(self, "inputs", self.example)
+        filtered_keys = [i for i in inputs_info if i in inputs]
+        res = []
+        for k in filtered_keys:
+            v = inputs[k]
+            if isinstance(v, tuple):
+                v_flatten = flattenize_outputs(v)
+                if k == "past_key_values":
+                    v_flatten = [v.astype(np.float32) for v in v_flatten]
+                res.extend(v_flatten)
+            else:
+                res.append(v.numpy())
+        return res
+
+    def infer_fw_model(self, model_obj, inputs):
+        inputs = getattr(self, "inputs", self.example)
+        fw_outputs = model_obj(**inputs)
+        return flattenize_outputs(fw_outputs)
+
+    def convert_model_impl(self, model_obj):
+        is_patched = False
+        if getattr(self.model.config, "torch_dtype", None) in [torch.float16, torch.bfloat16]:
+            patch(self.model)
+            is_patched = True
+        # initialize model after patching
+        self.model(**self.example)
+        with torch.no_grad():
+            ovm = super().convert_model_impl(self.model)
+        if is_patched:
+            unpatch(self.model, "_openvino_module_extension_patch_orig_forward")
+            # model_obj.float()
+        return ovm
+
+    def teardown_method(self):
+        # restore after gptq patching
+        if self.cuda_available is not None:
+            unpatch_gptq(self.cuda_available, self.gptq_postinit)
+            self.cuda_available, self.gptq_postinit = None, None
+        super().teardown_method()
+
+    @staticmethod
+    def get_pkv(model, tokenizer):
+        for_pkv = tokenizer("To get past key values",
+                            return_tensors='pt').__dict__['data']
+        with torch.no_grad():
+            pkv = model(**for_pkv)[1]
+
+        return pkv, for_pkv["attention_mask"]
+
+    @pytest.mark.parametrize("type,name", [
+        ("opt", "katuni4ka/opt-125m-gptq"),
+        ("llama", "TinyLlama/TinyLlama-1.1B-Chat-v1.0"),
+    ])
+    @pytest.mark.precommit
+    @pytest.mark.nightly
+    def test_convert_model_precommit(self, name, type, ie_device):
+        self.run(model_name=name, model_link=type, ie_device=ie_device)
+
+    @pytest.mark.parametrize("type,name", [
+        ("baichuan", "baichuan-inc/Baichuan2-7B-Base"),
+        pytest.param("chatglm", "THUDM/chatglm3-6b",
+                     marks=pytest.mark.xfail(reason="Accuracy validation failed")),
+        ("falcon", "tiiuae/falcon-7b-instruct"),
+        ("gemma", "beomi/gemma-ko-7b"),
+        ("gpt_neox", "EleutherAI/gpt-neox-20b"),
+        ("gpt_neox", "togethercomputer/RedPajama-INCITE-7B-Instruct"),
+        ("gpt_neox_japanese", "rinna/japanese-gpt-neox-3.6b"),
+        #pytest.param("gptj", "databricks/dolly-v1-6b",marks=pytest.mark.xfail(reason="prim::Constant")),
+        ("llama", "lmsys/vicuna-7b-v1.5"),
+        ("llama-2", "TheBloke/Llama-2-7B-GPTQ"),
+        pytest.param("llama-3.1", "hugging-quants/Meta-Llama-3.1-8B-Instruct-GPTQ-INT4",
+                     marks=pytest.mark.xfail(reason="Accuracy validation failed")),
+        pytest.param("mpt", "mosaicml/mpt-7b",
+                     marks=pytest.mark.xfail(reason="tuple index out of range")),
+        ("opt", "facebook/opt-1.3b"),
+        ("persimmon", "adept/persimmon-8b-base"),
+        ("phi", "microsoft/phi-2"),
+        ("phi3", "microsoft/Phi-3-mini-4k-instruct"),
+        pytest.param("qwen", "TheBloke/Qwen-7B-Chat-GPTQ",
+                     marks=pytest.mark.xfail(reason="Accuracy validation failed")),
+        ("qwen2", "Qwen/Qwen2-0.5B-Instruct"),
+        ("stablelm", "stabilityai/stablelm-3b-4e1t"),
+    ])
+    @pytest.mark.nightly
+    def test_convert_model_nightly(self, name, type, ie_device):
+        self.run(model_name=name, model_link=type, ie_device=ie_device)
TestTimmConvertModel(TestTorchConvertModel): + @retry(3, exceptions=(HfHubHTTPError, LocalEntryNotFoundError), delay=1) def load_model(self, model_name, model_link): m = timm.create_model(model_name, pretrained=True) cfg = timm.get_pretrained_cfg(model_name) From 6b4b54edadb8af1889c8ce78b3586052ae4e1c16 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 30 Jul 2024 13:04:50 +0400 Subject: [PATCH 07/10] Bump actions/upload-artifact from 4.3.3 to 4.3.4 (#25788) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 4.3.3 to 4.3.4.
Release notes (sourced from actions/upload-artifact's releases):

Full Changelog for v4.3.4: https://github.com/actions/upload-artifact/compare/v4.3.3...v4.3.4

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=actions/upload-artifact&package-manager=github_actions&previous-version=4.3.3&new-version=4.3.4)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)
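For reference, the change below replaces the action's tag reference with a pinned full commit SHA, keeping the human-readable version in a trailing comment. A minimal sketch of the resulting step, reusing the step and artifact names from this workflow (the `path` value is illustrative, not taken from the diff):

```yaml
- name: Upload build logs
  uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4
  if: always()
  with:
    name: build_logs
    path: build/logs
```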
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/linux_cpu_dev.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/linux_cpu_dev.yml b/.github/workflows/linux_cpu_dev.yml index 94a8d308f54fe3..faf0c74934d169 100644 --- a/.github/workflows/linux_cpu_dev.yml +++ b/.github/workflows/linux_cpu_dev.yml @@ -167,7 +167,7 @@ jobs: # Upload build artifacts and logs # - name: Upload build logs - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 if: always() with: name: build_logs @@ -176,7 +176,7 @@ jobs: - name: Upload openvino package if: ${{ always() }} - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_package path: ${{ env.BUILD_DIR }}/openvino_package.tar.gz @@ -184,7 +184,7 @@ jobs: - name: Upload openvino tests package if: ${{ always() }} - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_tests path: ${{ env.BUILD_DIR }}/openvino_tests.tar.gz @@ -268,7 +268,7 @@ jobs: timeout-minutes: 25 - name: Upload Test Results - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 if: always() with: name: test-results-functional-cpu From c67dce1089e8e1005234d674102dd5226580a0da Mon Sep 17 00:00:00 2001 From: Aleksandr Voron Date: Tue, 30 Jul 2024 11:13:31 +0200 Subject: [PATCH 08/10] [ARM] Enable OpenMP on ARM platforms (#25329) ### Details: Apple's `llvm` does not support `-fopenmp`. Brew's `llvm` avoids this issue on OpenVINO build, however ACL build fails with the error `clang: error: unsupported argument 'libomp' to option '-fopenmp='`. The solution is to use `gcc` on Apple: ``` CXX=/opt/homebrew/Cellar/gcc/14.1.0_1/bin/g++-14 CC=/opt/homebrew/Cellar/gcc/14.1.0_1/bin/gcc-14 cmake -DTHREADING=OMP .. ``` On Linux compiler issues were not observed. `ACLScheduler` uses only 1 thread if OpenMP is used because `parallel_get_num_threads()` returns 1 in non-parallel section. The fix is suggested in https://github.com/openvinotoolkit/openvino/pull/25335 ### Tickets: - *ticket-id* --- cmake/dependencies.cmake | 15 ++++++++++++++- cmake/developer_package/compile_flags/sdl.cmake | 2 +- cmake/developer_package/plugins/plugins.cmake | 6 +++++- cmake/features.cmake | 5 +---- src/cmake/ov_parallel.cmake | 11 ++++++++++- src/plugins/intel_cpu/thirdparty/ACLConfig.cmake | 15 +++++++++++++-- 6 files changed, 44 insertions(+), 10 deletions(-) diff --git a/cmake/dependencies.cmake b/cmake/dependencies.cmake index 6edda8136b338f..117cf5d2765e6f 100644 --- a/cmake/dependencies.cmake +++ b/cmake/dependencies.cmake @@ -35,7 +35,20 @@ if(THREADING STREQUAL "OMP") SHA256 "591ea4a7e08bbe0062648916f42bded71d24c27f00af30a8f31a29b5878ea0cc" USE_NEW_LOCATION TRUE) else() - message(FATAL_ERROR "Intel OMP is not available on current platform") + message(WARNING "Pre-built Intel OMP is not available on current platform. 
System OMP will be used.") + find_package(OpenMP) + if(OpenMP_CXX_FOUND) + foreach(OpenMP_LIB ${OpenMP_CXX_LIBRARIES}) + string(FIND ${OpenMP_LIB} "omp" OpenMP_LIB_OMP_INDEX) + if(NOT OpenMP_LIB_OMP_INDEX EQUAL -1) + cmake_path(GET OpenMP_LIB PARENT_PATH OpenMP_LIB_DIR) + set(OMP_LIB ${OpenMP_LIB} CACHE FILEPATH "Path to OMP library") + set(OMP ${OpenMP_LIB_DIR} CACHE FILEPATH "Path to OMP root folder") + return() + endif() + endforeach() + endif() + message(FATAL_ERROR "System OpenMP has not been found") endif() update_deps_cache(OMP "${OMP}" "Path to OMP root folder") debug_message(STATUS "intel_omp=" ${OMP}) diff --git a/cmake/developer_package/compile_flags/sdl.cmake b/cmake/developer_package/compile_flags/sdl.cmake index d399ec4f62fe0d..4bc0edd33c4975 100644 --- a/cmake/developer_package/compile_flags/sdl.cmake +++ b/cmake/developer_package/compile_flags/sdl.cmake @@ -29,7 +29,7 @@ if(CMAKE_COMPILER_IS_GNUCXX OR OV_COMPILER_IS_CLANG OR OV_COMPILER_IS_INTEL_LLVM # Remove all symbol table and relocation information from the executable set(OV_C_CXX_FLAGS "${OV_C_CXX_FLAGS} -s") endif() - if(NOT MINGW) + if(NOT MINGW AND NOT APPLE) set(OV_LINKER_FLAGS "${OV_LINKER_FLAGS} -z noexecstack -z relro -z now") endif() elseif(OV_COMPILER_IS_CLANG OR OV_COMPILER_IS_INTEL_LLVM) diff --git a/cmake/developer_package/plugins/plugins.cmake b/cmake/developer_package/plugins/plugins.cmake index c76ed82388f3cc..4be67e24cdb6c7 100644 --- a/cmake/developer_package/plugins/plugins.cmake +++ b/cmake/developer_package/plugins/plugins.cmake @@ -93,7 +93,11 @@ function(ov_add_plugin) endif() if(CMAKE_COMPILER_IS_GNUCXX AND NOT CMAKE_CROSSCOMPILING) - target_link_options(${OV_PLUGIN_NAME} PRIVATE -Wl,--unresolved-symbols=ignore-in-shared-libs) + if (APPLE) + target_link_options(${OV_PLUGIN_NAME} PRIVATE -Wl,-undefined,dynamic_lookup) + else() + target_link_options(${OV_PLUGIN_NAME} PRIVATE -Wl,--unresolved-symbols=ignore-in-shared-libs) + endif() endif() set(custom_filter "") diff --git a/cmake/features.cmake b/cmake/features.cmake index 4063e2f8545ced..59dd3b286f0cc1 100644 --- a/cmake/features.cmake +++ b/cmake/features.cmake @@ -92,10 +92,7 @@ else() set(THREADING_DEFAULT "TBB") endif() -set(THREADING_OPTIONS "TBB" "TBB_AUTO" "SEQ") -if(NOT APPLE) - list(APPEND THREADING_OPTIONS "OMP") -endif() +set(THREADING_OPTIONS "TBB" "TBB_AUTO" "SEQ" "OMP") set(THREADING "${THREADING_DEFAULT}" CACHE STRING "Threading") set_property(CACHE THREADING PROPERTY STRINGS ${THREADING_OPTIONS}) diff --git a/src/cmake/ov_parallel.cmake b/src/cmake/ov_parallel.cmake index 0f5be2ed43518a..cfb69ce7b1445f 100644 --- a/src/cmake/ov_parallel.cmake +++ b/src/cmake/ov_parallel.cmake @@ -329,6 +329,11 @@ function(ov_set_threading_interface_for TARGET_NAME) elseif (THREADING STREQUAL "OMP") if (WIN32) set(omp_lib_name libiomp5md) + elseif (ARM OR AARCH64) + get_filename_component(OpenMP_CXX_LIB_NAME ${OMP_LIB} NAME) + string(REGEX REPLACE "^lib" "" OpenMP_CXX_LIB_NAME ${OpenMP_CXX_LIB_NAME}) + string(REGEX REPLACE "\\.[^.]*$" "" OpenMP_CXX_LIB_NAME ${OpenMP_CXX_LIB_NAME}) + set(omp_lib_name ${OpenMP_CXX_LIB_NAME}) else () set(omp_lib_name iomp5) endif () @@ -343,7 +348,11 @@ function(ov_set_threading_interface_for TARGET_NAME) set(lib_dbg_path ${lib_rel_path}) endif () else () - set(lib_rel_path ${OMP}/lib) + if (ARM OR AARCH64) + set(lib_rel_path ${OMP}) + else() + set(lib_rel_path ${OMP}/lib) + endif () set(lib_dbg_path ${lib_rel_path}) endif () diff --git a/src/plugins/intel_cpu/thirdparty/ACLConfig.cmake 
b/src/plugins/intel_cpu/thirdparty/ACLConfig.cmake index 09774aa4bec493..a142b5277202e5 100644 --- a/src/plugins/intel_cpu/thirdparty/ACLConfig.cmake +++ b/src/plugins/intel_cpu/thirdparty/ACLConfig.cmake @@ -264,6 +264,16 @@ elseif(NOT TARGET arm_compute::arm_compute) get_filename_component(toolchain_prefix "${CMAKE_CXX_COMPILER}" DIRECTORY) list(APPEND ARM_COMPUTE_OPTIONS toolchain_prefix="${toolchain_prefix}/") elseif(APPLE) + # we need to bypass this information in case of custom compiler is passed + # to cmake call. Such compiler and compiler prefix need to be passed to scons + get_filename_component(cxx_compiler "${CMAKE_CXX_COMPILER}" NAME) + get_filename_component(c_compiler "${CMAKE_C_COMPILER}" NAME) + get_filename_component(compiler_prefix "${CMAKE_CXX_COMPILER}" DIRECTORY) + + set(cmake_build_env + CC=${c_compiler} + CXX=${cxx_compiler}) + if(CMAKE_OSX_DEPLOYMENT_TARGET) set(extra_cxx_flags "${extra_cxx_flags} -mmacosx-version-min=${CMAKE_OSX_DEPLOYMENT_TARGET}") set(minos_added ON) @@ -275,8 +285,9 @@ elseif(NOT TARGET arm_compute::arm_compute) endif() set(extra_cxx_flags "${extra_cxx_flags} --sysroot ${CMAKE_OSX_SYSROOT}") endif() - - set(extra_cxx_flags "${extra_cxx_flags} -Wno-error=return-stack-address") + if(OV_COMPILER_IS_CLANG) + set(extra_cxx_flags "${extra_cxx_flags} -Wno-error=return-stack-address") + endif() get_filename_component(compiler_prefix "${CMAKE_CXX_COMPILER}" DIRECTORY) list(APPEND ARM_COMPUTE_OPTIONS compiler_prefix="${compiler_prefix}/") From dd2f6141b0e162789332bfccc7dfaa90d6584d2a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 30 Jul 2024 10:15:44 +0000 Subject: [PATCH 09/10] Bump actions/download-artifact from 4.1.7 to 4.1.8 (#25789) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [actions/download-artifact](https://github.com/actions/download-artifact) from 4.1.7 to 4.1.8.
Release notes (sourced from actions/download-artifact's releases):

Full Changelog for v4.1.8: https://github.com/actions/download-artifact/compare/v4...v4.1.8

Commits:
- fa0a91b Merge pull request #341 from actions/robherley/bump-pkgs
- b54d088 Update @actions/artifact version, bump dependencies

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=actions/download-artifact&package-manager=github_actions&previous-version=4.1.7&new-version=4.1.8)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Ilya Lavrenov --- .github/workflows/linux_cpu_dev.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/linux_cpu_dev.yml b/.github/workflows/linux_cpu_dev.yml index faf0c74934d169..447a8c52968044 100644 --- a/.github/workflows/linux_cpu_dev.yml +++ b/.github/workflows/linux_cpu_dev.yml @@ -209,13 +209,13 @@ jobs: PARALLEL_TEST_SCRIPT: ${{ github.workspace }}/install/tests/functional_test_utils/layer_tests_summary/run_parallel.py steps: - name: Download OpenVINO package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_package path: ${{ env.INSTALL_DIR }} - name: Download OpenVINO tests package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_tests path: ${{ env.INSTALL_TEST_DIR }} From d253f4fd89c1a77a68cac0fa3d97e627a8ed4467 Mon Sep 17 00:00:00 2001 From: Ivan Tikhonov Date: Tue, 30 Jul 2024 14:52:55 +0400 Subject: [PATCH 10/10] PassManager refactoring and new debug caps (#25637) ### Details: - Simplified run_passes logic in pass::Manager class - Moved debugging logic to a separate Profiler class - Added a name for pass::Manager - Extended debug caps: added serialization by env variable, added filtring and collection perf statistics in a file New debug output format: ![image_2024-07-25_17-26-54](https://github.com/user-attachments/assets/2504fa83-3496-47e7-89a3-27c66b3e97b1) ### Tickets: - *CVS-147285* --- .../src/align_quantization_intervals.cpp | 2 +- .../src/align_quantization_parameters.cpp | 2 +- .../src/low_precision.cpp | 4 +- .../markup_avg_pool_precision_preserved.cpp | 2 +- .../src/propagate_precisions.cpp | 2 +- .../src/pass/common_optimizations.cpp | 2 +- .../snippets/src/pass/fq_decomposition.cpp | 2 +- src/common/snippets/src/pass/tokenization.cpp | 2 +- .../common_optimizations.cpp | 2 +- .../convert_nms_gather_path_to_unsigned.cpp | 2 +- .../moc_legacy_transformations.cpp | 2 +- .../moc_transformations.cpp | 2 +- .../optimize_strided_slice.cpp | 2 +- .../common_optimizations/ric_fusion.cpp | 2 +- .../simplify_shape_of_sub_graph.cpp | 2 +- .../src/transformations/convert_precision.cpp | 4 +- .../convert_compression_only_to_legacy.cpp | 2 +- ...k_subgraphs_to_keep_in_mixed_precision.cpp | 2 +- .../convert_opset2_to_opset1.cpp | 2 +- .../convert_opset3_to_opset2.cpp | 2 +- .../smart_reshape/smart_reshape.cpp | 4 +- .../symbolic_optimizations.cpp | 2 +- src/core/include/openvino/pass/manager.hpp | 15 +- src/core/src/graph_util.cpp | 4 +- src/core/src/model.cpp | 2 +- src/core/src/pass/convert_fp32_to_fp16.cpp | 2 +- src/core/src/pass/manager.cpp | 300 +++++++++++++----- src/core/src/pass/sdpa_to_paged_attention.cpp | 2 +- src/frontends/ir/src/frontend.cpp | 2 +- src/frontends/onnx/frontend/src/frontend.cpp | 6 +- src/frontends/paddle/src/frontend.cpp | 8 +- src/frontends/pytorch/src/frontend.cpp | 2 +- src/frontends/tensorflow/src/frontend.cpp | 4 +- .../tensorflow_lite/src/frontend.cpp | 6 +- .../tflite_quantize_resolver.cpp | 2 +- src/plugins/auto_batch/src/plugin.cpp | 2 +- .../convert_to_cpu_specific_opset.hpp | 2 +- .../transformation_pipeline.cpp | 12 +- .../transformations/convert_convolution.cpp | 2 +- 
.../src/plugin/transformations_pipeline.cpp | 8 +- .../compiler/src/graph_transformations.cpp | 2 +- src/plugins/template/src/plugin.cpp | 2 +- 42 files changed, 287 insertions(+), 148 deletions(-) diff --git a/src/common/low_precision_transformations/src/align_quantization_intervals.cpp b/src/common/low_precision_transformations/src/align_quantization_intervals.cpp index e8f8bb0250d8af..2d28adfe7f8570 100644 --- a/src/common/low_precision_transformations/src/align_quantization_intervals.cpp +++ b/src/common/low_precision_transformations/src/align_quantization_intervals.cpp @@ -20,7 +20,7 @@ ov::pass::low_precision::AlignQuantizationIntervals::AlignQuantizationIntervals( bool ov::pass::low_precision::AlignQuantizationIntervals::run_on_model(const std::shared_ptr& f) { RUN_ON_FUNCTION_SCOPE(AlignQuantizationIntervals); - ov::pass::Manager manager; + ov::pass::Manager manager("LPT:AlignQuantizationIntervals"); manager.set_per_pass_validation(false); std::shared_ptr intervalsAlignment = manager.register_pass(); intervalsAlignment->add_matcher>( diff --git a/src/common/low_precision_transformations/src/align_quantization_parameters.cpp b/src/common/low_precision_transformations/src/align_quantization_parameters.cpp index 236cc802d4f9a3..88729c63a6faf7 100644 --- a/src/common/low_precision_transformations/src/align_quantization_parameters.cpp +++ b/src/common/low_precision_transformations/src/align_quantization_parameters.cpp @@ -21,7 +21,7 @@ ov::pass::low_precision::AlignQuantizationParameters::AlignQuantizationParameter bool ov::pass::low_precision::AlignQuantizationParameters::run_on_model(const std::shared_ptr& f) { RUN_ON_FUNCTION_SCOPE(AlignQuantizationParameters); - ov::pass::Manager manager; + ov::pass::Manager manager("LPT:AlignQuantizationParameters"); manager.set_per_pass_validation(false); std::shared_ptr propagation = manager.register_pass(); propagation->add_matcher>(); diff --git a/src/common/low_precision_transformations/src/low_precision.cpp b/src/common/low_precision_transformations/src/low_precision.cpp index 6435f47d12ffec..e58374ed3e2b1a 100644 --- a/src/common/low_precision_transformations/src/low_precision.cpp +++ b/src/common/low_precision_transformations/src/low_precision.cpp @@ -190,7 +190,7 @@ MarkupOptimizations::MarkupOptimizations( bool ov::pass::low_precision::MarkupOptimizations::run_on_model(const std::shared_ptr& f) { RUN_ON_FUNCTION_SCOPE(MarkupOptimizations); - ov::pass::Manager markup(get_pass_config()); + ov::pass::Manager markup(get_pass_config(), "LPT:MarkupOptimizations"); markup.set_per_pass_validation(false); markup.register_pass(params.defaultPrecisions); if (!precisionRestrictions.empty()) { @@ -217,7 +217,7 @@ bool ov::pass::low_precision::LowPrecision::run_on_model(const std::shared_ptr(); const std::vector supportedTypes = {ov::element::i8, ov::element::u8}; diff --git a/src/common/low_precision_transformations/src/markup_avg_pool_precision_preserved.cpp b/src/common/low_precision_transformations/src/markup_avg_pool_precision_preserved.cpp index ef5675bd764b5f..9dfe0c39caa419 100644 --- a/src/common/low_precision_transformations/src/markup_avg_pool_precision_preserved.cpp +++ b/src/common/low_precision_transformations/src/markup_avg_pool_precision_preserved.cpp @@ -19,7 +19,7 @@ ov::pass::low_precision::MarkupAvgPoolPrecisionPreserved::MarkupAvgPoolPrecision bool ov::pass::low_precision::MarkupAvgPoolPrecisionPreserved::run_on_model(const std::shared_ptr& f) { RUN_ON_FUNCTION_SCOPE(MarkupAvgPoolPrecisionPreserved); - ov::pass::Manager manager; + 
ov::pass::Manager manager("LPT:MarkupAvgPoolPrecisionPreserved"); manager.set_per_pass_validation(false); std::shared_ptr markupAvgPoolPrecision = manager.register_pass(); markupAvgPoolPrecision->add_matcher>(); diff --git a/src/common/low_precision_transformations/src/propagate_precisions.cpp b/src/common/low_precision_transformations/src/propagate_precisions.cpp index f849dbfe55263c..a899b6939a6039 100644 --- a/src/common/low_precision_transformations/src/propagate_precisions.cpp +++ b/src/common/low_precision_transformations/src/propagate_precisions.cpp @@ -21,7 +21,7 @@ ov::pass::low_precision::PropagatePrecisions::PropagatePrecisions(const Attribut bool ov::pass::low_precision::PropagatePrecisions::run_on_model(const std::shared_ptr& f) { RUN_ON_FUNCTION_SCOPE(PropagatePrecisions); - ov::pass::Manager manager; + ov::pass::Manager manager("LPT:PropagatePrecisions"); manager.set_per_pass_validation(false); std::shared_ptr precisionsPropagation = manager.register_pass(); precisionsPropagation->add_matcher>(params, AttributeSource::OutputPort); diff --git a/src/common/snippets/src/pass/common_optimizations.cpp b/src/common/snippets/src/pass/common_optimizations.cpp index 516737f621051c..d9cc6f7f819201 100644 --- a/src/common/snippets/src/pass/common_optimizations.cpp +++ b/src/common/snippets/src/pass/common_optimizations.cpp @@ -45,7 +45,7 @@ CommonOptimizations::CommonOptimizations(const SnippetsTokenization::Config& con // Firstly, we should transform all original Converts inside body to ConvertTruncation to save original behavior. // Then if Subgraph contains FakeQuantize we enable specific transformation for quantized subgraphs. - ov::pass::Manager manager(get_pass_config()); + ov::pass::Manager manager(get_pass_config(), "Snippets:CommonOptimizations"); REGISTER_SNIPPETS_PASS(manager, ov::snippets::pass::TransformConvertToConvertTruncation, true); REGISTER_SNIPPETS_PASS(manager, ov::snippets::pass::ExplicitTransposeMatMulInputs, is_domain_sensitive); REGISTER_SNIPPETS_PASS(manager, ov::snippets::pass::CommonFakeQuantizeDecomposition, is_quantized); diff --git a/src/common/snippets/src/pass/fq_decomposition.cpp b/src/common/snippets/src/pass/fq_decomposition.cpp index 2328e7f12c1681..fe5e98e8a8a4c9 100644 --- a/src/common/snippets/src/pass/fq_decomposition.cpp +++ b/src/common/snippets/src/pass/fq_decomposition.cpp @@ -375,7 +375,7 @@ bool ov::snippets::pass::CommonFakeQuantizeDecomposition::is_supported_fq(const bool ov::snippets::pass::CommonFakeQuantizeDecomposition::run_on_model(const std::shared_ptr& f) { RUN_ON_FUNCTION_SCOPE(CommonFakeQuantizeDecomposition); - ov::pass::Manager manager; + ov::pass::Manager manager("Snippets:CommonFakeQuantizeDecomposition"); manager.set_per_pass_validation(false); manager.register_pass(); manager.register_pass(); diff --git a/src/common/snippets/src/pass/tokenization.cpp b/src/common/snippets/src/pass/tokenization.cpp index 643f169f8b8416..43733fc196ee83 100644 --- a/src/common/snippets/src/pass/tokenization.cpp +++ b/src/common/snippets/src/pass/tokenization.cpp @@ -76,7 +76,7 @@ bool EnumerateNodes::run_on_model(const std::shared_ptr &m) { bool SnippetsTokenization::run_on_model(const std::shared_ptr& m) { RUN_ON_FUNCTION_SCOPE(SnippetsTokenization); - ov::pass::Manager manager(get_pass_config()); + ov::pass::Manager manager(get_pass_config(), "Snippets:Tokenization"); manager.set_per_pass_validation(false); manager.register_pass(); diff --git a/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp 
b/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp index 9da4340c2423f4..d7ca44e7ddad34 100644 --- a/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp @@ -121,7 +121,7 @@ bool ov::pass::CommonOptimizations::run_on_model(const std::shared_ptr& f) { RUN_ON_FUNCTION_SCOPE(CommonOptimizations); - ov::pass::Manager manager(get_pass_config()); + ov::pass::Manager manager(get_pass_config(), "CommonOptimizations"); manager.set_per_pass_validation(false); using namespace ov::pass; diff --git a/src/common/transformations/src/transformations/common_optimizations/convert_nms_gather_path_to_unsigned.cpp b/src/common/transformations/src/transformations/common_optimizations/convert_nms_gather_path_to_unsigned.cpp index 3252882472ffec..7c22dbdfeac53d 100644 --- a/src/common/transformations/src/transformations/common_optimizations/convert_nms_gather_path_to_unsigned.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/convert_nms_gather_path_to_unsigned.cpp @@ -102,7 +102,7 @@ class PropagateNMSPath : public pass::MatcherPass { for (size_t body_idx = 0; body_idx < models.size(); ++body_idx) { handle_params(multi_subgraph_op, models[body_idx], static_cast(body_idx)); - ov::pass::Manager manager; + ov::pass::Manager manager("PropagateNMSPath"); manager.register_pass(); manager.run_passes(models[body_idx]); handle_results(multi_subgraph_op, models[body_idx], static_cast(body_idx)); diff --git a/src/common/transformations/src/transformations/common_optimizations/moc_legacy_transformations.cpp b/src/common/transformations/src/transformations/common_optimizations/moc_legacy_transformations.cpp index 83cf163555c327..7c21f98439d9a4 100644 --- a/src/common/transformations/src/transformations/common_optimizations/moc_legacy_transformations.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/moc_legacy_transformations.cpp @@ -12,7 +12,7 @@ bool ov::pass::MOCLegacyTransformations::run_on_model(const std::shared_ptr& f) { RUN_ON_MODEL_SCOPE(MOCLegacyTransformations); - ov::pass::Manager manager(get_pass_config()); + ov::pass::Manager manager(get_pass_config(), "MOCLegacyTransformations"); using namespace ov::pass; REGISTER_PASS(manager, ChangePlaceholderTypes, m_params_with_custom_types) manager.run_passes(f); diff --git a/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp b/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp index 44c227623f444d..3cf542377d5adc 100644 --- a/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp @@ -125,7 +125,7 @@ bool ov::pass::MOCTransformations::run_on_model(const std::shared_ptr f->validate_nodes_and_infer_types(); } - ov::pass::Manager manager(get_pass_config()); + ov::pass::Manager manager(get_pass_config(), "MOC"); manager.set_per_pass_validation(false); using namespace ov::pass; REGISTER_PASS(manager, InitNodeInfo) diff --git a/src/common/transformations/src/transformations/common_optimizations/optimize_strided_slice.cpp b/src/common/transformations/src/transformations/common_optimizations/optimize_strided_slice.cpp index ace7e544bc994c..8d093878ff0b93 100644 --- 
a/src/common/transformations/src/transformations/common_optimizations/optimize_strided_slice.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/optimize_strided_slice.cpp @@ -425,7 +425,7 @@ ov::pass::StridedSliceOptimization::StridedSliceOptimization(bool use_shapes) { bool ov::pass::StridedSliceOptimization::run_on_model(const std::shared_ptr& f) { RUN_ON_FUNCTION_SCOPE(StridedSliceOptimization); - ov::pass::Manager manager; + ov::pass::Manager manager("StridedSliceOptimization"); manager.set_per_pass_validation(false); if (m_use_shapes) { manager.register_pass(); diff --git a/src/common/transformations/src/transformations/common_optimizations/ric_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/ric_fusion.cpp index 7c095fbe89fe1f..6e44692b5f169c 100644 --- a/src/common/transformations/src/transformations/common_optimizations/ric_fusion.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/ric_fusion.cpp @@ -844,7 +844,7 @@ bool ov::pass::ReverseInputChannelsFusion::run_on_model(const std::shared_ptr(); ADD_MATCHER(ric_init, SplitConcat, nodes_to_fuse) diff --git a/src/common/transformations/src/transformations/common_optimizations/simplify_shape_of_sub_graph.cpp b/src/common/transformations/src/transformations/common_optimizations/simplify_shape_of_sub_graph.cpp index d4bb02227c56ac..a225f0655f98ee 100644 --- a/src/common/transformations/src/transformations/common_optimizations/simplify_shape_of_sub_graph.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/simplify_shape_of_sub_graph.cpp @@ -353,7 +353,7 @@ pass::SimplifySecondInputOfReshape::SimplifySecondInputOfReshape() { bool pass::SimplifyShapeOfSubGraph::run_on_model(const std::shared_ptr& f) { RUN_ON_FUNCTION_SCOPE(SimplifyShapeOfSubGraph); - Manager manager(get_pass_config()); + Manager manager(get_pass_config(), "SimplifyShapeOfSubGraph"); manager.set_per_pass_validation(false); REGISTER_PASS(manager, PrepareShapeOpsForEliminationAroundBE) diff --git a/src/common/transformations/src/transformations/convert_precision.cpp b/src/common/transformations/src/transformations/convert_precision.cpp index 6d9d11ff52bcba..3c819d481bacee 100644 --- a/src/common/transformations/src/transformations/convert_precision.cpp +++ b/src/common/transformations/src/transformations/convert_precision.cpp @@ -413,7 +413,7 @@ bool ov::pass::ConvertPrecision::run_on_model(const std::shared_ptr& bool has_fp16_compression = m_precisions.count(element::f32) > 0 && m_precisions[element::f32] == element::f16; if (m_keep_precision_sensitive_in_fp32 && has_fp16_compression) { - pass::Manager manager(get_pass_config()); + pass::Manager manager(get_pass_config(), "KeepPrecisionSensitiveInFP32"); // Mark subgraphs with disable_fp16_compression to keep them in FP32 manager.register_pass(); manager.register_pass(); @@ -494,7 +494,7 @@ bool ov::pass::ConvertPrecision::run_on_model(const std::shared_ptr& // to remove extra converts if (m_keep_precision_sensitive_in_fp32) { - pass::Manager manager(get_pass_config()); + pass::Manager manager(get_pass_config(), "KeepPrecisionSensitiveInFP32:RemoveConverts"); manager.register_pass(); manager.register_pass(); manager.run_passes(f); diff --git a/src/common/transformations/src/transformations/fp16_compression/convert_compression_only_to_legacy.cpp b/src/common/transformations/src/transformations/fp16_compression/convert_compression_only_to_legacy.cpp index ebeb900a31f2b8..b471424aeb9e65 100644 --- 
a/src/common/transformations/src/transformations/fp16_compression/convert_compression_only_to_legacy.cpp +++ b/src/common/transformations/src/transformations/fp16_compression/convert_compression_only_to_legacy.cpp @@ -17,7 +17,7 @@ using namespace ov; bool ov::pass::ConvertCompressedOnlyToLegacy::run_on_model(const std::shared_ptr& f) { RUN_ON_MODEL_SCOPE(ConvertCompressedOnlyToLegacy); if (ov::op::util::has_decompression_converts(f)) { - Manager manager(get_pass_config()); + Manager manager(get_pass_config(), "ConvertCompressedOnlyToLegacy"); const precisions_map convert_precision_map{{ov::element::f32, ov::element::f16}}; manager.register_pass(convert_precision_map); diff --git a/src/common/transformations/src/transformations/fp16_compression/mark_subgraphs_to_keep_in_mixed_precision.cpp b/src/common/transformations/src/transformations/fp16_compression/mark_subgraphs_to_keep_in_mixed_precision.cpp index c5e768fa687bc2..45c455fd61e87c 100644 --- a/src/common/transformations/src/transformations/fp16_compression/mark_subgraphs_to_keep_in_mixed_precision.cpp +++ b/src/common/transformations/src/transformations/fp16_compression/mark_subgraphs_to_keep_in_mixed_precision.cpp @@ -423,7 +423,7 @@ class PropagateDownDisableSensitivityForQuantized : public pass::MatcherPass { bool MarkSugraphsToKeepInMixedPrecision::run_on_model(const shared_ptr& m) { RUN_ON_MODEL_SCOPE(MarkSugraphsToKeepInMixedPrecision); - Manager manager(get_pass_config()); + Manager manager(get_pass_config(), "MarkSugraphsToKeepInMixedPrecision"); // Mark root of Division with eps pattern to keep in FP32 REGISTER_PASS(manager, MarkDivWithEps) REGISTER_PASS(manager, MarkExpInReduceOpPath) diff --git a/src/common/transformations/src/transformations/opset_conversions/convert_opset2_to_opset1.cpp b/src/common/transformations/src/transformations/opset_conversions/convert_opset2_to_opset1.cpp index ab042a4cbdaaf4..3279fcb3e6cb55 100644 --- a/src/common/transformations/src/transformations/opset_conversions/convert_opset2_to_opset1.cpp +++ b/src/common/transformations/src/transformations/opset_conversions/convert_opset2_to_opset1.cpp @@ -14,7 +14,7 @@ bool ov::pass::ConvertOpSet2ToOpSet1::run_on_model(const std::shared_ptr& f) { RUN_ON_FUNCTION_SCOPE(ConvertOpSet2ToOpSet1); - ov::pass::Manager manager(get_pass_config()); + ov::pass::Manager manager(get_pass_config(), "ConvertOpSet2ToOpSet1"); manager.set_per_pass_validation(false); manager.register_pass(); diff --git a/src/common/transformations/src/transformations/opset_conversions/convert_opset3_to_opset2.cpp b/src/common/transformations/src/transformations/opset_conversions/convert_opset3_to_opset2.cpp index 4c9ef1950ace84..045ad23a506220 100644 --- a/src/common/transformations/src/transformations/opset_conversions/convert_opset3_to_opset2.cpp +++ b/src/common/transformations/src/transformations/opset_conversions/convert_opset3_to_opset2.cpp @@ -17,7 +17,7 @@ bool ov::pass::ConvertOpSet3ToOpSet2::run_on_model(const std::shared_ptr& f) { RUN_ON_FUNCTION_SCOPE(ConvertOpSet3ToOpSet2); - ov::pass::Manager manager(get_pass_config()); + ov::pass::Manager manager(get_pass_config(), "ConvertOpSet3ToOpSet2"); manager.set_per_pass_validation(false); manager.register_pass(); diff --git a/src/common/transformations/src/transformations/smart_reshape/smart_reshape.cpp b/src/common/transformations/src/transformations/smart_reshape/smart_reshape.cpp index 67e7084380375e..e1ddcdc3540e56 100644 --- a/src/common/transformations/src/transformations/smart_reshape/smart_reshape.cpp +++ 
b/src/common/transformations/src/transformations/smart_reshape/smart_reshape.cpp @@ -21,7 +21,7 @@ bool ov::pass::SmartReshape::run_on_model(const std::shared_ptr& f) { RUN_ON_FUNCTION_SCOPE(SmartReshape); - ov::pass::Manager static_manager; + ov::pass::Manager static_manager("SmartReshape:static"); // This pass must be called first in pipeline static_manager.register_pass(); static_manager.register_pass(); @@ -37,7 +37,7 @@ bool ov::pass::SmartReshape::run_on_model(const std::shared_ptr& f) { static_manager.register_pass(); static_manager.run_passes(f); - ov::pass::Manager dynamic_manager; + ov::pass::Manager dynamic_manager("SmartReshape:dynamic"); // function revalidation will cause "fake" dynamism due to ShapeOf ops insertions // we turn it off to have access to originally static shapes dynamic_manager.set_per_pass_validation(false); diff --git a/src/common/transformations/src/transformations/symbolic_transformations/symbolic_optimizations.cpp b/src/common/transformations/src/transformations/symbolic_transformations/symbolic_optimizations.cpp index 197644ffca6d84..d6629f326a2a70 100644 --- a/src/common/transformations/src/transformations/symbolic_transformations/symbolic_optimizations.cpp +++ b/src/common/transformations/src/transformations/symbolic_transformations/symbolic_optimizations.cpp @@ -172,7 +172,7 @@ ov::pass::LabelResolvingThroughSelect::LabelResolvingThroughSelect() { } ov::pass::SymbolicOptimizations::SymbolicOptimizations(bool full_run) { - m_manager = std::make_shared(); + m_manager = std::make_shared("Symbolic"); m_manager->set_per_pass_validation(false); #define REGISTER_SYMBOLIC(region, ...) m_manager->register_pass(__VA_ARGS__); diff --git a/src/core/include/openvino/pass/manager.hpp b/src/core/include/openvino/pass/manager.hpp index 8ca9ce354eeb5c..a026957697f2db 100644 --- a/src/core/include/openvino/pass/manager.hpp +++ b/src/core/include/openvino/pass/manager.hpp @@ -23,8 +23,11 @@ class OPENVINO_API Manager { Manager(); virtual ~Manager(); + //// \brief Construct Manager with a provided name. + explicit Manager(std::string name); + //// \brief Construct Manager with shared PassConfig instance - explicit Manager(std::shared_ptr pass_config); + explicit Manager(std::shared_ptr pass_config, std::string name = "UnnamedManager"); /// \brief Register given transformation class type to execution list /// Example below show the basic usage of pass::Manager @@ -66,11 +69,8 @@ class OPENVINO_API Manager { /// /// \return Returns true if the model was changed by transformations, /// false otherwise. 
- bool run_passes(std::shared_ptr model); + bool run_passes(const std::shared_ptr& model); - void set_pass_visualization(bool new_state) { - m_visualize = new_state; - } /// \brief Set flag to enable/disable running Validate pass after executing /// each registered pass /// \param new_state Value "true" enables Validate pass run; "false", otherwise @@ -97,8 +97,11 @@ class OPENVINO_API Manager { std::shared_ptr m_pass_config; std::vector> m_pass_list; - bool m_visualize = false; bool m_per_pass_validation = true; + std::string m_name = "UnnamedManager"; + +private: + bool run_pass(const std::shared_ptr& pass, const std::shared_ptr& model, bool needs_validate); }; } // namespace pass } // namespace ov diff --git a/src/core/src/graph_util.cpp b/src/core/src/graph_util.cpp index fa36c61cb38d84..17780f7751d52e 100644 --- a/src/core/src/graph_util.cpp +++ b/src/core/src/graph_util.cpp @@ -326,7 +326,7 @@ void serialize(const std::shared_ptr& m, const std::string& xml_path, const std::string& bin_path, ov::pass::Serialize::Version version) { - ov::pass::Manager manager; + ov::pass::Manager manager("Serialize"); manager.register_pass(xml_path, bin_path, version); manager.run_passes(std::const_pointer_cast(m)); } @@ -339,7 +339,7 @@ void save_model(const std::shared_ptr& m, const std::string& ou ov::pass::compress_model_to_f16(cloned, postponed); } - ov::pass::Manager manager; + ov::pass::Manager manager("SaveModel"); manager.register_pass(); manager.register_pass(output_model, ""); manager.run_passes(std::move(cloned)); diff --git a/src/core/src/model.cpp b/src/core/src/model.cpp index 88609222885f05..1493d950cd78ef 100644 --- a/src/core/src/model.cpp +++ b/src/core/src/model.cpp @@ -870,7 +870,7 @@ void ov::Model::reshape(const std::map, ov::PartialShape>& }; try { - ov::pass::Manager ssr_manager; + ov::pass::Manager ssr_manager("SmartReshape"); ssr_manager.register_pass(); ssr_manager.run_passes(shared_from_this()); diff --git a/src/core/src/pass/convert_fp32_to_fp16.cpp b/src/core/src/pass/convert_fp32_to_fp16.cpp index 4ccd024e1cf830..aab1180ff47dfb 100644 --- a/src/core/src/pass/convert_fp32_to_fp16.cpp +++ b/src/core/src/pass/convert_fp32_to_fp16.cpp @@ -10,7 +10,7 @@ bool ov::pass::ConvertFP32ToFP16::run_on_model(const std::shared_ptr& f) { RUN_ON_MODEL_SCOPE(ConvertFP32ToFP16); - ov::pass::Manager m(get_pass_config()); + ov::pass::Manager m(get_pass_config(), "ConvertFP32ToFP16"); m.register_pass(precisions_map{{ov::element::f32, ov::element::f16}}); m.run_passes(f); return false; diff --git a/src/core/src/pass/manager.cpp b/src/core/src/pass/manager.cpp index 13404562a4318d..4b01c09a077bde 100644 --- a/src/core/src/pass/manager.cpp +++ b/src/core/src/pass/manager.cpp @@ -5,15 +5,18 @@ #include "openvino/pass/manager.hpp" #include +#include #include #include #include #include #include +#include #include "itt.hpp" #include "openvino/pass/graph_rewrite.hpp" #include "openvino/pass/visualize_tree.hpp" +#include "openvino/util/common_util.hpp" #include "openvino/util/env_util.hpp" #include "openvino/util/log.hpp" #include "perf_counters.hpp" @@ -36,35 +39,73 @@ PerfCounters& perf_counters() { #endif // ENABLE_PROFILING_ITT namespace { -bool getenv_visualize_tracing() { - return ov::util::getenv_bool("OV_ENABLE_VISUALIZE_TRACING"); -} -} // namespace -ov::pass::Manager::Manager() : m_pass_config(std::make_shared()), m_visualize(getenv_visualize_tracing()) {} +/** + * @brief EnvVar gets the environment variable value by name. 
+ * It tries to interpret the value as boolean, if it fails then + * the original string value is stored. This behavior helps us to reduce the number + * of the additional env variables. + * + * Example of usage: + * if OV_ENABLE_PROFILE_PASS is true, it enables console output. + * if OV_ENABLE_PROFILE_PASS contains a path to file (string), the out logs + * will be re-directed to the file. + */ +class EnvVar { +public: + explicit EnvVar(const std::string& var) { + const auto& val = ov::util::getenv_string(var.c_str()); + std::set off = {"0", "false", "off"}; + std::set on = {"1", "true", "on"}; -ov::pass::Manager::~Manager() = default; + const auto& val_lower = ov::util::to_lower(var); + if (off.count(val_lower)) { + m_is_bool = true; + } else if (on.count(val_lower)) { + m_is_bool = true; + b_value = true; + } else { + s_value = val; + } + } -ov::pass::Manager::Manager(std::shared_ptr pass_config) - : m_pass_config(std::move(pass_config)), - m_visualize(getenv_visualize_tracing()) {} + /** + * @brief This ctor helps to activate/deactivate EnvVar from the code. + */ + explicit EnvVar(const std::string& var, bool activate) { + m_is_bool = true; + b_value = activate; + } -void ov::pass::Manager::set_per_pass_validation(bool new_state) { - m_per_pass_validation = new_state; -} + bool is_enabled() const { + return b_value || !s_value.empty(); + } + + bool is_bool() const { + return m_is_bool; + } + + const std::string& get_str() const { + return s_value; + } + +private: + bool m_is_bool = false; + bool b_value = false; + std::string s_value; +}; -namespace { class stopwatch { public: void start() { - if (m_active == false) { + if (!m_active) { m_active = true; m_start_time = m_clock.now(); } } void stop() { - if (m_active == true) { + if (m_active) { auto end_time = m_clock.now(); m_last_time = end_time - m_start_time; m_active = false; @@ -89,89 +130,184 @@ class stopwatch { bool m_active = false; std::chrono::nanoseconds m_last_time = std::chrono::high_resolution_clock::duration::zero(); }; -} // namespace -bool ov::pass::Manager::run_passes(shared_ptr func) { - OV_ITT_SCOPED_TASK(ov::itt::domains::core, "pass::Manager::run_passes"); - - static bool profile_enabled = ov::util::getenv_bool("OV_PROFILE_PASS_ENABLE"); - - size_t index = 0; - stopwatch pass_timer; - stopwatch overall_timer; - overall_timer.start(); - bool pass_applied = false; - bool function_changed = false; - bool needs_validate = false; - for (auto& pass : m_pass_list) { - if (m_pass_config->is_disabled(pass->get_type_info())) { - OPENVINO_DEBUG("Pass ", pass->get_name(), " is disabled"); - continue; +class Profiler { +public: + explicit Profiler(std::string manager_name) + : m_visualize("OV_ENABLE_VISUALIZE_TRACING"), + m_serialize("OV_ENABLE_SERIALIZE_TRACING"), + m_profile_pass("OV_ENABLE_PROFILE_PASS"), + m_manager_name(std::move(manager_name)) { + if (m_profile_pass.is_enabled() && !m_profile_pass.is_bool()) { + m_file.open(m_profile_pass.get_str(), std::ios_base::app); } + } - OV_ITT_SCOPE(FIRST_INFERENCE, ov::itt::domains::ov_pass, ov::pass::perf_counters()[pass->get_type_info()]); + ~Profiler() { + if (m_file.is_open()) { + m_file.close(); + } + } - pass_timer.start(); + void start_timer(const std::string& name) { + if (m_profile_pass.is_enabled()) { + stopwatches[name] = stopwatch(); + stopwatches[name].start(); - if (auto matcher_pass = dynamic_pointer_cast(pass)) { - // This checks is to skip the graph transformation when the graph pass relies on - // static shape but the function state is dynamic. 
- if (matcher_pass->get_property(PassProperty::REQUIRE_STATIC_SHAPE) && func->is_dynamic()) { - OPENVINO_DEBUG("Pass ", - pass->get_name(), - " requires static shape but the " - "model is dynamic. Skipping this transformation"); - continue; - } - // GraphRewrite is a temporary container for MatcherPass to make execution - // on on entire ov::Model - pass_applied = GraphRewrite(matcher_pass).run_on_model(func); - } else if (auto function_pass = dynamic_pointer_cast(pass)) { - // This checks is to skip the graph transformation when the graph pass relies on - // static shape but the function state is dynamic. - if (function_pass->get_property(PassProperty::REQUIRE_STATIC_SHAPE) && func->is_dynamic()) { - OPENVINO_DEBUG("Pass ", - pass->get_name(), - " requires static shape but the " - "model is dynamic. Skipping this transformation"); - continue; + bool is_pass_manager = name == m_manager_name; + if (is_pass_manager) { + std::cout << std::setw(25) << left; + std::cout << "PassManager started: " << m_manager_name << std::endl; } + } + } + + void stop_timer(const std::string& name, bool applied) { + if (m_profile_pass.is_enabled()) { + auto& stopwatch = stopwatches.at(name); + stopwatch.stop(); - if (dynamic_pointer_cast(pass)) { - if (needs_validate) { - function_pass->run_on_model(func); - needs_validate = false; + bool is_pass_manager = name == m_manager_name; + if (m_profile_pass.is_bool()) { + std::cout << std::setw(25) << left; + if (is_pass_manager) { + std::cout << "PassManager finished: "; + } else { + std::cout << " "; + } + std::cout << std::setw(60) << left << name; + std::cout << std::setw(5) << right << stopwatch.get_milliseconds() << "ms " << (applied ? "+" : "-") + << std::endl; + } else if (m_file.is_open()) { + if (is_pass_manager) { + m_file << "m;" << name << ";" << stopwatch.get_timer_value().count() << ";" << (applied ? "1" : "0") + << std::endl; + } else { + m_file << "t;" << name << ";" << m_manager_name << ";" << stopwatch.get_timer_value().count() << ";" + << (applied ? "1" : "0") << std::endl; } } else { - pass_applied = function_pass->run_on_model(func); + OPENVINO_THROW("The output file for logging transformation statistics is closed. " + "Recording of statistics is not possible."); } } + } - if (m_visualize) { - // visualizations and serializations will be named after the outermost function - const size_t num_digits_in_pass_index = 3; - std::string index_str = std::to_string(index); - index_str = std::string(num_digits_in_pass_index - index_str.length(), '0') + index_str; - auto base_filename = func->get_name() + std::string("_") + index_str + std::string("_") + pass->get_name(); - - if (m_visualize) { - auto file_ext = "svg"; - pass::VisualizeTree vt(base_filename + std::string(".") + file_ext); - vt.run_on_model(func); + void visualize(const shared_ptr& model, const std::string& pass_name) const { + static size_t viz_index = 0; + if (m_visualize.is_enabled()) { + const auto& filter = m_visualize.get_str(); + if (m_visualize.is_bool() || (pass_name.find(filter) != std::string::npos)) { + const auto& file_name = gen_file_name(model->get_name(), pass_name, viz_index++); + ov::pass::VisualizeTree vt(file_name + ".svg"); + vt.run_on_model(model); } } - index++; - pass_timer.stop(); - if (profile_enabled) { - cout << setw(7) << pass_timer.get_milliseconds() << "ms" << (pass_applied ? 
" + " : " ") - << pass->get_name() << "\n"; + } + + void serialize(const shared_ptr& model, const std::string& pass_name) const { + static size_t serialize_index = 0; + if (m_serialize.is_enabled()) { + const auto& filter = m_serialize.get_str(); + if (m_serialize.is_bool() || (pass_name.find(filter) != std::string::npos)) { + const auto& file_name = gen_file_name(model->get_name(), pass_name, serialize_index++); + ov::pass::Serialize serialize(file_name + ".xml", file_name + ".bin"); + serialize.run_on_model(model); + } } - function_changed = function_changed || pass_applied; - needs_validate = pass_applied; } - if (profile_enabled) { - cout << "passes done in " << overall_timer.get_milliseconds() << "ms\n"; + +private: + static std::string gen_file_name(const std::string& model_name, const std::string& pass_name, const size_t idx) { + std::stringstream name; + // visualizations and serializations will be named after the outermost function + const size_t num_digits_in_pass_index = 3; + std::string index_str = std::to_string(idx); + index_str = std::string(num_digits_in_pass_index - index_str.length(), '0') + index_str; + + name << model_name << std::string("_") << index_str << std::string("_") << pass_name; + return name.str(); } - return function_changed; + std::unordered_map stopwatches; + + EnvVar m_visualize; + EnvVar m_serialize; + EnvVar m_profile_pass; + + std::string m_manager_name; + std::fstream m_file; +}; + +} // namespace + +ov::pass::Manager::Manager() : m_pass_config(std::make_shared()) {} + +ov::pass::Manager::~Manager() = default; + +ov::pass::Manager::Manager(std::string name) : m_pass_config(std::make_shared()), m_name(std::move(name)) {} + +ov::pass::Manager::Manager(std::shared_ptr pass_config, std::string name) + : m_pass_config(std::move(pass_config)), + m_name(std::move(name)) {} + +void ov::pass::Manager::set_per_pass_validation(bool new_state) { + m_per_pass_validation = new_state; +} + +bool ov::pass::Manager::run_passes(const shared_ptr& model) { + OV_ITT_SCOPED_TASK(ov::itt::domains::core, "pass::Manager::run_passes"); + Profiler profiler(m_name); + + bool model_changed = false; + bool pass_changed_model = false; + + profiler.start_timer(m_name); + for (const auto& pass : m_pass_list) { + const auto& pass_name = pass->get_name(); + + profiler.start_timer(pass_name); + pass_changed_model = run_pass(pass, model, pass_changed_model); + profiler.stop_timer(pass_name, pass_changed_model); + + model_changed = model_changed || pass_changed_model; + + profiler.visualize(model, pass_name); + profiler.serialize(model, pass_name); + } + profiler.stop_timer(m_name, model_changed); + + return model_changed; +} + +bool ov::pass::Manager::run_pass(const std::shared_ptr& pass, + const std::shared_ptr& model, + bool needs_validate) { + if (m_pass_config->is_disabled(pass->get_type_info())) { + OPENVINO_DEBUG("Pass ", pass->get_name(), " is disabled."); + return false; + } + + // This checks if we need to skip the graph transformation when the graph pass relies on + // static shape but the model state is dynamic. + if (pass->get_property(PassProperty::REQUIRE_STATIC_SHAPE) && model->is_dynamic()) { + OPENVINO_DEBUG("Pass ", + pass->get_name(), + " requires static shape but the ", + "model is dynamic. 
Skipping this transformation."); + return false; + } + + OV_ITT_SCOPE(FIRST_INFERENCE, ov::itt::domains::ov_pass, ov::pass::perf_counters()[pass->get_type_info()]); + + if (auto matcher_pass = dynamic_pointer_cast(pass)) { + // GraphRewrite is a temporary container for MatcherPass to make execution on entire ov::Model + return GraphRewrite(matcher_pass).run_on_model(model); + } else if (auto model_pass = dynamic_pointer_cast(pass)) { + if (dynamic_pointer_cast(model_pass) && !needs_validate) { + return false; + } + return model_pass->run_on_model(model); + } + return false; } diff --git a/src/core/src/pass/sdpa_to_paged_attention.cpp b/src/core/src/pass/sdpa_to_paged_attention.cpp index 8630fbefe728ec..8b19b07f2f5d76 100644 --- a/src/core/src/pass/sdpa_to_paged_attention.cpp +++ b/src/core/src/pass/sdpa_to_paged_attention.cpp @@ -101,7 +101,7 @@ bool ov::pass::SDPAToPagedAttention::run_on_model(const std::shared_ptr(position_ids); // it is not always required, so will be disposed if not needed - ov::pass::Manager manager; + ov::pass::Manager manager("SDPA to PA"); manager.set_per_pass_validation(false); manager.register_pass(kv_parameters, model_remaining_params, diff --git a/src/frontends/ir/src/frontend.cpp b/src/frontends/ir/src/frontend.cpp index 74553520410cda..b8e92e8b37b291 100644 --- a/src/frontends/ir/src/frontend.cpp +++ b/src/frontends/ir/src/frontend.cpp @@ -252,7 +252,7 @@ std::string FrontEnd::get_name() const { } void FrontEnd::normalize(const std::shared_ptr& model) const { - ov::pass::Manager manager; + ov::pass::Manager manager("Frontend:IR:normalize"); manager.register_pass(); manager.run_passes(model); } diff --git a/src/frontends/onnx/frontend/src/frontend.cpp b/src/frontends/onnx/frontend/src/frontend.cpp index 529869a7d936d0..d4b83fee20db82 100644 --- a/src/frontends/onnx/frontend/src/frontend.cpp +++ b/src/frontends/onnx/frontend/src/frontend.cpp @@ -93,7 +93,7 @@ std::shared_ptr FrontEnd::convert_partially(const InputModel::Ptr& in if (!m_transformation_extensions.empty()) { auto model = decode(input_model); - ov::pass::Manager manager; + ov::pass::Manager manager("Frontend:ONNX:convert_partially"); for (const auto& transformation : m_transformation_extensions) { transformation->register_pass(manager); } @@ -113,7 +113,7 @@ std::shared_ptr FrontEnd::convert_partially(const InputModel::Ptr& in void FrontEnd::normalize(const std::shared_ptr& model) const { // Here, you can register transformations as a second step of importing process // In particular, you can operate on not supported ops (it allows to N:N ONNX->OV mapping). 
diff --git a/src/frontends/onnx/frontend/src/frontend.cpp b/src/frontends/onnx/frontend/src/frontend.cpp
index 529869a7d936d0..d4b83fee20db82 100644
--- a/src/frontends/onnx/frontend/src/frontend.cpp
+++ b/src/frontends/onnx/frontend/src/frontend.cpp
@@ -93,7 +93,7 @@ std::shared_ptr<ov::Model> FrontEnd::convert_partially(const InputModel::Ptr& input_model) const {
     if (!m_transformation_extensions.empty()) {
         auto model = decode(input_model);
 
-        ov::pass::Manager manager;
+        ov::pass::Manager manager("Frontend:ONNX:convert_partially");
         for (const auto& transformation : m_transformation_extensions) {
             transformation->register_pass(manager);
         }
@@ -113,7 +113,7 @@ std::shared_ptr<ov::Model> FrontEnd::convert_partially(const InputModel::Ptr& input_model) const {
 void FrontEnd::normalize(const std::shared_ptr<ov::Model>& model) const {
     // Here, you can register transformations as a second step of importing process
     // In particular, you can operate on not supported ops (it allows to N:N ONNX->OV mapping).
-    ov::pass::Manager manager;
+    ov::pass::Manager manager("Frontend:ONNX:normalize");
     manager.register_pass(true);
     manager.run_passes(model);
 }
@@ -125,7 +125,7 @@ std::shared_ptr<ov::Model> FrontEnd::convert(const InputModel::Ptr& input_model) const {
     if (!m_transformation_extensions.empty()) {
         auto model = decode(input_model);
 
-        ov::pass::Manager manager;
+        ov::pass::Manager manager("Frontend:ONNX:convert");
         for (const auto& transformation : m_transformation_extensions) {
             transformation->register_pass(manager);
         }
diff --git a/src/frontends/paddle/src/frontend.cpp b/src/frontends/paddle/src/frontend.cpp
index 0b6ab0d4eef331..c6febe08437b5d 100644
--- a/src/frontends/paddle/src/frontend.cpp
+++ b/src/frontends/paddle/src/frontend.cpp
@@ -343,7 +343,7 @@ std::map<...> FrontEnd::convert_each_node_recurs
 
 void FrontEnd::try_remove_internal_ops(const std::vector<std::shared_ptr<Model>>& models) const {
     for (auto& model : models) {
-        ov::pass::Manager manager;
+        ov::pass::Manager manager("Frontend:Paddle:try_remove_internal_ops");
         manager.register_pass(models);
         manager.register_pass(models);
         manager.register_pass(models);
@@ -357,7 +357,7 @@ void FrontEnd::try_remove_internal_ops(const std::vector<std::shared_ptr<Model>>& models) const {
 
 void FrontEnd::fuse_fakequantize_ops(const std::vector<std::shared_ptr<Model>>& models) const {
     for (auto& model : models) {
-        ov::pass::Manager manager;
+        ov::pass::Manager manager("Frontend:Paddle:fuse_fakequantize_ops");
         manager.register_pass();
         manager.run_passes(model);
     }
@@ -506,7 +506,7 @@ std::shared_ptr<ov::Model> FrontEnd::convert_partially(const InputModel::Ptr& model) const {
     if (!m_transformation_extensions.empty()) {
         auto function = decode(model);
 
-        ov::pass::Manager manager;
+        ov::pass::Manager manager("Frontend:Paddle:convert_partially");
         for (const auto& transformation : m_transformation_extensions) {
             transformation->register_pass(manager);
         }
@@ -572,7 +572,7 @@ void FrontEnd::add_extension(const std::shared_ptr<ov::Extension>& extension) {
 }
 
 void FrontEnd::normalize(const std::shared_ptr<ov::Model>& model) const {
-    ov::pass::Manager manager;
+    ov::pass::Manager manager("Frontend:Paddle:normalize");
     manager.register_pass(true);
     manager.run_passes(model);
 }
diff --git a/src/frontends/pytorch/src/frontend.cpp b/src/frontends/pytorch/src/frontend.cpp
index 7e63f3c4aeb456..2cbb5c4d6bc96e 100644
--- a/src/frontends/pytorch/src/frontend.cpp
+++ b/src/frontends/pytorch/src/frontend.cpp
@@ -240,7 +240,7 @@ std::shared_ptr<ov::Model> FrontEnd::decode(const InputModel::Ptr& model) const {
 }
 
 void FrontEnd::normalize(const std::shared_ptr<ov::Model>& model) const {
-    ov::pass::Manager manager;
+    ov::pass::Manager manager("Frontend:Pytorch:normalize");
 
     // GPTQ transformations need to be executed before other passes
     // Once the GPTQ patterns are modified by other transformations,
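A side effect worth documenting for frontend authors: because `run_pass()` wraps a bare `MatcherPass` in a temporary `GraphRewrite`, matcher passes can be registered on a named `Manager` directly. A minimal sketch under that assumption; the pattern and pass are invented for illustration (Relu is idempotent, so `Relu(Relu(x))` folds to `Relu(x)`):

```cpp
#include "openvino/core/graph_util.hpp"
#include "openvino/core/model.hpp"
#include "openvino/op/relu.hpp"
#include "openvino/pass/graph_rewrite.hpp"
#include "openvino/pass/manager.hpp"
#include "openvino/pass/pattern/op/wrap_type.hpp"

// Hypothetical pass: collapses Relu(Relu(x)) into a single Relu.
class FoldDoubleRelu : public ov::pass::MatcherPass {
public:
    OPENVINO_RTTI("FoldDoubleRelu", "0");
    FoldDoubleRelu() {
        auto inner = ov::pass::pattern::wrap_type<ov::op::v0::Relu>();
        auto outer = ov::pass::pattern::wrap_type<ov::op::v0::Relu>({inner});
        register_matcher(std::make_shared<ov::pass::pattern::Matcher>(outer, "FoldDoubleRelu"),
                         [](ov::pass::pattern::Matcher& m) {
                             auto root = m.get_match_root();  // the outer Relu
                             // Rewire consumers of the outer Relu to the inner one.
                             return ov::replace_output_update_name(root->output(0),
                                                                   root->input_value(0));
                         });
    }
};

void fold_double_relu(const std::shared_ptr<ov::Model>& model) {
    ov::pass::Manager manager("Example:FoldDoubleRelu");
    manager.register_pass<FoldDoubleRelu>();  // executed via a temporary GraphRewrite
    manager.run_passes(model);
}
```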
diff --git a/src/frontends/tensorflow/src/frontend.cpp b/src/frontends/tensorflow/src/frontend.cpp
index e2e49355e914f0..aac5811223e135 100644
--- a/src/frontends/tensorflow/src/frontend.cpp
+++ b/src/frontends/tensorflow/src/frontend.cpp
@@ -488,7 +488,7 @@ std::shared_ptr<ov::Model> FrontEnd::convert_partially(const ov::frontend::InputModel::Ptr& model) const {
     if (!m_transformation_extensions.empty()) {
         auto function = decode(model);
 
-        ov::pass::Manager manager;
+        ov::pass::Manager manager("Frontend:TF:convert_partially");
         for (const auto& transformation : m_transformation_extensions) {
             transformation->register_pass(manager);
         }
@@ -557,7 +557,7 @@ void FrontEnd::convert(const std::shared_ptr<ov::Model>& partiallyConverted) const {
 }
 
 void FrontEnd::normalize(const std::shared_ptr<ov::Model>& model) const {
-    ov::pass::Manager manager;
+    ov::pass::Manager manager("Frontend:TF:normalize");
     // Mark quantized and f16/bf16 compressed constants to prevent CF for them,
     // so that not extra memory is used for intermediate decompressed constants.
diff --git a/src/frontends/tensorflow_lite/src/frontend.cpp b/src/frontends/tensorflow_lite/src/frontend.cpp
index 62d75d9c2513c6..15e89301983811 100644
--- a/src/frontends/tensorflow_lite/src/frontend.cpp
+++ b/src/frontends/tensorflow_lite/src/frontend.cpp
@@ -111,7 +111,7 @@ std::shared_ptr<ov::Model> FrontEnd::convert(const ov::frontend::InputModel::Ptr& model) const {
     if (!m_transformation_extensions.empty()) {
         auto ov_model = decode(model);
 
-        ov::pass::Manager manager;
+        ov::pass::Manager manager("Frontend:TFLite:convert");
         for (const auto& transformation : m_transformation_extensions) {
             transformation->register_pass(manager);
         }
@@ -153,7 +153,7 @@ void FrontEnd::convert(const std::shared_ptr<ov::Model>& partiallyConverted) const {
 std::shared_ptr<ov::Model> FrontEnd::convert_partially(const ov::frontend::InputModel::Ptr& model) const {
     if (!m_transformation_extensions.empty()) {
         auto function = decode(model);
 
-        ov::pass::Manager manager;
+        ov::pass::Manager manager("Frontend:TFLite:convert_partially");
         for (const auto& transformation : m_transformation_extensions) {
             transformation->register_pass(manager);
         }
@@ -293,7 +293,7 @@ std::shared_ptr<ov::Model> FrontEnd::decode(const InputModel::Ptr& model) const {
 }
 
 void FrontEnd::normalize(const std::shared_ptr<ov::Model>& function) const {
-    ov::pass::Manager manager;
+    ov::pass::Manager manager("Frontend:TFLite:normalize");
     // Mark quantized and f16/bf16 compressed constants to prevent CF for them,
     // so that not extra memory is used for intermediate decompressed constants.
     manager.register_pass();
diff --git a/src/frontends/tensorflow_lite/src/tflite_transformations/tflite_quantize_resolver.cpp b/src/frontends/tensorflow_lite/src/tflite_transformations/tflite_quantize_resolver.cpp
index aa38ed72d839e5..ffa8438817bb77 100644
--- a/src/frontends/tensorflow_lite/src/tflite_transformations/tflite_quantize_resolver.cpp
+++ b/src/frontends/tensorflow_lite/src/tflite_transformations/tflite_quantize_resolver.cpp
@@ -198,7 +198,7 @@ pass::TFLQuantizeReplacer::TFLQuantizeReplacer() {
 }
 
 bool pass::TFLQuantizeResolver::run_on_model(const std::shared_ptr<ov::Model>& m) {
-    ov::pass::Manager manager;
+    ov::pass::Manager manager("Frontend:TFLite:TFLQuantizeResolver");
     manager.register_pass();
     manager.register_pass();
     manager.run_passes(m);
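`TFLQuantizeResolver` above illustrates a pattern this naming makes easier to trace: a `ModelPass` that drives its own nested `Manager`. With `run_passes()` now returning whether the model changed, the inner result can be propagated directly to the outer manager. A sketch of that shape (the class and inner pass are illustrative, not from this patch):

```cpp
#include "openvino/core/model.hpp"
#include "openvino/pass/constant_folding.hpp"
#include "openvino/pass/manager.hpp"
#include "openvino/pass/pass.hpp"

// Hypothetical composite pass running an internal pipeline under its own label.
class MyCompositePass : public ov::pass::ModelPass {
public:
    OPENVINO_RTTI("MyCompositePass", "0");
    bool run_on_model(const std::shared_ptr<ov::Model>& model) override {
        ov::pass::Manager manager("Example:MyCompositePass");
        manager.register_pass<ov::pass::ConstantFolding>();
        // Propagate "model changed" to the outer manager's bookkeeping.
        return manager.run_passes(model);
    }
};
```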
diff --git a/src/plugins/auto_batch/src/plugin.cpp b/src/plugins/auto_batch/src/plugin.cpp
index d97987bea6f39a..2e3e17cd43086d 100644
--- a/src/plugins/auto_batch/src/plugin.cpp
+++ b/src/plugins/auto_batch/src/plugin.cpp
@@ -162,7 +162,7 @@ std::shared_ptr<ov::ICompiledModel> Plugin::compile_model(const std::shared_ptr<const ov::Model>& model,
     const bool check_dims = (enable_tput_plugin || enable_tput_cfg);
     // find the batch dim
     auto cloned_model = model->clone();
-    ov::pass::Manager pass_manager;
+    ov::pass::Manager pass_manager("Plugin:AutoBatch");
     pass_manager.register_pass();
     pass_manager.register_pass(false, check_dims);
     pass_manager.run_passes(cloned_model);
diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/convert_to_cpu_specific_opset.hpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/convert_to_cpu_specific_opset.hpp
index 934a86bbc8b30e..8667f85e380449 100644
--- a/src/plugins/intel_cpu/src/transformations/cpu_opset/convert_to_cpu_specific_opset.hpp
+++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/convert_to_cpu_specific_opset.hpp
@@ -29,7 +29,7 @@ namespace intel_cpu {
 
 inline void ConvertToCPUSpecificOpset(std::shared_ptr<ov::Model> &nGraphFunc, int subStreamNum) {
     RUN_ON_FUNCTION_SCOPE(ConvertToCPUSpecificOpset);
-    ov::pass::Manager manager;
+    ov::pass::Manager manager("CPU:ConvertToCPUSpecificOpset");
     manager.set_per_pass_validation(false);
     CPU_REGISTER_PASS_COMMON(manager, ConvertMatMulToFC);
     CPU_REGISTER_PASS_X64(manager, MoveFCReshapeToWeights);
diff --git a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp
index e8ac0bbd9748ad..cd8ce3062c815b 100644
--- a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp
+++ b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp
@@ -307,7 +307,7 @@ void Transformations::PreLpt(const std::vector<ov::element::Type>& defaultPrecis
     // Decompression handling related transformations must be run separately from common preLPT pipeline
     // since there is used the same transformations as in LPT related transformations, but with the specific settings.
     // This must be done in order to keep compressed MatMul weights with decompression operations as is
-    ov::pass::Manager decompression_handling_manager;
+    ov::pass::Manager decompression_handling_manager("CPU:DecompressionHandling");
     decompression_handling_manager.set_per_pass_validation(false);
     CPU_REGISTER_PASS_COMMON(decompression_handling_manager, ov::pass::InitNodeInfo);
     const bool useLpt = !defaultPrecisions.empty();
@@ -346,7 +346,7 @@ void Transformations::PreLpt(const std::vector<ov::element::Type>& defaultPrecis
                      ov::pass::ConvertGatherToGatherCompressed);
     decompression_handling_manager.run_passes(model);
 
-    ov::pass::Manager manager;
+    ov::pass::Manager manager("Plugin:CPU");
     manager.set_per_pass_validation(false);
     if (useLpt)
         CPU_REGISTER_PASS_COMMON(manager, ov::pass::MarkDequantizationSubgraph, defaultPrecisions);
@@ -732,7 +732,7 @@ void Transformations::Lpt(const std::vector<ov::element::Type>& defaultPrecision
             QuantizationGranularityRestriction::create({0})
         });
 
-    ov::pass::Manager lptManager;
+    ov::pass::Manager lptManager("CPU:LPT");
     CPU_REGISTER_PASS_COMMON(lptManager,
                              LowPrecision,
                              supportedPrecisions,
                              quantizationRestrictions,
@@ -781,7 +781,7 @@ void Transformations::Lpt(const std::vector<ov::element::Type>& defaultPrecision
 
 void Transformations::PostLpt() {
     CPU_DEBUG_CAP_TRANSFORMATION_SCOPE(this, PostLpt);
 
-    ov::pass::Manager postLPTPassManager;
+    ov::pass::Manager postLPTPassManager("CPU:PostLPT");
     postLPTPassManager.set_per_pass_validation(false);
     CPU_REGISTER_PASS_COMMON(postLPTPassManager, ov::pass::UnrollTensorIterator);
     CPU_REGISTER_PASS_COMMON(postLPTPassManager, ov::pass::ReshapePRelu);
@@ -904,7 +904,7 @@ void Transformations::MainSnippets(void) {
                                                 mha_token_enable_transpose_on_output,
                                                 is_dynamic_mha_token_enabled,
                                                 mha_supported_transpose_ranks);
-    ov::pass::Manager snippetsManager;
+    ov::pass::Manager snippetsManager("CPU:Snippets");
     snippetsManager.set_per_pass_validation(false);
     if (!ignoreCallback) {
 #if defined(OPENVINO_ARCH_ARM64)
@@ -1106,7 +1106,7 @@ void Transformations::MainSnippets(void) {
 }
 
 void Transformations::PostSnippets(void) {
-    ov::pass::Manager postSnippetsManager;
+    ov::pass::Manager postSnippetsManager("CPU:PostSnippets");
     postSnippetsManager.set_per_pass_validation(false);
     CPU_REGISTER_PASS_COMMON(postSnippetsManager, ov::pass::FakeQuantizeDecomposition);
     CPU_SET_CALLBACK_COMMON(postSnippetsManager,
diff --git a/src/plugins/intel_gpu/src/plugin/transformations/convert_convolution.cpp b/src/plugins/intel_gpu/src/plugin/transformations/convert_convolution.cpp
index 6a61eae3ed4996..656b4c6fd99c20 100644
--- a/src/plugins/intel_gpu/src/plugin/transformations/convert_convolution.cpp
+++ b/src/plugins/intel_gpu/src/plugin/transformations/convert_convolution.cpp
@@ -227,7 +227,7 @@ ConvolutionMatcher::ConvolutionMatcher() {
 }
 
 bool ConvertConvolutionToInternal::run_on_model(const std::shared_ptr<ov::Model>& m) {
-    ov::pass::Manager manager;
+    ov::pass::Manager manager("ConvertConvolutionToInternal");
     auto pass_config = manager.get_pass_config();
     manager.set_per_pass_validation(false);
     manager.register_pass();
diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
index dbe7e858c1e6fe..e99d3851974629 100644
--- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
+++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
@@ -208,7 +208,7 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
     bool enableInt8;
     bool unroll_loop = config.get_property(ov::intel_gpu::enable_loop_unrolling);
     {
-        ov::pass::Manager manager;
+        ov::pass::Manager manager("Plugin:GPU");
         auto pass_config = manager.get_pass_config();
         manager.set_per_pass_validation(false);
@@ -667,7 +667,7 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
             QuantizationGranularityRestriction::create({0}),
         });
 
-        ov::pass::Manager lptManager;
+        ov::pass::Manager lptManager("GPU:LPT");
 
         auto lptPassConfig = lptManager.get_pass_config();
         // quantized LSTMSequence / GPUSequence are not supported yet. Avoid extra transformation
@@ -756,7 +756,7 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
     {
         OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "TransformationsPipeline::apply::run_passes");
-        ov::pass::Manager manager;
+        ov::pass::Manager manager("GPU:UnrollTensorIterator");
         // This ConstantFolding pass is added to fold reshapes added for constant inputs on NMS internal operation which prevents upper-bound calculation
         // TODO: check why we have these reshapes
         manager.register_pass<ov::pass::ConstantFolding>();
@@ -776,7 +776,7 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
     }
 
     {
-        ov::pass::Manager manager;
+        ov::pass::Manager manager("GPU:PostLPT");
 
         // Other ops support eltwise fusions
         const std::vector<ov::DiscreteTypeInfo> allowed_data_movement_ops = {
diff --git a/src/plugins/intel_npu/src/compiler/src/graph_transformations.cpp b/src/plugins/intel_npu/src/compiler/src/graph_transformations.cpp
index c0881d75cd7be9..cc9655a38dd3ff 100644
--- a/src/plugins/intel_npu/src/compiler/src/graph_transformations.cpp
+++ b/src/plugins/intel_npu/src/compiler/src/graph_transformations.cpp
@@ -33,7 +33,7 @@ IRSerializer::IRSerializer(const std::shared_ptr<const ov::Model>& origModel, const uint32_t supportedOpset)
 
 void IRSerializer::serializeModelToStream(std::ostream& xml, std::ostream& weights) {
     _logger.debug("serializeModelToStream");
     const auto passConfig = std::make_shared<ov::pass::PassConfig>();
-    ov::pass::Manager manager(passConfig);
+    ov::pass::Manager manager(passConfig, "NPU:serializeModelToStream");
 
     if (_supportedOpset < 11) {
         // Downgrade to opset10
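The NPU change above exercises the new two-argument constructor, pairing an externally created `PassConfig` with a trace-friendly name. Sharing a config this way lets the caller enable or disable passes before the manager runs; a hedged sketch (disabling ConstantFolding is just an example, not what the NPU serializer does):

```cpp
#include "openvino/core/model.hpp"
#include "openvino/pass/constant_folding.hpp"
#include "openvino/pass/manager.hpp"
#include "openvino/pass/pass_config.hpp"

void run_with_shared_config(const std::shared_ptr<ov::Model>& model) {
    auto pass_config = std::make_shared<ov::pass::PassConfig>();
    pass_config->disable<ov::pass::ConstantFolding>();  // vetoed for this pipeline

    // Two-argument constructor from this patch: shared config plus a name.
    ov::pass::Manager manager(pass_config, "Example:SharedConfig");
    manager.register_pass<ov::pass::ConstantFolding>();  // registered, but disabled above
    manager.run_passes(model);
}
```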
diff --git a/src/plugins/template/src/plugin.cpp b/src/plugins/template/src/plugin.cpp
index b0abb1c232e52b..ee885f67e188b5 100644
--- a/src/plugins/template/src/plugin.cpp
+++ b/src/plugins/template/src/plugin.cpp
@@ -64,7 +64,7 @@ ov::SoPtr<ov::IRemoteContext> ov::template_plugin::Plugin::get_default_context(
 // ! [plugin:transform_model]
 void transform_model(const std::shared_ptr<ov::Model>& model) {
     // Perform common optimizations and device-specific transformations
-    ov::pass::Manager passManager;
+    ov::pass::Manager passManager("Plugin:Template");
     // Example: register CommonOptimizations transformation from transformations library
     passManager.register_pass<ov::pass::CommonOptimizations>();
     // Disable some transformations