From a9bfd0ff0e6683cb4b2c56d8ed1795472b9e0f2e Mon Sep 17 00:00:00 2001 From: Prakash Date: Mon, 29 Jul 2024 20:48:32 +0530 Subject: [PATCH 01/10] [OV JS] Add optical-character-recognition sample script (#25653) ## Details: - added code in script - updated the samples list in readme A few doubts and questions: - do I need to strictly check whether the passed model exists and is correct? For example, does it have a corresponding weights file present in the directory? - do I also need to accept the output location from the user, i.e. where they want to save the output images? Currently I save them in a directory named results and log the save location once the sample finishes - I'm logging bounding boxes along with the annotated text; let me know if I need to change anything there as far as logging details are concerned - should I describe the process step by step using comments like you did in the hello_classification sample ``` //----------------- Step 1. Initialize OpenVINO Runtime Core ----------------- //----------------- Step 2. Read a model ------------------------------------- //----------------- Step 3. Set up input ------------------------------------- ``` Please let me know your feedback @Aliczi @vishniakov-nikolai With regards, Prakash --------- Co-authored-by: Vishniakov Nikolai --- samples/js/node/README.md | 1 + .../optical_character_recognition/README.md | 6 + .../optical-character-recognition.js | 422 ++++++++++++++++++ 3 files changed, 429 insertions(+) create mode 100644 samples/js/node/optical_character_recognition/README.md create mode 100644 samples/js/node/optical_character_recognition/optical-character-recognition.js diff --git a/samples/js/node/README.md b/samples/js/node/README.md index 7375219ccf2c0a..7cde08ffbef0c9 100644 --- a/samples/js/node/README.md +++ b/samples/js/node/README.md @@ -34,6 +34,7 @@ You can run this sample in the browser; no installation is required. [Codesandbox](https://codesandbox.io/) is a free online service with limited resources. For optimal performance and more control, it is recommended to run the sample locally.
- [hello-classification-sample](https://codesandbox.io/p/devbox/openvino-node-hello-classification-sample-djl893) +- optical-character-recognition-sample ## See Also diff --git a/samples/js/node/optical_character_recognition/README.md b/samples/js/node/optical_character_recognition/README.md new file mode 100644 index 00000000000000..130566ca0bcd4c --- /dev/null +++ b/samples/js/node/optical_character_recognition/README.md @@ -0,0 +1,6 @@ +# Optical Character Recognition Node.js Sample + +Run: +```bash +node optical-character-recognition.js *path_to_detection_model_file* *path_to_recognition_model_file* *path_to_img* AUTO +``` \ No newline at end of file diff --git a/samples/js/node/optical_character_recognition/optical-character-recognition.js b/samples/js/node/optical_character_recognition/optical-character-recognition.js new file mode 100644 index 00000000000000..5e371c1975a993 --- /dev/null +++ b/samples/js/node/optical_character_recognition/optical-character-recognition.js @@ -0,0 +1,422 @@ +const { addon: ov } = require('openvino-node'); +const fs = require('node:fs'); +const path = require('node:path'); +const { createCanvas, ImageData } = require('canvas'); +const { cv } = require('opencv-wasm'); +const { + transform, + getImageData, + argMax, + setShape, +} = require('../helpers.js'); + +if (require.main === module) { +// Parsing and validation of input arguments + if (process.argv.length !== 6) + throw new Error( + `Usage: ${process.argv[1]} <path_to_detection_model_file>` + + ' <path_to_recognition_model_file>' + + ' <path_to_img> <device_name>', + ); + + const detModelXMLPath = process.argv[2]; + const recModelXMLPath = process.argv[3]; + const imagePath = process.argv[4]; + const deviceName = process.argv[5]; + + // main() is async, so failures surface as promise rejections + main(detModelXMLPath, recModelXMLPath, imagePath, deviceName) + .catch((error) => console.error('Error occurred', error)); +} + +async function main(detModelXMLPath, recModelXMLPath, imagePath, deviceName) { + // Initialize OpenVINO core and load the detection model + const core = new ov.Core(); + const detModel = await core.readModel(detModelXMLPath); + const detCompiledModel = await core.compileModel(detModel, deviceName); + const detInputLayer = detCompiledModel.input(0); + const detOutputLayer = detCompiledModel.output('boxes'); + + const imageData = await getImageData(imagePath); + const inputImageMat = cv.matFromImageData(imageData); + + // Resize the image to meet network input size + const [, , H, W] = detInputLayer.shape; + const resizedImage = new cv.Mat(); + cv.cvtColor(inputImageMat, inputImageMat, cv.COLOR_RGBA2RGB); + cv.cvtColor(inputImageMat, inputImageMat, cv.COLOR_BGR2RGB); + cv.resize(inputImageMat, resizedImage, new cv.Size(W, H)); + + // Prepare input tensor + const inputImage = transform( + resizedImage.data, + { width: W, height: H }, + [0, 1, 2], + ); + const tensorData = new Float32Array(inputImage); + const tensor = new ov.Tensor(ov.element.f32, detInputLayer.shape, tensorData); + + const detInferRequest = detCompiledModel.createInferRequest(); + + const detResult = await detInferRequest.inferAsync([tensor]); + const boundingBoxesArray = extractBoundingBoxes(detResult[detOutputLayer]); + + const recModel = await core.readModel(recModelXMLPath); + const recModelCompiled = await core.compileModel(recModel, deviceName); + const recInputLayer = recModelCompiled.input(0); + const recOutputLayer = recModelCompiled.output(0); + + // Process each bounding box and run inference on the recognition model + const [, , height, width] = recInputLayer.shape; + // Calculate ratios + const { ratioX, ratioY } = calculateRatios(inputImageMat,
resizedImage); + + // Convert image to grayscale + const grayscaleImage = convertToGrayscale(inputImageMat); + + const annotations = []; + const croppedImages = []; + + for (let i = 0; i < boundingBoxesArray.length; i++) { + const crop = boundingBoxesArray[i]; + const [xMin, yMin, xMax, yMax] = multiplyByRatio(ratioX, ratioY, crop).map( + Math.floor, + ); + const cropRect = new cv.Rect(xMin, yMin, xMax - xMin, yMax - yMin); + const croppedImage = grayscaleImage.roi(cropRect); + + try { + const preprocessedCrop = resizeAndConvertCropToModelInput(croppedImage, [ + width, + height, + ]); + const tensorData = new Float32Array(preprocessedCrop); + const tensor = new ov.Tensor( + ov.element.f32, + Int32Array.from(recInputLayer.shape), + tensorData, + ); + + await inferAsyncProcess( + tensor, + recModelCompiled, + recOutputLayer, + i, + annotations, + ); + + croppedImages.push(cropImage(inputImageMat, xMin, yMin, xMax, yMax)); + } catch(error) { + console.error('Error during preprocessing:', error); + } + + croppedImage.delete(); + } + + grayscaleImage.delete(); + + const boxesWithAnnotations = boundingBoxesArray.map((box, index) => ({ + box, + annotation: annotations[index], + })); + + logBoxesWithAnnotations(boxesWithAnnotations); + + convertResultToImage( + inputImageMat, + resizedImage, + boxesWithAnnotations, + { threshold: 0.3, confLabels: true }, + './assets/results/output_image.jpg', + ); + + croppedImages.forEach((croppedImage, i) => { + const savePath = `./assets/results/cropped_image_${i}.jpg`; + saveImage(croppedImage, savePath); + }); +} + +// Function to extract bounding boxes from the model output +function extractBoundingBoxes(output) { + const { data: boxes } = output; + const foldingCoefficient = 5; + const numberOfBoxes = boxes.length / foldingCoefficient; + + return setShape(boxes, [numberOfBoxes, foldingCoefficient]); +} + +// Function to calculate the ratios for the image +function calculateRatios(originalImage, resizedImage) { + const realY = originalImage.rows; + const realX = originalImage.cols; + const resizedY = resizedImage.rows; + const resizedX = resizedImage.cols; + const ratioX = realX / resizedX; + const ratioY = realY / resizedY; + + return { ratioX, ratioY }; +} + +// Function to convert the image to grayscale +function convertToGrayscale(originalImage) { + const grayscaleImage = new cv.Mat(); + cv.cvtColor(originalImage, grayscaleImage, cv.COLOR_BGR2GRAY); + + return grayscaleImage; +} + +// Function to adjust bounding box coordinates by a given ratio +function multiplyByRatio(ratioX, ratioY, box) { + const scaleShape = (shape, idx) => + idx % 2 ? 
Math.max(shape * ratioY, 10) : shape * ratioX; + + return box.map(scaleShape); +} + +// Function to resize and convert a crop to the recognition model input format +function resizeAndConvertCropToModelInput(crop, netShape) { + const [netWidth, netHeight] = netShape; + + // Resize the crop to the network's input shape + const tempImg = new cv.Mat(); + cv.resize(crop, tempImg, new cv.Size(netWidth, netHeight)); + + // Create the reshaped buffer + const reshapedBuffer = new Uint8Array(netHeight * netWidth); + let index = 0; + + for (let i = 0; i < netHeight; i++) { + for (let j = 0; j < netWidth; j++) { + reshapedBuffer[index++] = tempImg.ucharPtr(i, j)[0]; + } + } + + // Clean up + tempImg.delete(); + + return reshapedBuffer; +} + +// Function to extract recognition results from the model output +function extractRecognitionResults(output) { + const outputData = output.getData(); + const outputShape = output.getShape(); + const [, height, width] = outputShape; + + return setShape(outputData, [height, width]); +} + +// Function to parse annotations from the recognition results +function parseAnnotations(recognitionResults) { + const letters = '~0123456789abcdefghijklmnopqrstuvwxyz'; + const annotation = []; + + for (const row of recognitionResults) { + const letterIndex = argMax(row); + const parsedLetter = letters[letterIndex]; + + // Stop if end character is encountered + if (parsedLetter === letters[0]) break; + annotation.push(parsedLetter); + } + + return annotation.join(''); +} + +// Function to crop the image based on the bounding box coordinates +function cropImage(originalImage, xMin, yMin, xMax, yMax) { + xMin = Math.max(0, xMin); + yMin = Math.max(0, yMin); + xMax = Math.min(originalImage.cols, xMax); + yMax = Math.min(originalImage.rows, yMax); + if (xMin >= xMax || yMin >= yMax) { + throw new Error('Invalid crop coordinates'); + } + const roi = originalImage.roi( + new cv.Rect(xMin, yMin, xMax - xMin, yMax - yMin), + ); + const cropped = new cv.Mat(); + roi.copyTo(cropped); + roi.delete(); + + return cropped; +} + +// Get Text size +function getTextSize(text, fontFace, fontScale) { + const canvas = createCanvas(200, 200); + const ctx = canvas.getContext('2d'); + const adjustedFontScale = fontScale * 35; + ctx.font = `${adjustedFontScale}px ${fontFace}`; + const metrics = ctx.measureText(text); + const width = metrics.width; + const height = + metrics.actualBoundingBoxAscent + metrics.actualBoundingBoxDescent; + + return { width, height }; +} + +/* The convertResultToImage function visualizes object detection + results on an image by drawing bounding boxes around detected + objects and optionally adding labels to them. 
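+   The `options` argument supports `threshold` (minimum detection confidence required to draw a box, 0.3 by default) and `confLabels` (whether to draw the recognized text above each box, true by default).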
*/ + +function convertResultToImage( + bgrImage, + resizedImage, + boxesWithAnnotations, + options, + savePath, +) { + const defaultOptions = { threshold: 0.3, confLabels: true }; + const { threshold, confLabels } = Object.assign(defaultOptions, options); + + const colors = { + red: [255, 0, 0, 255], + green: [0, 255, 0, 255], + white: [255, 255, 255, 255], + }; + const [realY, realX] = [bgrImage.rows, bgrImage.cols]; + const [resizedY, resizedX] = [resizedImage.rows, resizedImage.cols]; + const [ratioX, ratioY] = [realX / resizedX, realY / resizedY]; + + const rgbImage = new cv.Mat(); + cv.cvtColor(bgrImage, rgbImage, cv.COLOR_BGR2RGB); + + boxesWithAnnotations.forEach(({ box, annotation }) => { + const conf = box[box.length - 1]; + + if (conf < threshold) return; + + const [xMin, yMin, xMax, yMax] = multiplyByRatio(ratioX, ratioY, box); + + cv.rectangle( + rgbImage, + new cv.Point(xMin, yMin), + new cv.Point(xMax, yMax), + colors.green, + 3, + ); + + if (!confLabels) return; + + const text = `${annotation}`; + const fontScale = 0.8; + const thickness = 1; + const { width: textW, height: textH } = getTextSize( + text, + 'Arial', + fontScale, + ); + const imageCopy = rgbImage.clone(); + + cv.rectangle( + imageCopy, + new cv.Point(xMin, yMin - textH - 10), + new cv.Point(xMin + textW, yMin - 10), + colors.white, + cv.FILLED, + ); + cv.addWeighted(imageCopy, 0.4, rgbImage, 0.6, 0, rgbImage); + cv.putText( + rgbImage, + text, + new cv.Point(xMin, yMin - 10), + cv.FONT_HERSHEY_SIMPLEX, + fontScale, + colors.red, + thickness, + cv.LINE_AA, + ); + + imageCopy.delete(); + }); + + const saveDir = path.dirname(savePath); + if (!fs.existsSync(saveDir)) { + fs.mkdirSync(saveDir, { recursive: true }); + } + + try { + saveImage(rgbImage, savePath); + } catch(e) { + console.log(`Error occurred while saving ----> ${e}`); + } + + return rgbImage; +} + +// Infer async helper function + +async function inferAsyncProcess( + tensor, + recModelCompiled, + recOutputLayer, + i, + annotations, +) { + // Create infer request + const inferRequest = recModelCompiled.createInferRequest(); + + // Define the completion callback function + function completionCallback(outputTensor, i, annotations) { + const recognitionResults = extractRecognitionResults(outputTensor); + const annotation = parseAnnotations(recognitionResults); + annotations.push(annotation); + } + + // Start inference in asynchronous mode + try { + const result = await inferRequest.inferAsync([tensor]); + completionCallback(result[recOutputLayer], i, annotations); + } catch(error) { + console.error('Error during inference:', error); + } +} + +// Log bounding boxes with annotations +function logBoxesWithAnnotations(boxesWithAnnotations) { + boxesWithAnnotations.forEach((item, i) => { + const { box, annotation } = item; + console.log(`Box ${i}: [${box}], Annotation: ${annotation}`); + }); +} + +function saveImage(rgbImage, savePath) { + const canvas = createCanvas(rgbImage.cols, rgbImage.rows); + const ctx = canvas.getContext('2d'); + const componentsPerPixel = + rgbImage.data.length / (rgbImage.cols * rgbImage.rows); + const imgDataArr = []; + + if (componentsPerPixel === 1) { + for (const val of rgbImage.data) { + imgDataArr.push(val, val, val, 255); + } + } else if (componentsPerPixel === 3) { + for (let i = 0; i < rgbImage.data.length; i++) { + imgDataArr.push(rgbImage.data[i]); + // ImageData expects RGBA order, so append an opaque alpha after each RGB triplet + if (i % 3 === 2) imgDataArr.push(255); + } + } + + const imageData = new ImageData( + new Uint8ClampedArray(imgDataArr), + rgbImage.cols, + rgbImage.rows, + ); +
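  // Render the assembled RGBA buffer onto the canvas before JPEG encoding. +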
ctx.putImageData(imageData, 0, 0); + + const dataURL = canvas.toDataURL('image/jpeg'); + const base64Data = dataURL.replace(/^data:image\/jpeg;base64,/, ''); + const imageBuffer = Buffer.from(base64Data, 'base64'); + + const saveDir = path.dirname(savePath); + if (!fs.existsSync(saveDir)) { + fs.mkdirSync(saveDir, { recursive: true }); + } + + fs.writeFileSync(savePath, imageBuffer); + console.log('Image saved successfully!', savePath); +} From 131c944a258dfd625cf87667d764cb4103e73d81 Mon Sep 17 00:00:00 2001 From: Ilya Albrecht Date: Mon, 29 Jul 2024 08:33:05 -0700 Subject: [PATCH 02/10] [GPU] Use array for tracking memory usage instead of map (#25269) ### Details: - Any additional locking and synchronization on memory allocation might have a negative impact on MT execution. - `std::map` has very slow access and requires a lock on every access. We can use `std::array` instead to hold a compile-time-known number of buckets. - The `array` container has lower access latency and memory overhead. - We might be able to remove the mutex lock on stat collection. --- .../include/intel_gpu/runtime/engine.hpp | 5 +- .../include/intel_gpu/runtime/memory_caps.hpp | 1 + src/plugins/intel_gpu/src/runtime/engine.cpp | 63 ++++++++----------- 3 files changed, 30 insertions(+), 39 deletions(-) diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/engine.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/engine.hpp index 320e2b466de5a4..7e77ceb6785cb5 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/engine.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/engine.hpp @@ -167,10 +167,9 @@ class engine { /// Create engine for given @p device and @p configuration engine(const device::ptr device); const device::ptr _device; - mutable std::mutex _mutex; - std::map<allocation_type, std::atomic<uint64_t>> _memory_usage_map; - std::map<allocation_type, std::atomic<uint64_t>> _peak_memory_usage_map; + std::array<std::atomic<uint64_t>, static_cast<size_t>(allocation_type::max_value)> _memory_usage_data{}; + std::array<std::atomic<uint64_t>, static_cast<size_t>(allocation_type::max_value)> _peak_memory_usage_data{}; }; } // namespace cldnn diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/memory_caps.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/memory_caps.hpp index 306a23fe1c3aaa..0a8da995d9af02 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/memory_caps.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/memory_caps.hpp @@ -18,6 +18,7 @@ enum class allocation_type { usm_host, // Accessible by host and device. Not Migratable usm_shared, // Accessible by host and device. Migratable. usm_device, // Accessible only by device. Not migratable. + max_value, // Used for data array size.
Shall be last }; inline std::ostream& operator<<(std::ostream& out, const allocation_type& alloc_type) { diff --git a/src/plugins/intel_gpu/src/runtime/engine.cpp b/src/plugins/intel_gpu/src/runtime/engine.cpp index ec0beef6a8aa31..73da14f6e16f47 100644 --- a/src/plugins/intel_gpu/src/runtime/engine.cpp +++ b/src/plugins/intel_gpu/src/runtime/engine.cpp @@ -197,65 +197,56 @@ memory_ptr engine::share_surface(const layout& layout, shared_surface surf, uint #endif // _WIN32 uint64_t engine::get_max_used_device_memory() const { - std::lock_guard<std::mutex> guard(_mutex); uint64_t total_peak_memory_usage {0}; - for (auto const& m : _peak_memory_usage_map) { - total_peak_memory_usage += m.second.load(); + for (auto const& m : _peak_memory_usage_data) { + total_peak_memory_usage += m.load(); } return total_peak_memory_usage; } uint64_t engine::get_max_used_device_memory(allocation_type type) const { - std::lock_guard<std::mutex> guard(_mutex); - uint64_t peak_memory_usage {0}; - auto iter = _peak_memory_usage_map.find(type); - if (iter != _peak_memory_usage_map.end()) { - peak_memory_usage = iter->second.load(); - } - return peak_memory_usage; + return _peak_memory_usage_data[static_cast<size_t>(type)].load(); } uint64_t engine::get_used_device_memory(allocation_type type) const { - std::lock_guard<std::mutex> guard(_mutex); - uint64_t memory_usage {0}; - auto iter = _memory_usage_map.find(type); - if (iter != _memory_usage_map.end()) { - memory_usage = iter->second.load(); - } - return memory_usage; + return _memory_usage_data[static_cast<size_t>(type)].load(); } std::map<std::string, uint64_t> engine::get_memory_statistics() const { - std::lock_guard<std::mutex> guard(_mutex); std::map<std::string, uint64_t> statistics; - for (auto const& m : _memory_usage_map) { - std::ostringstream oss; - oss << m.first; - statistics[oss.str()] = m.second.load(); - } + const auto add_stat = [&](allocation_type type) { + auto idx = static_cast<size_t>(type); + auto value = _memory_usage_data[idx].load(); + if (value != 0) { + std::ostringstream oss; + oss << type; + statistics[oss.str()] = value; + } + }; + + add_stat(allocation_type::unknown); + add_stat(allocation_type::cl_mem); + add_stat(allocation_type::usm_host); + add_stat(allocation_type::usm_shared); + add_stat(allocation_type::usm_device); return statistics; } void engine::add_memory_used(uint64_t bytes, allocation_type type) { - std::lock_guard<std::mutex> guard(_mutex); - if (!_memory_usage_map.count(type) && !_peak_memory_usage_map.count(type)) { - _memory_usage_map[type] = 0; - _peak_memory_usage_map[type] = 0; - } - _memory_usage_map[type] += bytes; - if (_memory_usage_map[type] > _peak_memory_usage_map[type]) { - _peak_memory_usage_map[type] = _memory_usage_map[type].load(); + auto idx = static_cast<size_t>(type); + const auto new_val = _memory_usage_data[idx].fetch_add(bytes) + bytes; + // Make sure actual maximum value is stored + while (new_val > _peak_memory_usage_data[idx]) { + _peak_memory_usage_data[idx] = new_val; } } void engine::subtract_memory_used(uint64_t bytes, allocation_type type) { - std::lock_guard<std::mutex> guard(_mutex); - auto iter = _memory_usage_map.find(type); - if (iter != _memory_usage_map.end()) { - _memory_usage_map[type] -= bytes; - } else { + auto idx = static_cast<size_t>(type); + if (_memory_usage_data[idx].load() < bytes) { throw std::runtime_error("Attempt to free unallocated memory"); } + _memory_usage_data[idx] -= bytes; } std::shared_ptr<engine> engine::create(engine_types engine_type, runtime_types runtime_type, const device::ptr device) { From a2077e3f22f78db22199af2fd385f27462c32c6b Mon Sep 17 00:00:00 2001 From: Kelvin Choi Date: Tue, 30 Jul 2024 08:21:05 +0900
Subject: [PATCH 03/10] [GPU] Fix gemm_tiled_opt kernel to support B_VEC_SIZE > 1 for static (#25631) ### Details: - *Fix gemm_tiled_opt to support B_VEC_SIZE>1 at static shape* ### Tickets: - *139840* --- .../kernel_selector/cl_kernels/gemm_tiled_opt.cl | 14 ++++++++++++-- .../kernels/gemm/gemm_kernel_tiled_opt.cpp | 4 +++- .../tests/unit/test_cases/gemm_gpu_test.cpp | 15 +++++++++++++-- 3 files changed, 28 insertions(+), 5 deletions(-) diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/gemm_tiled_opt.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/gemm_tiled_opt.cl index fa30466de60c8c..5ad89020ebf62a 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/gemm_tiled_opt.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/gemm_tiled_opt.cl @@ -786,6 +786,12 @@ KERNEL(gemm_tiled_opt)( ACCUMULATOR_TYPE_VEC dequantized = TO_ACCUMULATOR_TYPE(ALPHA) * c_tile[write_id]; #endif // BIAS_TERM + #if TRANSPOSE_OUTPUT == TRANSPOSE_X_LAST + const uint x_pitch = 1; + #else + const uint x_pitch = output_x_pitch; + #endif + #if HAS_FUSED_OPS #if FUSED_OPS_CAN_USE_PRELOAD FUSED_OPS_CALC_VEC; @@ -793,9 +799,13 @@ KERNEL(gemm_tiled_opt)( FUSED_OPS_VEC; #endif // FUSED_OPS_CAN_USE_PRELOAD OUTPUT_TYPE_VEC res = FUSED_OPS_RESULT_VEC; - BLOCK_WRITE_C(d_ptr, 0, res); + unroll_for (uint n_elem = 0; n_elem < B_VEC_SIZE; ++n_elem) { + BLOCK_WRITEN(OUTPUT_TYPE, 1, d_ptr, SIMD_WIDTH * n_elem * output_x_pitch, res[n_elem]); + } #else // HAS_FUSED_OPS - BLOCK_WRITE_C(d_ptr, 0, dequantized); + unroll_for (uint n_elem = 0; n_elem < B_VEC_SIZE; ++n_elem) { + BLOCK_WRITEN(OUTPUT_TYPE, 1, d_ptr, SIMD_WIDTH * n_elem * output_x_pitch, dequantized[n_elem]); + } #endif // HAS_FUSED_OPS #endif // TILE_N_NOT_DIVISIBLE || B_VEC_SIZE == 1 #endif // IS_DYNAMIC diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/gemm/gemm_kernel_tiled_opt.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/gemm/gemm_kernel_tiled_opt.cpp index e59f424e5d6af7..43f32f3e8e18b0 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/gemm/gemm_kernel_tiled_opt.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/gemm/gemm_kernel_tiled_opt.cpp @@ -96,7 +96,9 @@ GemmKernelTiledOpt::GemmTuningData GemmKernelTiledOpt::SetTuningParams(const gem tuning_data.tile_m_size = tuning_data.simd_size; } // Increasing tile_n_size has performance improvement when m_size and n_size are not shallow and n_size is aligned at 32. 
- if (m_size >= 128 && n_size >= 128 && (n_size % 32 == 0) && tuning_data.simd_size == 16 && params.fused_ops.empty()) + // TODO: Support TILE_K_LEFTOVER true case at static shape + if (m_size >= 128 && n_size >= 128 && (n_size % 32 == 0) && tuning_data.simd_size == 16 && + (k_size % tuning_data.tile_k_size == 0) && params.fused_ops.empty()) tuning_data.tile_n_size = 32; GPU_DEBUG_LOG << params.layerID << ": m_size: " << m_size << ", n_size: " << n_size << ", k_size: " << k_size << std::endl; diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/gemm_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/gemm_gpu_test.cpp index 2e4bebe8f3274d..86e054ec93292d 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/gemm_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/gemm_gpu_test.cpp @@ -1316,7 +1316,7 @@ class gemm_gpu_tests: public ::testing::Test { } } - void test_transpose_matmul_f16(size_t num_dims, bool is_input_dynamic, bool is_caching_test, std::vector BMKN, std::vector input0_order, std::vector input1_order) { + void test_transpose_matmul_f16(size_t num_dims, bool is_input_dynamic, bool is_caching_test, std::vector BMKN, std::vector input0_order, std::vector input1_order, const double abs_error = 0.0001) { tests::random_generator rg; rg.set_seed(GET_SUITE_NAME); @@ -1411,7 +1411,6 @@ class gemm_gpu_tests: public ::testing::Test { ASSERT_EQ(output_ptr.size(), ref_out_data.size()); - const auto abs_error = 0.0001; for (uint32_t i = 0; i < ref_out_data.size(); ++i) { ASSERT_NEAR(output_ptr[i], ref_out_data[i], abs_error) << "at " << i; } @@ -1487,10 +1486,22 @@ TEST_F(gemm_gpu_tests, transpose_matmul_static_3d_f16) { this->test_transpose_matmul_f16(3, false, false, /*BMKN*/{19, 37, 23, 29}, /*input0_order*/{0, 2, 1}, /*input1_order*/{1, 2, 0}); } +TEST_F(gemm_gpu_tests, transpose_matmul_static_3d_f16_n32) { + this->test_transpose_matmul_f16(3, false, false, /*BMKN*/{1, 256, 32, 128}, /*input0_order*/{0, 1, 2}, /*input1_order*/{0, 2, 1}, 0.1); +} + TEST_F(gemm_gpu_tests, transpose_matmul_static_3d_f32) { this->test_transpose_matmul_f32(3, false, false, /*BMKN*/{19, 37, 23, 29}, /*input0_order*/{0, 2, 1}, /*input1_order*/{1, 2, 0}); } +TEST_F(gemm_gpu_tests, transpose_matmul_static_3d_f32_n32) { + this->test_transpose_matmul_f32(3, false, false, /*BMKN*/{2, 128, 16, 256}, /*input0_order*/{0, 1, 2}, /*input1_order*/{0, 2, 1}); +} + +TEST_F(gemm_gpu_tests, transpose_matmul_static_3d_f32_n32_k_remainder) { + this->test_transpose_matmul_f32(3, false, false, /*BMKN*/{2, 128, 17, 256}, /*input0_order*/{0, 1, 2}, /*input1_order*/{0, 2, 1}); +} + TEST_F(gemm_gpu_tests, transpose_matmul_dynamic_4d_f16_unaligned) { this->test_transpose_matmul_f16(4, true, false, /*BMKN*/{19, 37, 23, 29}, /*input0_order*/{0, 2, 3, 1}, /*input1_order*/{1, 2, 3, 0}); } From 7851b8e353d08eea88744084eee42009c7be8cde Mon Sep 17 00:00:00 2001 From: Luwei Zhou Date: Tue, 30 Jul 2024 13:14:22 +0800 Subject: [PATCH 04/10] [CPU] Migrate to onednn 3.5 (#25153) ### Details: OV: - *1. Parse impl_type based one onednn updated verbose, brg_conv-> brgconv, avx10_1_512->avx512* - *2. onednn brgemm_t->brgemm_desc_t* - *3. onednn create_brgemm_matmul_copy_b() API needs to specify stride and transpose explicitly* - *4. jit generator constructor changes in onednn* - *5. onednn upgrades xbyak version to v7.05. Needs to upgrade the thirdparty/xbyak to same version to avoid exceptions caused by static build symbol collision* - *6. debugging information about impl_type* - *7. 
Remove dnnl_memory_set_data_handle_no_pads_proc() invocation* ONEDNN 3.5 clean up: - *1. Remove dnnl_memory_set_data_handle_no_pads_proc() and remove the legacy commit* - *2. Separate arm commits with X86 for maintainability* - *3. squash work* - *4. revert https://github.com/oneapi-src/oneDNN/commit/cf5a4fedb42bdf54e06269208945459c699890c2 because of perf regression* ONEDNN branch: backup without squashing: origin/luwei/v3.5_for_ie_master_bak after squashing: https://github.com/openvinotoolkit/oneDNN/tree/v3.5_for_ie_master_squash ### Tickets: - *[CVS 12434](https://jira.devtools.intel.com/browse/CVS-124341)* --------- Co-authored-by: Zhang Yi3 --- src/plugins/intel_cpu/src/cpu_memory.cpp | 13 +++--------- .../x64/jit_brgemm_copy_b_emitter.cpp | 5 ++++- .../snippets/x64/kernel_executors/brgemm.cpp | 2 +- src/plugins/intel_cpu/src/graph.cpp | 20 +++++++++++++++++-- src/plugins/intel_cpu/src/node.cpp | 13 +++++++++++- src/plugins/intel_cpu/src/nodes/conv.cpp | 13 +++++++++++- .../src/nodes/kernels/x64/brgemm_kernel.cpp | 16 ++++++--------- .../src/nodes/kernels/x64/jit_kernel_base.cpp | 2 +- src/plugins/intel_cpu/src/nodes/matmul.cpp | 13 +++++++++++- src/plugins/intel_cpu/src/nodes/mha.cpp | 6 ++++-- src/plugins/intel_cpu/src/nodes/pooling.cpp | 13 +++++++++++- src/plugins/intel_cpu/src/nodes/rnn.cpp | 5 +++++ .../intel_cpu/src/onednn/iml_type_mapper.cpp | 9 +++++++-- src/plugins/intel_cpu/thirdparty/onednn | 2 +- thirdparty/xbyak | 2 +- 15 files changed, 99 insertions(+), 35 deletions(-) diff --git a/src/plugins/intel_cpu/src/cpu_memory.cpp b/src/plugins/intel_cpu/src/cpu_memory.cpp index 515fe92845702c..ab454382f57d73 100644 --- a/src/plugins/intel_cpu/src/cpu_memory.cpp +++ b/src/plugins/intel_cpu/src/cpu_memory.cpp @@ -145,7 +145,7 @@ void Memory::redefineDesc(MemoryDescPtr desc) { void Memory::update() { if (dnnlMemHandle.isInit()) { auto prim = dnnlMemHandle.getPrim(); - prim.set_data_handle_no_pads_proc(m_mgrHandle->getRawPtr()); + prim.set_data_handle(m_mgrHandle->getRawPtr()); } } @@ -177,12 +177,8 @@ dnnl::memory Memory::DnnlMemPrimHandle::getPrim() const { // // ======================== auto data = m_memObjPtr->getDataNoThrow(); - auto pads_zeroing = m_memObjPtr->m_padsZeroing; if (data != nullptr) { - if (pads_zeroing) - m_prim.set_data_handle(data); - else - m_prim.set_data_handle_no_pads_proc(data); + m_prim.set_data_handle(data); } } return m_prim; @@ -498,10 +494,7 @@ StaticMemory::StaticMemory(const dnnl::engine& eng, MemoryDescPtr desc, const vo m_prim = dnnl::memory(dnnl_desc->getDnnlDesc(), m_eng, DNNL_MEMORY_NONE); // // ======================== - if (pads_zeroing) - m_prim.set_data_handle(m_pMemMngr->getRawPtr()); - else - m_prim.set_data_handle_no_pads_proc(m_pMemMngr->getRawPtr()); + m_prim.set_data_handle(m_pMemMngr->getRawPtr()); } catch (const std::exception& exc) { dnnlErrorCtx = exc.what(); diff --git a/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_brgemm_copy_b_emitter.cpp b/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_brgemm_copy_b_emitter.cpp index a31c4a18c68824..1cee6528ec9592 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_brgemm_copy_b_emitter.cpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_brgemm_copy_b_emitter.cpp @@ -85,9 +85,10 @@ void jit_brgemm_copy_b_emitter::init_brgemm_copy(std::unique_ptr(DnnlExtensionUtils::ElementTypeToDataType(src_dt)); brgCopyKernelConf.wei_dt = static_cast(DnnlExtensionUtils::ElementTypeToDataType(wei_dt)); + brgCopyKernelConf.orig_wei_dt = brgCopyKernelConf.wei_dt; 
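+    // onednn 3.5 also carries the original (pre-conversion) weights type in orig_wei_dt; it equals wei_dt here since no down-conversion is requested.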
brgCopyKernelConf.wei_n_blk = static_cast(N_blk); brgCopyKernelConf.wei_tag = dnnl_abcd; // What's about other ranks? - brgCopyKernelConf.copy_B_wei_stride = 0; + brgCopyKernelConf.transposed_B = false; brgCopyKernelConf.LDB = static_cast(LDB); brgCopyKernelConf.N = static_cast(N); brgCopyKernelConf.N_tail = static_cast(N_tail); @@ -97,6 +98,8 @@ void jit_brgemm_copy_b_emitter::init_brgemm_copy(std::unique_ptr(brgCopyKernelConf.src_dt)); brgCopyKernelConf.tr_b_dt_sz = DnnlExtensionUtils::sizeOfDataType(static_cast(brgCopyKernelConf.src_dt)); + brgCopyKernelConf.copy_B_wei_stride = brgCopyKernelConf.N * brgCopyKernelConf.b_dt_sz; + brgCopyKernelConf.req_wei_vnni_downconvert = false; diff --git a/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm.cpp b/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm.cpp index e538c3baef28bb..45c0923f4b4dbd 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm.cpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm.cpp @@ -134,7 +134,7 @@ std::shared_ptr BrgemmKernelExecutor::compile_kernel(const if (config.is_empty()) return compiled_kernel; - cpu::x64::brgemm_t desc; + cpu::x64::brgemm_desc_t desc; auto status = brgemm_desc_init(&desc, config.get_isa(), cpu::x64::brgemm_strd, config.get_dt_in0(), config.get_dt_in1(), false, false, cpu::x64::brgemm_row_major, 1.f, diff --git a/src/plugins/intel_cpu/src/graph.cpp b/src/plugins/intel_cpu/src/graph.cpp index 490c15fceb2ec4..4f3bf381d20720 100644 --- a/src/plugins/intel_cpu/src/graph.cpp +++ b/src/plugins/intel_cpu/src/graph.cpp @@ -392,7 +392,23 @@ void Graph::InitDescriptors() { OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, node->profiling.initSupportedPrimitiveDescriptors); DEBUG_LOG("Init supported primitive descriptors for node: ", node->getName()); node->initSupportedPrimitiveDescriptors(); - +#ifdef CPU_DEBUG_CAPS + { + const auto& SPDs = node->getSupportedPrimitiveDescriptors(); + for (size_t i = 0; i < SPDs.size(); i++) { + DEBUG_LOG("#", + node->getExecIndex(), + " ", + node->getName(), + " Before filter, SupportedPrimitiveDescriptors [", + i, + "/", + SPDs.size(), + "]: \n", + SPDs[i]); + } + } +#endif OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, node->profiling.filterSupportedPrimitiveDescriptors); DEBUG_LOG("Filter supported primitive descriptors for node: ", node->getName()); node->filterSupportedPrimitiveDescriptors(); @@ -404,7 +420,7 @@ void Graph::InitDescriptors() { node->getExecIndex(), " ", node->getName(), - " SupportedPrimitiveDescriptors [", + " After filter, SupportedPrimitiveDescriptors [", i, "/", SPDs.size(), diff --git a/src/plugins/intel_cpu/src/node.cpp b/src/plugins/intel_cpu/src/node.cpp index 41c3011f8707ec..326b3e907dcc8f 100644 --- a/src/plugins/intel_cpu/src/node.cpp +++ b/src/plugins/intel_cpu/src/node.cpp @@ -671,10 +671,21 @@ void Node::initSupportedPrimitiveDescriptors() { * since custom implementations can be not available at all, so a fallback to the default ones must happen * To achive the fallback, it is necessary to create a supported primitive descriptor for each implementation * since oneDNN primitive is mutating while iterating */ - +#ifdef CPU_DEBUG_CAPS + { + if (!customImplPriorities.empty()) { + DEBUG_LOG("#", getName(), " customImplPriorities [", 0 , "/", customImplPriorities.size(), + "]: ", impl_type_to_string(customImplPriorities[0])); + } + } +#endif for (auto& desc : descs) { auto first_desc = 
dnnl::primitive_desc(DnnlExtensionUtils::clone_primitive_desc(desc.get())); const bool first_match = customImplPriorities.empty(); + DEBUG_LOG("#", getName(), + ", itpd.impl_info_str(): ", desc.impl_info_str(), + ", parsed imp_type: ", impl_type_to_string(parse_impl_name(desc.impl_info_str())), + ", first_match: ", first_match ? "true" : "false"); DnnlExtensionUtils::for_each_implementation(desc, first_match, [&](impl_desc_type implType) { diff --git a/src/plugins/intel_cpu/src/nodes/conv.cpp b/src/plugins/intel_cpu/src/nodes/conv.cpp index e22a36af852a14..2422e2d3bb041c 100644 --- a/src/plugins/intel_cpu/src/nodes/conv.cpp +++ b/src/plugins/intel_cpu/src/nodes/conv.cpp @@ -805,7 +805,14 @@ void Convolution::initSupportedPrimitiveDescriptors() { supportedPrimitiveDescriptors.emplace_back(config, impl_type); }; - +#ifdef CPU_DEBUG_CAPS + { + if (!customImplPriorities.empty()) { + DEBUG_LOG("#", getName(), " customImplPriorities [", 0 , "/", customImplPriorities.size(), + "]: ", impl_type_to_string(customImplPriorities[0])); + } + } +#endif for (size_t dIdx = 0; dIdx < descs.size(); dIdx++) { auto& desc = descs[dIdx]; auto first_desc = dnnl::primitive_desc(DnnlExtensionUtils::clone_primitive_desc(desc.get())); @@ -816,6 +823,10 @@ void Convolution::initSupportedPrimitiveDescriptors() { }; const bool first_match = customImplPriorities.empty(); + DEBUG_LOG("#", getName(), + ", itpd.impl_info_str(): ", desc.impl_info_str(), + ", parsed imp_type: ", impl_type_to_string(parse_impl_name(desc.impl_info_str())), + ", first_match: ", first_match ? "true" : "false"); DnnlExtensionUtils::for_each_implementation(desc, first_match, [&](impl_desc_type implType) { diff --git a/src/plugins/intel_cpu/src/nodes/kernels/x64/brgemm_kernel.cpp b/src/plugins/intel_cpu/src/nodes/kernels/x64/brgemm_kernel.cpp index 86f80b33a8c875..e729fac66dd257 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/x64/brgemm_kernel.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/x64/brgemm_kernel.cpp @@ -104,12 +104,7 @@ BrgemmKernel::BrgemmKernel(size_t M, if (brgemmCtx0.is_with_amx || inType == ov::element::bf16 || b_transposed) { size_t b_stride = 0; - // must set actual stride when stride is not K/N - if (b_transposed) { - b_stride = ldb == K ? 0 : ldb * inType.size(); - } else { - b_stride = ldb == N ? 0 : ldb * inType.size(); - } + b_stride = ldb * inType.size(); // K should use the original K init_brgemm_copy_b(brgCopyBKernel, N, @@ -136,7 +131,7 @@ const size_t BrgemmKernel::get_scratch_b_size() const { void BrgemmKernel::init_brgemm(brgemmCtx& ctx, std::unique_ptr& brgKernel, bool use_amx) { - brgemm_t brgDesc; + brgemm_desc_t brgDesc; const bool is_int8 = one_of(ctx.dt_in0, data_type::u8, data_type::s8) && one_of(ctx.dt_in1, data_type::u8, data_type::s8); @@ -245,10 +240,12 @@ void BrgemmKernel::init_brgemm_copy_b( brgemm_matmul_conf_t brgCopyKernelConf; brgCopyKernelConf.src_dt = dt_in0; brgCopyKernelConf.wei_dt = dt_in1; + brgCopyKernelConf.orig_wei_dt = dt_in1; brgCopyKernelConf.wei_n_blk = N_blk; - // B could come from strided tensor, must use copy_B_wei_stride if set. - brgCopyKernelConf.wei_tag = copy_B_wei_stride != 0 ? transpose ? dnnl_adbc : dnnl_acbd : transpose ? dnnl_ba : dnnl_ab; + brgCopyKernelConf.wei_tag = transpose ? 
dnnl_ba : dnnl_ab; brgCopyKernelConf.copy_B_wei_stride = copy_B_wei_stride; + brgCopyKernelConf.transposed_B = transpose; + // LDB here is for the target tensor, not source tensor brgCopyKernelConf.LDB = LDB; brgCopyKernelConf.N = N; @@ -274,7 +271,6 @@ void BrgemmKernel::init_brgemm_copy_b( brgCopyKernelConf.has_zero_point_a = false; brgCopyKernelConf.has_zero_point_b = false; brgCopyKernelConf.src_zp_type = dnnl::impl::cpu::x64::none; - auto ret = create_brgemm_matmul_copy_b(brgCopyKernel, &brgCopyKernelConf); if (ret != dnnl::impl::status_t::dnnl_success) THROW_ERROR("cannot create_brgemm_matmul_copy_b kernel"); diff --git a/src/plugins/intel_cpu/src/nodes/kernels/x64/jit_kernel_base.cpp b/src/plugins/intel_cpu/src/nodes/kernels/x64/jit_kernel_base.cpp index f62d6e6c351a52..8fd3a966e13887 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/x64/jit_kernel_base.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/x64/jit_kernel_base.cpp @@ -11,7 +11,7 @@ namespace intel_cpu { namespace kernel { JitKernelBase::JitKernelBase(const char* name, x64::cpu_isa_t isa) - : x64::jit_generator(name, nullptr, x64::MAX_CODE_SIZE, true, isa), m_isa(isa) { + : x64::jit_generator(name, isa), m_isa(isa) { vlen = x64::isa_max_vlen(isa); } diff --git a/src/plugins/intel_cpu/src/nodes/matmul.cpp b/src/plugins/intel_cpu/src/nodes/matmul.cpp index 4355af1ea12993..2841e6f100afb7 100644 --- a/src/plugins/intel_cpu/src/nodes/matmul.cpp +++ b/src/plugins/intel_cpu/src/nodes/matmul.cpp @@ -484,10 +484,21 @@ void MatMul::initSupportedPrimitiveDescriptors() { supportedPrimitiveDescriptors.emplace_back(config, impl_type); }; - +#ifdef CPU_DEBUG_CAPS + { + if (!customImplPriorities.empty()) { + DEBUG_LOG("#", getName(), " customImplPriorities [", 0 , "/", customImplPriorities.size(), + "]: ", impl_type_to_string(customImplPriorities[0])); + } + } +#endif for (auto& desc : descs) { auto first_desc = dnnl::primitive_desc(DnnlExtensionUtils::clone_primitive_desc(desc.get())); const bool first_match = customImplPriorities.empty(); + DEBUG_LOG("#", getName(), + ", itpd.impl_info_str(): ", desc.impl_info_str(), + ", parsed imp_type: ", impl_type_to_string(parse_impl_name(desc.impl_info_str())), + ", first_match: ", first_match ? 
"true" : "false"); DnnlExtensionUtils::for_each_implementation(desc, first_match, [&](impl_desc_type implType) { diff --git a/src/plugins/intel_cpu/src/nodes/mha.cpp b/src/plugins/intel_cpu/src/nodes/mha.cpp index 3e87225fdbdba5..0a7fb66fc8897b 100644 --- a/src/plugins/intel_cpu/src/nodes/mha.cpp +++ b/src/plugins/intel_cpu/src/nodes/mha.cpp @@ -801,7 +801,7 @@ void MHA::initSupportedPrimitiveDescriptors() { void MHA::init_brgemm(brgemmCtx& ctx, std::unique_ptr& brgKernel, bool use_amx) { #ifdef OPENVINO_ARCH_X86_64 - brgemm_t brgDesc; + brgemm_desc_t brgDesc; brgemm_strides_t strides {static_cast(ctx.M * ctx.K), static_cast(ctx.K * ctx.N)}; const bool is_int8 = one_of(ctx.dt_in0, data_type::u8, data_type::s8) && one_of(ctx.dt_in1, data_type::u8, data_type::s8); @@ -859,9 +859,10 @@ void MHA::init_brgemm_copy_b(std::unique_ptr& brgCop brgemm_matmul_conf_t brgCopyKernelConf; brgCopyKernelConf.src_dt = dt_in0; brgCopyKernelConf.wei_dt = dt_in1; + brgCopyKernelConf.orig_wei_dt = dt_in1; brgCopyKernelConf.wei_n_blk = N_blk; brgCopyKernelConf.wei_tag = dnnl_abcd; - brgCopyKernelConf.copy_B_wei_stride = 0; + brgCopyKernelConf.transposed_B = false; brgCopyKernelConf.LDB = LDB; brgCopyKernelConf.N = N; brgCopyKernelConf.N_tail = N_tail; @@ -872,6 +873,7 @@ void MHA::init_brgemm_copy_b(std::unique_ptr& brgCop brgCopyKernelConf.b_dt_sz = DnnlExtensionUtils::sizeOfDataType(static_cast(brgCopyKernelConf.src_dt)); brgCopyKernelConf.tr_b_dt_sz = DnnlExtensionUtils::sizeOfDataType(static_cast(brgCopyKernelConf.src_dt)); brgCopyKernelConf.req_wei_vnni_downconvert = false; + brgCopyKernelConf.copy_B_wei_stride = brgCopyKernelConf.N * brgCopyKernelConf.b_dt_sz; if (is_with_amx) { brgCopyKernelConf.isa = avx512_core_amx; diff --git a/src/plugins/intel_cpu/src/nodes/pooling.cpp b/src/plugins/intel_cpu/src/nodes/pooling.cpp index d412cdaecda192..299ba4d15f4b6a 100644 --- a/src/plugins/intel_cpu/src/nodes/pooling.cpp +++ b/src/plugins/intel_cpu/src/nodes/pooling.cpp @@ -647,10 +647,21 @@ void Pooling::initSupportedPrimitiveDescriptors() { supportedPrimitiveDescriptors.emplace_back(config, impl_type); }; - +#ifdef CPU_DEBUG_CAPS + { + if (!customImplPriorities.empty()) { + DEBUG_LOG("#", getName(), " customImplPriorities [", 0 , "/", customImplPriorities.size(), + "]: ", impl_type_to_string(customImplPriorities[0])); + } + } +#endif for (auto& desc : descs) { auto first_desc = dnnl::primitive_desc(DnnlExtensionUtils::clone_primitive_desc(desc.get())); const bool first_match = customImplPriorities.empty(); + DEBUG_LOG("#", getName(), + ", itpd.impl_info_str(): ", desc.impl_info_str(), + ", parsed imp_type: ", impl_type_to_string(parse_impl_name(desc.impl_info_str())), + ", first_match: ", first_match ? 
"true" : "false"); DnnlExtensionUtils::for_each_implementation(desc, first_match, [&](impl_desc_type implType) { diff --git a/src/plugins/intel_cpu/src/nodes/rnn.cpp b/src/plugins/intel_cpu/src/nodes/rnn.cpp index 4558b9c7749b00..7b0e46ce8e5ce7 100644 --- a/src/plugins/intel_cpu/src/nodes/rnn.cpp +++ b/src/plugins/intel_cpu/src/nodes/rnn.cpp @@ -1170,6 +1170,11 @@ void RNN::prepareParams() { OPENVINO_THROW("Primitive descriptor was not found for node ", getName(), "."); } +#ifdef CPU_DEBUG_CAPS + auto pd = execPtr->getPrimitiveDesc(); + DEBUG_LOG("verbose##", getName(), "##", DnnlExtensionUtils::query_pd_info(pd), "\n"); +#endif + if (!primArgs.count(DNNL_ARG_WEIGHTS_LAYER) || !prevExecPtr || !execPtr->getWeightDesc()->isCompatible(*(prevExecPtr->getWeightDesc()))) { prepareMemory(execPtr->getWeightDesc(), 0); diff --git a/src/plugins/intel_cpu/src/onednn/iml_type_mapper.cpp b/src/plugins/intel_cpu/src/onednn/iml_type_mapper.cpp index 3ec7596c784ef3..d7a1e5979ddad9 100644 --- a/src/plugins/intel_cpu/src/onednn/iml_type_mapper.cpp +++ b/src/plugins/intel_cpu/src/onednn/iml_type_mapper.cpp @@ -13,10 +13,15 @@ namespace intel_cpu { impl_desc_type parse_impl_name(std::string impl_desc_name) { impl_desc_type res = impl_desc_type::unknown; -#define REPLACE_WORD(_wrd, _sub) auto pos = impl_desc_name.find(#_wrd); \ - if (pos != std::string::npos) impl_desc_name.replace(pos, std::string(#_wrd).length(), #_sub); +#define REPLACE_WORD(_wrd, _sub) { auto pos = impl_desc_name.find(#_wrd); \ + if (pos != std::string::npos) impl_desc_name.replace(pos, std::string(#_wrd).length(), #_sub); } + // Replace the ONEDNN pd name with OV definition. + REPLACE_WORD(brg_conv, brgconv); + REPLACE_WORD(avx10_1_512, avx512); + REPLACE_WORD(brg_matmul, brgemm); REPLACE_WORD(simple, ref); + #undef REPLACE_WORD #define SEARCH_WORD(_wrd) if (impl_desc_name.find(#_wrd) != std::string::npos) \ diff --git a/src/plugins/intel_cpu/thirdparty/onednn b/src/plugins/intel_cpu/thirdparty/onednn index f1cf31a2fa0979..cebd91ce7f2256 160000 --- a/src/plugins/intel_cpu/thirdparty/onednn +++ b/src/plugins/intel_cpu/thirdparty/onednn @@ -1 +1 @@ -Subproject commit f1cf31a2fa097932b8d74e88bf4bd941382504e4 +Subproject commit cebd91ce7f22567790f45d84e0b59b937dcc6e10 diff --git a/thirdparty/xbyak b/thirdparty/xbyak index 58642e0cdd5cbe..2ce465bbca46e9 160000 --- a/thirdparty/xbyak +++ b/thirdparty/xbyak @@ -1 +1 @@ -Subproject commit 58642e0cdd5cbe12f5d6e05069ddddbc0f5d5383 +Subproject commit 2ce465bbca46e92dde9c44bbe7940fd7f70e3b97 From aa33dc52a7b960386a0156fa2d3d402db73e6dfb Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 30 Jul 2024 10:33:51 +0200 Subject: [PATCH 05/10] Bump pyyaml from 6.0 to 6.0.1 in /tests (#24695) Bumps [pyyaml](https://github.com/yaml/pyyaml) from 6.0 to 6.0.1.
Changelog (sourced from pyyaml's changelog): 6.0.1 (2023-07-18). Commits: listed in the pyyaml repository.

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=pyyaml&package-manager=pip&previous-version=6.0&new-version=6.0.1)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

You can trigger a rebase of this PR by commenting `@dependabot rebase`.
> **Note** > Automatic rebases have been disabled on this pull request as it has been open for over 30 days. Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Ilya Lavrenov --- tests/e2e_tests/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/e2e_tests/requirements.txt b/tests/e2e_tests/requirements.txt index 2d380c682819aa..3b5773145d4469 100644 --- a/tests/e2e_tests/requirements.txt +++ b/tests/e2e_tests/requirements.txt @@ -32,7 +32,7 @@ pytest-timeout==2.2.0 # for common utils, e2e_tests openvino-dev distro==1.9.0 -pyyaml==6.0 +pyyaml==6.0.1 jsonschema==4.22.0 # filelock==3.9.0 omegaconf>=2.1,<2.4 From 5b7d9bb7fa1672eb42019dfe5d5b9b4aab43afc5 Mon Sep 17 00:00:00 2001 From: Maxim Vafin Date: Tue, 30 Jul 2024 11:00:59 +0200 Subject: [PATCH 06/10] [PT FE] Update hf tests and add LLM tests (#25758) ### Details: - *item1* - *...* ### Tickets: - *ticket-id* --- .../workflows/job_pytorch_models_tests.yml | 2 +- .../pytorch/hf_transformers_models | 656 +++++++----------- .../pytorch/test_hf_transformers.py | 328 +++++---- tests/model_hub_tests/pytorch/test_llm.py | 224 ++++++ tests/model_hub_tests/pytorch/test_timm.py | 4 +- 5 files changed, 665 insertions(+), 549 deletions(-) create mode 100644 tests/model_hub_tests/pytorch/test_llm.py diff --git a/.github/workflows/job_pytorch_models_tests.yml b/.github/workflows/job_pytorch_models_tests.yml index b910d9242647b1..381b0a51eb49df 100644 --- a/.github/workflows/job_pytorch_models_tests.yml +++ b/.github/workflows/job_pytorch_models_tests.yml @@ -183,7 +183,7 @@ jobs: uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 if: ${{ !cancelled() }} with: - name: test-results-torch-models-${{ inputs.model_scope == 'precommit' }} + name: test-results-torch-models-${{ inputs.model_scope }} path: | ${{ env.INSTALL_TEST_DIR }}/TEST-torch* if-no-files-found: 'error' diff --git a/tests/model_hub_tests/pytorch/hf_transformers_models b/tests/model_hub_tests/pytorch/hf_transformers_models index f79f32b6d93ee8..d52215b8b4e81e 100644 --- a/tests/model_hub_tests/pytorch/hf_transformers_models +++ b/tests/model_hub_tests/pytorch/hf_transformers_models @@ -1,420 +1,236 @@ -# List of models obtained from huggingface_hub api as one model per architecture with highest likes and downloads -abcp4/mymodel-test,mymodel,skip,Load problem -abeja/gpt-neox-japanese-2.7b,gpt_neox_japanese -acl-submission-anonym/EAM-spectral,examuse,skip,Load problem -adalbertojunior/modular-test,modular,skip,Load problem -adept/persimmon-8b-base,persimmon -aerner/lm-v2,open-llama -afonsosamarques/ardt-vanilla-combo_train_hopper_v2-2508_1336-33,decision_transformer,xfail,Tracing problem -aihijo/gec-zh-gector-bert-large,gector,skip,Load problem -albert-base-v2,albert -AlekseyKorshuk/test_reward_model,reward_model,skip,Load problem -alibaba-damo/mgp-str-base,mgp-str,xfail,Compile error: unsupported Einsum -allenai/hvila-block-layoutlm-finetuned-docbank,hierarchical_model,skip,Load problem -allenai/longformer-base-4096,longformer -ameya772/sentence-t5-base-atis-fine-tuned,T5,skip,Load problem -andreasmadsen/efficient_mlm_m0.40,roberta-prelayernorm -anton-l/emformer-base-librispeech,emformer,skip,Load problem -anugunj/omnivore-swinL-in21k,omnivore,skip,Load problem -apple/mobilevitv2-1.0-imagenet1k-256,mobilevitv2,xfail,Unsupported op aten::col2im -ArthurZ/jukebox_prior_0,jukebox_prior,skip,Load problem -ArthurZ/jukebox-vqvae,jukebox_vqvae,skip,Load 
problem
-asapp/sew-d-base-plus-400k-ft-ls100h,sew-d
-ashishpatel26/span-marker-bert-base-fewnerd-coarse-super,span-marker,skip,Load problem
-asi/albert-act-tiny,albert_act,skip,Load problem
-BAAI/AltCLIP,altclip
-BAAI/AquilaCode-py,aquila
-bana513/opennmt-translator-en-hu,opennmt-translator,skip,Load problem
-benjamin/wtp-bert-mini,bert-char,skip,Load problem
-benjamin/wtp-canine-s-1l,la-canine,skip,Load problem
-beomi/exKcBERT-kowiki,exbert,skip,Load problem
-bert-base-uncased,bert
-bigscience/bloom-560m,bloom
-Bono93/basic-cnn-example,simplecnn,skip,Load problem
-caidas/swin2SR-realworld-sr-x4-64-bsrgan-psnr,swin2sr
-camembert-base,camembert
-CarlCochet/trajectory-transformer-ant-expert-v2,trajectory_transformer,xfail,Tracing problem
-CEBaB/bert-base-uncased.CEBaB.causalm.ambiance.2-class.exclusive.seed_42,bert_causalm,skip,Load problem
-CEBaB/gpt2.CEBaB.causalm.None__None.2-class.exclusive.seed_42,gpt2_causalm,skip,Load problem
-CEBaB/lstm.CEBaB.causalm.ambiance__food.2-class.exclusive.seed_42,lstm_causalm,skip,Load problem
-CEBaB/roberta-base.CEBaB.causalm.None__None.2-class.exclusive.seed_42,roberta_causalm,skip,Load problem
-cestwc/lava-small-gigaword,lava,skip,Load problem
-chlab/efficientnet_61_planet_detection,efficientnet_61_planet_detection,skip,Load problem
-CIDAS/clipseg-rd64-refined,clipseg
-clefourrier/graphormer-base-pcqm4mv2,graphormer,xfail,Tracing problem
-cliang1453/deberta-v3-base-rte-teacher-stage1,ted-deberta-v2,skip,Load problem
-clip-italian/clip-italian,vision-text-dual-encoder,xfail,Tracing problem
-Clyde013/deqbert-base,deqbert,skip,Load problem
-connor-henderson/fastspeech2_conformer,fastspeech2_conformer,skip,Load problem
-connor-henderson/fastspeech2_conformer_with_hifigan,fastspeech2_conformer_with_hifigan,skip,Load problem
-csarron/meter-vqa2-ft,meter,skip,Load problem
-ctrl,ctrl
-cwkeam/mctc-large,mctc,skip,Load problem
-dandelin/vilt-b32-finetuned-vqa,vilt,xfail,Accuracy due to random
-dangkhoadl/custom_CNN_1D,cnn,skip,Load problem
-declare-lab/segue-w2v2-base,segue,skip,Load problem
-deepesh0x/autotrain-mlsec-1013333726,julien,skip,Load problem
-Dewa/Dog_Model_From_Scratch_v2,simple_image_classification,skip,Load problem
-dg845/univnet-dev,univnet,skip,Load problem
-distilbert-base-uncased,distilbert
-DiViorg/REC-Transformer,rec_transformer,skip,Load problem
-dreamerlin/chatbind-7b-delta,llava_image_bind_select,skip,Load problem
-edugp/data2vec-nlp-base,data2vec,skip,Load problem
-EleutherAI/enformer-official-rough,enformer,skip,Load problem
-EleutherAI/gpt-neo-125m,gpt_neo
-EleutherAI/pythia-6.9b,gpt_neox
-facebook/bart-large-mnli,bart
-facebook/blenderbot-400M-distill,blenderbot
-facebook/blenderbot_small-90M,blenderbot-small
-facebook/convnextv2-tiny-22k-384,convnextv2
-facebook/detr-resnet-50,detr
-facebook/dinov2-base,dinov2
-facebook/dpr-question_encoder-single-nq-base,dpr
-facebook/encodec_24khz,encodec
-facebook/esm2_t6_8M_UR50D,esm
-facebook/flava-full,flava,xfail,Tracing problem
-facebook/flava-image-codebook,flava_image_codebook,skip,Load problem
-facebook/levit-128S,levit
-facebook/m2m100_418M,m2m_100
-facebook/mask2former-swin-base-coco-panoptic,mask2former
-facebook/maskformer-swin-base-coco,maskformer
-facebook/mbart-large-50-many-to-many-mmt,mbart
-facebook/mms-lid-126,wav2vec2
-facebook/mms-tts-eng,vits,xfail,Accuracy failed: results cannot be broadcasted
-facebook/musicgen-small,musicgen
-facebook/opt-125m,opt
-facebook/rag-token-nq,rag,skip,Load problem
-facebook/sam-vit-large,sam,xfail,No node with name original_sizes
-facebook/timesformer-base-finetuned-k400,timesformer
-facebook/vit-mae-base,vit_mae
-facebook/wmt19-ru-en,fsmt,xfail,Tracing problem
-facebook/xglm-7.5B,xglm
-facebook/xlm-roberta-xl,xlm-roberta-xl
-facebook/xmod-base,xmod
-flax-community/ft5-cnn-dm,f_t5
-fnlp/elasticbert-base,elasticbert,skip,Load problem
-FranzStrauss/ponet-base-uncased,ponet,skip,Load problem
-funnel-transformer/small,funnel
-fusing/latent-diffusion-text2im-large,ldmbert,skip,Load problem
-Geor111y/flair-ner-addresses-extractor,flair,skip,Load problem
-gia-project/gia,gia,skip,Load problem
-gokuls/bert_12_layer_model_v1,hybridbert,skip,Load problem
-google/bigbird-roberta-base,big_bird
-google/bigbird-pegasus-large-arxiv,bigbird-pegasus
-google/bit-50,bit
-google/canine-s,canine,xfail,aten::slice: Parameter axis 3 out of the tensor rank range
-google/efficientnet-b2,efficientnet,xfail,Compile error: AvgPool: Kernel after dilation has size (dim: 1408) larger than the data shape after padding (dim: 9) at axis 0.
-google/electra-base-discriminator,electra
-google/flan-t5-base,t5
-google/fnet-base,fnet,xfail,Unsupported op aten::fft_fftn aten::real
-google/mobilebert-uncased,mobilebert
-google/mobilenet_v1_0.75_192,mobilenet_v1
-google/mt5-base,mt5
-google/owlv2-base-patch16-ensemble,owlv2
-google/owlvit-base-patch32,owlvit
-google/pix2struct-docvqa-base,pix2struct
-google/realm-orqa-nq-openqa,realm,skip,Load problem
-google/reformer-crime-and-punishment,reformer,xfail,Tracing problem
-google/tapas-large-finetuned-wtq,tapas
-google/vit-hybrid-base-bit-384,vit-hybrid,skip,Load problem
-google/vivit-b-16x2-kinetics400,vivit
-Goutham-Vignesh/ContributionSentClassification-scibert,scibert
-gpt2,gpt2
-Graphcore/groupbert-base-uncased,groupbert,skip,Load problem
-haoranzhao419/saffu-100M-0.1,saffu-100M-0.1,skip,Load problem
-Helsinki-NLP/opus-mt-fr-en,marian
-#hf-internal-testing/random-nllb-moe-2-experts,nllb-moe,skip,Load problem
-hf-internal-testing/tiny-random-CodeGenModel,codegen
-hf-internal-testing/tiny-random-convnext,convnext
-hf-internal-testing/tiny-random-CvtModel,cvt
-hf-internal-testing/tiny-random-Data2VecAudioModel,data2vec-audio,skip,Load problem
-hf-internal-testing/tiny-random-Data2VecTextModel,data2vec-text
-hf-internal-testing/tiny-random-Data2VecVisionModel,data2vec-vision
-hf-internal-testing/tiny-random-DeiTModel,deit
-hf-internal-testing/tiny-random-DonutSwinModel,donut-swin
-hf-internal-testing/tiny-random-EfficientFormerForImageClassification,efficientformer
-hf-internal-testing/tiny-random-flaubert,flaubert
-hf-internal-testing/tiny-random-FocalNetModel,focalnet
-hf-internal-testing/tiny-random-GPTBigCodeForCausalLM,gpt_bigcode
-hf-internal-testing/tiny-random-GPTJModel,gptj
-hf-internal-testing/tiny-random-groupvit,groupvit
-hf-internal-testing/tiny-random-IBertModel,ibert
-hf-internal-testing/tiny-random-ImageGPTModel,imagegpt
-hf-internal-testing/tiny-random-LiltModel,lilt
-hf-internal-testing/tiny-random-LongT5Model,longt5,skip,Load problem
-hf-internal-testing/tiny-random-MobileNetV2Model,mobilenet_v2
-hf-internal-testing/tiny-random-mobilevit,mobilevit
-hf-internal-testing/tiny-random-MPNetModel,mpnet
-hf-internal-testing/tiny-random-MptForCausalLM,mpt
-hf-internal-testing/tiny-random-NllbMoeForConditionalGeneration,nllb_moe,skip,Load problem
-hf-internal-testing/tiny-random-NystromformerModel,nystromformer
-hf-internal-testing/tiny-random-RegNetModel,regnet
-hf-internal-testing/tiny-random-RemBertModel,rembert
-hf-internal-testing/tiny-random-RoFormerModel,roformer
-hf-internal-testing/tiny-random-SegformerModel,segformer
-hf-internal-testing/tiny-random-SEWModel,sew,skip,Load problem
-hf-internal-testing/tiny-random-Speech2TextModel,speech_to_text,skip,Load problem
-hf-internal-testing/tiny-random-speech-encoder-decoder,speech-encoder-decoder,skip,Load problem
-hf-internal-testing/tiny-random-SplinterModel,splinter
-hf-internal-testing/tiny-random-SqueezeBertModel,squeezebert
-hf-internal-testing/tiny-random-SwinModel,swin
-hf-internal-testing/tiny-random-vision_perceiver_conv,perceiver
-hf-internal-testing/tiny-random-ViTMSNModel,vit_msn
-hf-internal-testing/tiny-random-wav2vec2-conformer,wav2vec2-conformer
-hf-internal-testing/tiny-random-XLMModel,xlm
-hfl/vle-base,vle,skip,Load problem
-HJHGJGHHG/GAU-Base-Full,gau,skip,Load problem
-huggingface/autoformer-tourism-monthly,autoformer,skip,Load problem
-huggingface/informer-tourism-monthly,informer,skip,Load problem
-huggingface/time-series-transformer-tourism-monthly,time_series_transformer,skip,Load problem
-HuggingFaceM4/tiny-random-idefics,idefics,xfail,Unsupported op aten::any aten::einsum prim::TupleConstruct prim::TupleUnpack
-HuggingFaceM4/tiny-random-vllama-clip,vllama,skip,Load problem
-HuggingFaceM4/tiny-random-vopt-clip,vopt,skip,Load problem
-HuggingFaceH4/zephyr-7b-beta,mistral
-HuiHuang/gpt3-damo-base-zh,gpt3,skip,Load problem
-hustvl/yolos-tiny,yolos
-iakarshu/tilt_base,tilt_base_configuration,skip,Load problem
-ibm/MoLM-350M-4B,moduleformer,skip,Load problem
-IDEA-CCNL/Randeng-Deltalm-362M-En-Zh,Deltalm,skip,Load problem
-Inderpreet01/seaformer-semantic-segmentation-large,seaformer,skip,Load problem
-Intel/dpt-hybrid-midas,dpt
-# Intel/tvp-base,tvp,skip,Load problem # takes too long
-isemmanuelolowe/code-embedder,instruct-codebert,skip,Load problem
-isemmanuelolowe/instruct-codet5-5,instruct-codet5,skip,Load problem
-jaketae/fastspeech2-ljspeech,fastspeech2,skip,Load problem
-jambran/depression-classification,DepressionDetection,skip,Load problem
-Jellywibble/dalio-reward-charlie-v1,reward-model,skip,Load problem
-JonasGeiping/crammed-bert-legacy,crammedBERT,skip,Load problem
-jonatasgrosman/wav2vec2-large-xlsr-53-english,wav2vec2
-Joqsan/test-my-fnet,my_fnet,skip,Load problem
-jozhang97/deta-swin-large,deta,skip,Load problem
-jploski/retnet-mini-shakespeare,retnet,skip,Load problem
-juhi7ag/idea-model2,,skip,Load problem
-junnyu/autobert-small-light,autobert,skip,Load problem
-junnyu/chinese_GAU-alpha-char_L-24_H-768,gau_alpha,skip,Load problem
-junnyu/flash_small_wwm_cluecorpussmall,flash,skip,Load problem
-junnyu/flashquad_small_wwm_cluecorpussmall,flash_quad,skip,Load problem
-kakaobrain/align-base,align,xfail,Compile error: AvgPool: Kernel after dilation has size (dim: 640) larger than the data shape after padding (dim: 9) at axis 0.
-KBLab/megatron-bert-large-swedish-cased-110k,megatron-bert
-kiddothe2b/hierarchical-transformer-base-4096-v2,hat,skip,Load problem
-k-l-lambda/clip-text-generator,clip_text_generator,skip,Load problem
-k-l-lambda/stable-diffusion-v1-4-inv-embed,inv_word_embed,skip,Load problem
-krasserm/perceiver-ar-clm-base,perceiver-ar-causal-language-model,skip,Load problem
-krasserm/perceiver-ar-sam-giant-midi,perceiver-ar-symbolic-audio-model,skip,Load problem
-krasserm/perceiver-io-img-clf,perceiver-io-image-classifier,skip,Load problem
-krasserm/perceiver-io-mlm,perceiver-io-masked-language-model,skip,Load problem
-krasserm/perceiver-io-optical-flow,perceiver-io-optical-flow,skip,Load problem
-krasserm/perceiver-io-txt-clf-imdb,perceiver-io-text-classifier,skip,Load problem
-ksmcg/fan_small_12_p16_224,fan,skip,Load problem
-laion/clap-htsat-unfused:audio_model,clap
-laion/clap-htsat-unfused:audio_projection,clap
-Langboat/ReGPT-125M-200G,re_gpt,skip,Load problem
-lengyue233/content-vec-best,hubert
-Lewislou/cellseg_sribd,cell_sribd,skip,Load problem
-liamcripwell/o-conbart,context-bart,skip,Load problem
-liamcripwell/pgdyn-plan,context-roberta,skip,Load problem
-linhdo/graphdoc,graphdoc,skip,Load problem
-LinkSoul/Chinese-LLaVA-Baichuan,llava,skip,Load problem
-LinkSoul/LLaSM-Cllama2,llaaa,skip,Load problem
-lintang/pile-t5-base-flan,umt5
-liuhaotian/LLaVA-Lightning-MPT-7B-preview,llava_mpt,skip,Load problem
-liya0121/my_finetune_0121,progen,skip,Load problem
-lucadiliello/BLEURT-20,bleurt,skip,Load problem
-lum-ai/metal,metal,skip,Load problem
-luodian/OTTER-MPT1B-RPJama-Init,otter,skip,Load problem
-luoruipu1/valley-13b-v1-delta,Valley,skip,Load problem
-luoruipu1/Valley2-7b,valley,skip,Load problem
-Lutech-AI/I-SPIn,I-SPIn,skip,Load problem
-MAGAer13/mplug-owl-llama-7b,mplug-owl,skip,Load problem
-manu/contrastive_zeroner,contrastive_zeroner,skip,Load problem
-manu/lilt-infoxlm-base,liltrobertalike,skip,Load problem
-manu/mplt_untrained,mplt,skip,Load problem
-matheusntg/character-bert-pt-normal,character_bert,skip,Load problem
-MBZUAI/swiftformer-xs,swiftformer
-MCG-NJU/videomae-base-finetuned-kinetics,videomae
-M-CLIP/XLM-Roberta-Large-Vit-B-32,M-CLIP,skip,Load problem
-medhabi/distilbert-base-uncased-score-pred,text-to-rating,skip,Load problem
-meta-llama/Llama-2-7b-hf,llama,skip,Load problem
-microsoft/beit-base-patch16-224-pt22k-ft22k,beit
-microsoft/biogpt,biogpt
-microsoft/conditional-detr-resnet-50,conditional_detr
-microsoft/deberta-base,deberta
-microsoft/git-large-coco,git,xfail,Tracing error: Please check correctness of provided example_input (but eval was correct)
-microsoft/kosmos-2-patch14-224,kosmos-2
-microsoft/layoutlm-base-uncased,layoutlm
-microsoft/layoutlmv2-base-uncased,layoutlmv2,xfail,Tracing error: Please check correctness of provided example_input (but eval was correct)
-microsoft/layoutlmv3-base,layoutlmv3
-microsoft/markuplm-base,markuplm
-microsoft/prophetnet-large-uncased-squad-qg,prophetnet
-microsoft/resnet-50,resnet
-microsoft/speecht5_hifigan,hifigan,skip,Load problem
-microsoft/speecht5_tts,speecht5,xfail,Unsupported op aten::bernoulli
-microsoft/swinv2-tiny-patch4-window8-256,swinv2
-microsoft/table-transformer-detection,table-transformer
-microsoft/unispeech-1350-en-17h-ky-ft-1h,unispeech
-microsoft/unispeech-sat-base-100h-libri-ft,unispeech-sat
-microsoft/wavlm-large,wavlm,skip,Load problem
-microsoft/xclip-base-patch32,xclip
-microsoft/xprophetnet-large-wiki100-cased,xlm-prophetnet
-miguelvictor/python-fromzero-lstmlm,lstmlm,skip,Load problem
-mingzi151/test-hf-wav2vec2bert,wav2vec2bert,skip,Load problem
-MIT/ast-finetuned-audioset-10-10-0.4593,audio-spectrogram-transformer
-Mizuiro-sakura/luke-japanese-large-sentiment-analysis-wrime,luke
-mlml-chip/thyme2_colon_e2e,cnlpt,skip,Load problem
-mnaylor/mega-base-wikitext,mega,xfail,Tracing error: Please check correctness of provided example_input (but eval was correct)
-mohitsha/tiny-random-testing-bert2gpt2,encoder-decoder
-MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli,deberta-v2
-MoritzLaurer/ernie-m-large-mnli-xnli,ernie_m
-mrm8488/prunebert-base-uncased-finepruned-topK-squadv2,masked_bert,skip,Load problem
-muditb/headline_classifier,BertModel,skip,Load problem
-nanashi161382/clip-text-deprojector,clip_text_deprojector_model,skip,Load problem
-nateraw/vit-age-classifier,vit
-naver-clova-ocr/bros-base-uncased,bros
-navervision/CompoDiff-Aesthetic,CompoDiff,skip,Load problem
-navervision/KELIP,kelip,skip,Load problem
-NCAI/NCAI-BERT,lean_albert,skip,Load problem
-nglaura/skimformer,skimformer,skip,Load problem
-nguyenvulebinh/robustspeech-asr,robustspeech,skip,Load problem
-nguyenvulebinh/voice-filter,voicefilter,skip,Load problem
-NiCy/seg-ment-tation,seg-ment-tation,skip,Load problem
-nielsr/audio-spectogram-transformer-finetuned-audioset-10-10-0.4593,audio-spectogram-transformer,skip,Load problem
-nielsr/convnext-tiny-maskrcnn,maskrcnn,skip,Load problem
-nielsr/H3-125m,h3,skip,Load problem
-nielsr/layoutreader-readingbank,layoutreader,skip,Load problem
-nielsr/pix2seq-base,pix2seq,skip,Load problem
-nielsr/tapex-large,tapex,skip,Load problem
-nielsr/udop-large,udop,skip,Load problem
-nielsr/vitmatte-small-composition-1k,vitmatte,skip,Load problem
-nllg/poetry-bygpt5-small-en,bygpt5,skip,Load problem
-nlpconnect/vit-gpt2-image-captioning,vision-encoder-decoder
-OATML-Markslab/Tranception_Small,tranception,skip,Load problem
-OFA-Sys/chinese-clip-vit-base-patch16,chinese_clip
-openai/clip-vit-large-patch14,clip
-openai/jukebox-1b-lyrics,jukebox,skip,Load problem
-openai/whisper-medium,whisper,skip,Load problem
-openai-gpt,openai-gpt
-OpenAssistant/oasst-rm-2-pythia-6.9b-epoch-1,gpt_neox_reward_model,skip,Load problem
-openmmlab/upernet-convnext-small,upernet
-openMUSE/clip-vit-large-patch14-text-enc,clip_text_model,skip,Load problem
-katuni4ka/opt-125m-gptq,opt
-PatrickHaller/ngme-llama-264M,ngme,skip,Load problem
-patrickvonplaten/bert2gpt2-cnn_dailymail-fp16,encoder_decoder,skip,Load problem
-paulhindemith/test-zeroshot,test-zeroshot,skip,Load problem
-PGT/orig-nystromformer-s-artificial-balanced-max500-490000-0,graph_nystromformer,skip,Load problem
-pie/example-ner-spanclf-conll03,TransformerSpanClassificationModel,skip,Load problem
-pie/example-re-textclf-tacred,TransformerTextClassificationModel,skip,Load problem
-pleisto/yuren-baichuan-7b,multimodal_llama
-predictia/europe_reanalysis_downscaler_convbaseline,convbilinear,skip,Load problem
-predictia/europe_reanalysis_downscaler_convswin2sr,conv_swin2sr,skip,Load problem
-pszemraj/led-large-book-summary,led
-pszemraj/pegasus-x-large-book-summary,pegasus_x
-qmeeus/whisper-small-ner-combined,whisper_for_slu,skip,Load problem
-raman-ai/pcqv2-tokengt-lap16,tokengt,skip,Load problem
-range3/pegasus-gpt2-medium,pegasusgpt2,skip,Load problem
-regisss/bridgetower-newyorker-a100-8x,bridgetower
-rinna/japanese-cloob-vit-b-16,cloob,skip,Load problem
-Rocketknight1/tiny-random-falcon-7b,falcon
-RUCAIBox/mass-base-uncased,mass,skip,Load problem
-RWKV/rwkv-4-169m-pile,rwkv
-sahasrarjn/interbert,BERT,skip,Load problem
-saibo/genkalm-medium-gpt2,genkalm,skip,Load problem
-sail/poolformer_m36,poolformer
-SajjadAyoubi/clip-fa-vision,clip_vision_model
-Salesforce/blip2-flan-t5-xl:vision_model,blip-2
-Salesforce/blip2-flan-t5-xl:qformer,blip-2
-Salesforce/blip2-flan-t5-xl:language_projection,blip-2
-Salesforce/blip-image-captioning-large,blip
-Salesforce/instructblip-vicuna-7b,instructblip,skip,Load problem
-SamLowe/roberta-base-go_emotions,roberta
-sanchit-gandhi/enhanced_direct_s2st_en_to_es,speech-to-speech,skip,Load problem
-sciki/finetune_tinybert,finetune-tinybert,skip,Load problem
-sebastian-hofstaetter/colbert-distilbert-margin_mse-T2-msmarco,ColBERT,skip,Load problem
-sebastian-hofstaetter/distilbert-cat-margin_mse-T2-msmarco,BERT_Cat,skip,Load problem
-sebastian-hofstaetter/idcm-distilbert-msmarco_doc,IDCM,skip,Load problem
-SenseTime/deformable-detr,deformable_detr,xfail,Tracing error: Please check correctness of provided example_input (but eval was correct)
-shahules786/Reward-model-gptneox-410M,rm_gptneox_config,skip,Load problem
-shauray/Llava-Llama-2-7B-hf,llavallama,skip,Load problem
-shauray/ViTPose,vitpose,skip,Load problem
-sheonhan/ict-imagenet-256,ict,skip,Load problem
-shibing624/text2vec-base-chinese-paraphrase,ernie
-shikhartuli/flexibert-mini,flexibert,skip,Load problem
-shikras/shikra-7b-delta-v1-0708,shikra,skip,Load problem
-shi-labs/dinat-mini-in1k-224,dinat,xfail,Accuracy validation failed
-shi-labs/nat-mini-in1k-224,nat,xfail,Accuracy validation failed
-shi-labs/oneformer_ade20k_swin_large,oneformer,xfail,Different number of outputs between framework and OpenVINO
-shuqi/seed-encoder,seed_encoder,skip,Load problem
-sijunhe/nezha-cn-base,nezha
-sjiang1/codecse,roberta_for_cl,skip,Load problem
-slh/fcnet-base-cased,fcnet,skip,Load problem
-snoop2head/Deep-Shallow-Ko2En,transformer,skip,Load problem
-Solomonik/SeqTokenModelMultiple,SeqToken,skip,Load problem
-solotimes/lavibe_base,donut,skip,Load problem
-songlab/gpn-brassicales,ConvNet,skip,Load problem
-speechbrain/m-ctc-t-large,mctct
-Splend1dchan/wav2vec2-large-lv60_t5lephone-small_lna_bs64,speechmix,skip,Load problem
-stefan-it/bort-full,bort
-SteveZhan/my-resnet50d,resnet_steve,skip,Load problem
-suno/bark,bark,skip,Load problem
-surajnair/r3m-50,r3m,skip,Load problem
-susnato/clvp_dev,clvp,skip,Load problem
-susnato/phi-1_5_dev,phi
-Tanrei/GPTSAN-japanese,gptsan-japanese,xfail,Unsupported op aten::index_put_ prim::TupleConstruct
-tau/bart-large-sled-govreport,tau/sled,skip,Load problem
-taufeeque/best-cb-model,codebook,skip,Load problem
-Team-PIXEL/pixel-base,pixel,skip,Load problem
-tensorpro/clip_vip_pretrained_base_16,clip_vip,skip,Load problem
-thomwolf/vqgan_imagenet_f16_1024,vqgan_model,skip,Load problem
-thu-ml/zh-clip-vit-roberta-large-patch14,zhclip,skip,Load problem
-tifa-benchmark/promptcap-coco-vqa,ofa,skip,Load problem
-tli8hf/robertabase_snli,transformerfornli,skip,Load problem
-# transfo-xl/transfo-xl-wt103,transfo-xl - deprecated by transformers due to security vulnerability, not inferable in latest transformers
-transZ/BART_shared_clean,shared_bart,skip,Load problem
-transZ/BART_shared_v2,shared_bart_v2,skip,Load problem
-transZ/misecom,misecom,skip,Load problem
-transZ/parex,parex,skip,Load problem
-transZ/phrext,phrext,skip,Load problem
-transZ/reword,reword,skip,Load problem
-transZ/roberta_texid,roberta_texid,skip,Load problem
-transZ/tforge_v1.9,Transformer_Forge,skip,Load problem
-trl-internal-testing/tiny-random-BigBirdPegasusForConditionalGeneration,bigbird_pegasus
-trl-internal-testing/tiny-random-BlenderbotSmallForConditionalGeneration,blenderbot-small,skip,Load problem
-trl-internal-testing/tiny-random-MvpForConditionalGeneration,mvp
-trl-internal-testing/tiny-random-SwitchTransformersForConditionalGeneration,switch_transformers,skip,Load problem
-tuner007/pegasus_paraphrase,pegasus
-turing-motors/heron-chat-blip-ja-stablelm-base-7b-v0,video_blip,skip,Load problem
-turing-motors/heron-chat-git-ELYZA-fast-7b-v0,git_llama,skip,Load problem
-uclanlp/plbart-base,plbart
-uclanlp/visualbert-vqa-coco-pre,visual_bert
-ummagumm-a/samolet_room_classifier,AirModelHF,skip,Load problem
-ummagumm-a/samolet-room-classifier,gru,skip,Load problem
-UNCANNY69/Misinfo-BERT-LSTM,BertLSTMForSequenceClassification,skip,Load problem
-UNCANNY69/Miss-BERT-CNN,BertCNNForSequenceClassification,skip,Load problem
-unc-nlp/lxmert-base-uncased,lxmert,skip,Load problem
-uw-madison/mra-base-512-4,mra
-uw-madison/yoso-4096,yoso
-valhalla/cogview-gpt2-test,cog_view,skip,Load problem
-valhalla/s2t_mustc_multilinguial_medium,speech_to_text_transformer,skip,Load problem
-vblagoje/greaselm-csqa,greaselm,skip,Load problem
-vinvino02/glpn-nyu,glpn
-Visual-Attention-Network/van-base,van
-visualjoyce/transformers4vl-uniter-base,uniter,skip,Load problem
-visualjoyce/transformers4vl-vilbert-mt,vilbert,skip,Load problem
-vumichien/nonsemantic-speech-trillsson3,trillsson_efficientnet,skip,Load problem
-vumichien/trillsson3-ft-keyword-spotting-12,trillsson_efficient,skip,Load problem
-wangruiai2023/nougat,nougat,skip,Load problem
-weiweishi/roc-bert-base-zh,roc_bert
-WENGSYX/CoNN_Parity,conn,skip,Load problem
-xlm-roberta-base,xlm-roberta
-xlnet-base-cased,xlnet
-ybelkada/focusondepth,focusondepth,skip,Load problem
-ybelkada/random-tiny-BertGenerationModel,bert-generation
-YituTech/conv-bert-base,convbert
-yjernite/retribert-base-uncased,retribert,xfail,Unsupported op aten::cross_entropy_loss
-ylacombe/hf-seamless-m4t-medium,seamless_m4t,skip,Load problem
-youzanai/clip-product-title-chinese,clip_chinese_model,skip,Load problem
-Yova/SmallCapOPT7M,smallcap,skip,Load problem
-yusufani/trclip-vitl14-e10,trclip,skip,Load problem
-yysung53/dpr,text_similarity,skip,Load problem
-Zetatech/pvt-tiny-224,pvt
-ZinengTang/tvlt-base,tvlt,xfail,Conversion is failed for aten::cat: Argument element types are inconsistent
-zuppif/resnetd-18,resnetd,skip,Load problem
+# List of models
+albert,albert/albert-base-v2
+align,kakaobrain/align-base,xfail,Compile error: AvgPool: Kernel after dilation has size (dim: 640) larger than the data shape after padding (dim: 9) at axis 0.
+altclip,BAAI/AltCLIP
+aquila,BAAI/AquilaCode-py
+audio-spectrogram-transformer,MIT/ast-finetuned-audioset-10-10-0.4593
+autoformer,huggingface/autoformer-tourism-monthly,xfail,Load error: mat1 and mat2 shapes cannot be multiplied
+bark,suno/bark,xfail,Load error: got an unexpected keyword argument 'input_ids'
+bart,facebook/bart-large-mnli
+beit,microsoft/beit-base-patch16-224-pt22k-ft22k
+bert,sentence-transformers/all-MiniLM-L6-v2
+bert-generation,google/bert_for_seq_generation_L-24_bbc_encoder
+big_bird,google/bigbird-roberta-base
+bigbird_pegasus,google/bigbird-pegasus-large-arxiv
+biogpt,microsoft/biogpt
+bit,google/bit-50
+blenderbot,facebook/blenderbot-400M-distill
+blenderbot-small,facebook/blenderbot_small-90M
+blip,Salesforce/blip-image-captioning-large
+blip_2_qformer,ZinengTang/qformer
+blip_2_vision_model,ZinengTang/clip_last_layer_removed
+blip-2,Salesforce/blip2-opt-2.7b:language_projection
+blip-2,Salesforce/blip2-opt-2.7b:qformer
+blip-2,Salesforce/blip2-opt-2.7b:vision_model
+bloom,bigscience/bloom-560m
+bort,stefan-it/bort-full
+bridgetower,regisss/bridgetower-newyorker-a100-8x
+bros,naver-clova-ocr/bros-base-uncased
+camembert,Jean-Baptiste/camembert-ner
+canine,google/canine-c,xfail,aten::slice: Parameter axis 3 out of the tensor rank range
+chinese_clip,OFA-Sys/chinese-clip-vit-base-patch16
+clap,laion/clap-htsat-unfused:audio_model
+clap,laion/clap-htsat-unfused:audio_projection
+clip,openai/clip-vit-large-patch14
+clip_text_model,maze/CLIP-ViT-bigG-14
+clip_vision_model,SajjadAyoubi/clip-fa-vision
+clipseg,CIDAS/clipseg-rd64-refined
+clvp,susnato/clvp_dev
+codegen,Salesforce/codegen-350M-mono
+conditional_detr,microsoft/conditional-detr-resnet-50
+convbert,YituTech/conv-bert-base
+convnext,facebook/convnext-large-224
+convnextv2,Pavarissy/ConvNextV2-large-DogBreed
+ctrl,Salesforce/ctrl
+cvt,microsoft/cvt-13
+data2vec-audio,m-a-p/music2vec-v1
+data2vec-text,facebook/data2vec-text-base
+data2vec-vision,facebook/data2vec-vision-large-ft1k
+deberta,microsoft/deberta-base
+deberta-v2,MoritzLaurer/mDeBERTa-v3-base-xnli-multilingual-nli-2mil7
+decision_transformer,edbeeching/decision-transformer-gym-hopper-expert
+deformable_detr,SenseTime/deformable-detr,xfail,Trace error: op->outputs().size() == 1 INTERNAL ASSERT FAILED
+deit,facebook/deit-base-distilled-patch16-224
+deta,jozhang97/deta-swin-large,xfail,ValueError: operands could not be broadcast together with shapes
+detr,facebook/detr-resnet-50
+dinov2,facebook/dinov2-base
+distilbert,distilbert/distilbert-base-uncased-finetuned-sst-2-english
+donut-swin,pasusarla/donut_encoder
+dpr,facebook/dpr-question_encoder-single-nq-base
+dpt,Intel/dpt-large
+efficientformer,DunnBC22/efficientformer-l3-300-Brain_Tumors_Image_Classification
+efficientnet,google/efficientnet-b7,xfail,Compile error: AvgPool: Kernel after dilation has size (dim: 2560) larger than the data shape after padding (dim: 19) at axis 0.
+electra,google/electra-base-discriminator
+encodec,facebook/encodec_24khz
+encoder_decoder,patrickvonplaten/bert2gpt2-cnn_dailymail-fp16,xfail,Load error: You have to specify either input_ids or inputs_embeds
+encoder-decoder,raynardj/wenyanwen-chinese-translate-to-ancient
+ernie,nghuyong/ernie-3.0-base-zh
+ernie_m,MoritzLaurer/ernie-m-large-mnli-xnli
+esm,InstaDeepAI/nucleotide-transformer-2.5b-multi-species
+f_t5,flax-community/ft5-cnn-dm
+fastspeech2_conformer,espnet/fastspeech2_conformer,xfail,Compile error: CPU plug-in doesn't support Tile operation with dynamic rank. Operation name: aten::repeat_interleave/Tile
+fastspeech2_conformer_with_hifigan,espnet/fastspeech2_conformer_with_hifigan,xfail,Compile error: CPU plug-in doesn't support Tile operation with dynamic rank. Operation name: __module.model/aten::repeat_interleave/Tile
+flaubert,lincoln/flaubert-mlsum-topic-classification
+flava,facebook/flava-full
+flava_image_codebook,facebook/flava-image-codebook
+fnet,google/fnet-base,xfail,Unsupported op aten::fft_fftn aten::real
+focalnet,microsoft/focalnet-small
+fsmt,facebook/wmt19-en-ru,xfail,Trace error: Only tensors lists tuples of tensors or dictionary of tensors can be output from traced functions
+funnel,funnel-transformer/small
+git,microsoft/git-large-coco,xfail,Trace error: We don't have an op for aten::full but it isn't a special case
+glpn,vinvino02/glpn-nyu
+gpt_neo,EleutherAI/gpt-neo-2.7B
+gpt2,openai-community/gpt2
+gptj,databricks/dolly-v1-6b
+gptsan-japanese,Tanrei/GPTSAN-japanese,xfail,Unsupported op aten::index_put_ prim::TupleConstruct prim::TupleUnpack
+graphormer,clefourrier/graphormer-base-pcqm4mv2,xfail,Load error: GraphormerForGraphClassification.forward() missing 6 required positional arguments: 'input_edges' 'attn_bias' 'in_degree' 'out_degree' 'spatial_pos' and 'attn_edge_type'
+grounding-dino,IDEA-Research/grounding-dino-base,xfail,Trace error: op->outputs().size() == 1 INTERNAL ASSERT FAILED
+groupvit,nvidia/groupvit-gcc-yfcc
+hiera,namangarg110/hiera_base_224
+hifigan,microsoft/speecht5_hifigan,xfail,Load error: The size of tensor a (100) must match the size of tensor b (80) at non-singleton dimension 1
+hubert,facebook/hubert-large-ls960-ft
+hybridbert,gokuls/bert_12_layer_model_v1
+ibert,DunnBC22/ibert-roberta-base-Abusive_Or_Threatening_Speech
+idefics,HuggingFaceM4/tiny-random-idefics,xfail,aten::einsum Different input dimensions indicated by the same labels for Einsum must be compatible
+imagegpt,hf-internal-testing/tiny-random-ImageGPTModel
+informer,huggingface/informer-tourism-monthly,xfail,Load error: mat1 and mat2 shapes cannot be multiplied
+instructblip,Salesforce/instructblip-vicuna-7b
+jukebox,openai/jukebox-1b-lyrics,xfail,Load error: Module [JukeboxModel] is missing the required "forward" function
+jukebox_vqvae,ArthurZ/jukebox-vqvae,skip,Load error: The checkpoint you are trying to load has model type `jukebox_vqvae` but Transformers does not recognize this architecture
+kobert,weonjae0211/ds-kobert
+kosmos-2,microsoft/kosmos-2-patch14-224
+LanguageBindDepth,LanguageBind/LanguageBind_Depth
+LanguageBindImage,LanguageBind/LanguageBind_Image
+LanguageBindThermal,LanguageBind/LanguageBind_Thermal
+layoutlm,impira/layoutlm-document-qa
+layoutlmv3,microsoft/layoutlmv3-base
+led,pszemraj/led-large-book-summary
+levit,facebook/levit-128S,xfail,Trace error: Cannot insert a Tensor that requires grad as a constant
+lilt,nielsr/lilt-xlm-roberta-base
+llama_with_landmark,Leooyii/Landmark_512_Slimpajama_1B
+longformer,allenai/longformer-base-4096
+longt5,pszemraj/long-t5-tglobal-base-16384-book-summary,xfail,Compile error: unsupported Einsum
+luke,oshizo/sbert-jsnli-luke-japanese-base-lite
+lxmert,unc-nlp/lxmert-base-uncased
+m2m_100,facebook/nllb-200-distilled-600M
+mamba,dominguesm/mambarim-110m,xfail,Trace error: Tracer cannot infer type of tensor
+marian,Helsinki-NLP/opus-mt-zh-en
+markuplm,microsoft/markuplm-base
+mask2former,facebook/mask2former-swin-large-coco-panoptic
+maskformer,facebook/maskformer-swin-large-ade
+mbart,facebook/mbart-large-50-many-to-many-mmt
+mctct,speechbrain/m-ctc-t-large
+mega,Bingsu/mega-150m-arch,xfail,Trace error: Cannot insert a Tensor that requires grad as a constant
+megatron-bert,UFNLP/gatortron-base
+mgp-str,alibaba-damo/mgp-str-base,xfail,Compile error: unsupported Einsum
+mobilebert,google/mobilebert-uncased
+mobilenet_v1,google/mobilenet_v1_0.75_192
+mobilenet_v2,google/mobilenet_v2_1.0_224
+mobilevit,apple/mobilevit-small
+mobilevitv2,apple/mobilevitv2-1.0-imagenet1k-256,xfail,Col2Im: Static shape inference lacks constant data on port 1
+mpnet,sentence-transformers/all-mpnet-base-v2
+mpt,team-lucid/mptk-1b
+mra,uw-madison/mra-base-512-4
+mt5,csebuetnlp/mT5_multilingual_XLSum
+musicgen,facebook/musicgen-large
+musicgen_melody,ylacombe/musicgen-melody
+mvp,RUCAIBox/mtl-data-to-text
+nezha,sijunhe/nezha-cn-base
+nllb_moe,hf-tiny-model-private/tiny-random-NllbMoeForConditionalGeneration,xfail,Trace error: Tracer cannot infer type of Seq2SeqMoEOutput
+nystromformer,uw-madison/nystromformer-4096
+olmo,DrNicefellow/Microscopic-Olmo-2B-1.1k-steps
+oneformer,shi-labs/oneformer_ade20k_swin_tiny,xfail,Different number of outputs between framework and OpenVINO
+openai-gpt,openai-community/openai-gpt
+open-llama,aerner/lm-v2
+owlv2,google/owlv2-base-patch16-ensemble
+owlvit,google/owlvit-base-patch32
+patchtsmixer,ibm-granite/granite-timeseries-patchtsmixer,xfail,Only tensors lists tuples of tensors or dictionary of tensors can be output from traced functions
+patchtst,ibm/patchtst-etth1-pretrain,xfail,Conversion is failed for: aten::unfold
+pegasus,tuner007/pegasus_paraphrase
+pegasus_x,pszemraj/pegasus-x-large-book-summary
+perceiver,hf-internal-testing/tiny-random-vision_perceiver_conv
+phi,susnato/phi-1_5_dev
+phi3,MaziyarPanahi/calme-2.1-phi3-4b
+pix2struct,google/deplot
+pix2struct_vision_model,UiPath/pix2struct-vision-base
+plbart,uclanlp/plbart-base
+poolformer,sail/poolformer_m36
+prophetnet,microsoft/prophetnet-large-uncased-squad-qg
+pvt,Zetatech/pvt-tiny-224
+rag,facebook/rag-token-nq,xfail,Load error: Make sure that `context_input_ids` are passed if no `retriever` is set.
+realm,google/realm-orqa-nq-openqa,xfail,Load error: 'NoneType' object is not callable
+reformer,google/reformer-enwik8,xfail,Load error: index out of range in self
+regnet,facebook/regnet-x-002
+rembert,google/rembert
+resnet,microsoft/resnet-50
+roberta,deepset/roberta-base-squad2
+roberta-prelayernorm,andreasmadsen/efficient_mlm_m0.40
+roc_bert,weiweishi/roc-bert-base-zh
+roformer,junnyu/roformer_chinese_sim_char_small
+rt_detr,rafaelpadilla/porting_rt_detr,xfail,Trace error: Only tensors lists tuples of tensors or dictionary of tensors can be output from traced functions
+rwkv,RWKV/rwkv-4-169m-pile
+sam,facebook/sam-vit-huge
+scibert,Goutham-Vignesh/ContributionSentClassification-scibert
+seamless_m4t,facebook/hf-seamless-m4t-large
+segformer,mattmdjaga/segformer_b2_clothes
+seg-ment-tation,NiCy/seg-ment-tation
+sew,anton-l/sew-mid-100k-ft-keyword-spotting
+sew-d,asapp/sew-d-base-plus-400k-ft-ls100h
+siglip,google/siglip-base-patch16-224
+siglip_vision_model,bczhou/TinyLLaVA-3.1B-SigLIP
+speech_to_text,facebook/s2t-small-librispeech-asr
+speech_to_text_2,pirxus/s2t2_decoder_base
+speech-encoder-decoder,facebook/wav2vec2-xls-r-2b-22-to-16
+speecht5,microsoft/speecht5_tts,xfail,Unsupported op aten::bernoulli
+splinter,tau/splinter-base-qass
+squeezebert,typeform/squeezebert-mnli
+stablelm,pansophic/rocket-3B
+starcoder2,cognitivecomputations/dolphincoder-starcoder2-7b
+superpoint,stevenbucaille/superpoint,xfail,Unsupported prim::TupleConstruct prim::TupleUnpack
+swiftformer,MBZUAI/swiftformer-xs
+swin,microsoft/swin-tiny-patch4-window7-224
+swin2sr,caidas/swin2SR-classical-sr-x2-64
+swinv2,microsoft/swinv2-tiny-patch4-window8-256
+switch_transformers,google/switch-base-8,xfail,Unsupported aten::index_put_ aten::logsumexp prim::TupleConstruct prim::TupleUnpack
+t5,google/flan-t5-base
+table-transformer,microsoft/table-transformer-detection
+tapas,google/tapas-base-finetuned-wtq
+tapex,nielsr/tapex-large
+time_series_transformer,huggingface/time-series-transformer-tourism-monthly
+timesformer,facebook/timesformer-base-finetuned-k400
+transfo-xl,transfo-xl/transfo-xl-wt103,xfail,Load error: type_as() missing 1 required positional arguments: "other"
+tvlt,ZinengTang/tvlt-base,xfail,Load error: Matching task requires labels
+tvp,Intel/tvp-base,xfail,Load error: 'NoneType' object has no attribute 'dtype'
+udop,nielsr/udop-large,xfail,Trace error: We don't have an op for aten::full_like but it isn't a special case
+umt5,EleutherAI/pile-t5-large
+unispeech,microsoft/unispeech-1350-en-17h-ky-ft-1h
+unispeech-sat,microsoft/unispeech-sat-base-100h-libri-ft
+univnet,dg845/univnet-dev,xfail,Load error: "normal_kernel_cpu" not implemented for 'Long'
+upernet,openmmlab/upernet-convnext-small
+van,Visual-Attention-Network/van-tiny
+videomae,MCG-NJU/videomae-base-finetuned-kinetics
+vilt,dandelin/vilt-b32-finetuned-vqa,xfail,Accuracy validation failed
+vision-encoder-decoder,nlpconnect/vit-gpt2-image-captioning
+visual_bert,uclanlp/visualbert-vqa-coco-pre
+vit,google/vit-base-patch16-224
+vit_mae,facebook/vit-mae-base
+vit_msn,facebook/vit-msn-large-7
+vit-hybrid,google/vit-hybrid-base-bit-384
+vitmatte,nielsr/vitmatte-small-composition-1k,xfail,Unsupported aten::FloatImplicit aten::__contains__ aten::__isnot__ aten::append prim::Uninitialized prim::unchecked_cast
+vits,facebook/mms-tts-eng,xfail,operands could not be broadcast together with shapes
+vivit,google/vivit-b-16x2-kinetics400
+wav2vec2,jonatasgrosman/wav2vec2-large-xlsr-53-english
+wav2vec2_base,gelbanna/test,xfail,Load error: expected scalar type Long but found Float
+wav2vec2-conformer,codenamewei/speech-to-text
+wavlm,microsoft/wavlm-large
+whisper,openai/whisper-large-v3
+xclip,microsoft/xclip-base-patch32
+xglm,facebook/xglm-564M
+xlm,FacebookAI/xlm-mlm-100-1280
+xlm-prophetnet,microsoft/xprophetnet-large-wiki100-cased
+xlm-roberta,FacebookAI/xlm-roberta-base
+xlm-roberta-xl,facebook/xlm-roberta-xl
+xlnet,xlnet/xlnet-base-cased
+xmod,facebook/xmod-base
+yolos,hustvl/yolos-tiny
+yoso,MrAnderson/yoso-4096-full-trivia
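The new manifest keeps one model per line in a `type,name[,mark,reason]` layout, where `mark` is an optional `skip` or `xfail` and `reason` is free text that may itself contain commas; lines starting with `#` are comments. The tests below consume this file through `get_models_list` from `models_hub_common.utils`. As a minimal sketch of how such a file can be parsed — `parse_models_list` is a hypothetical stand-in, not the real helper, whose exact behavior may differ:

```python
# Hypothetical parser for the type,name[,mark,reason] manifest layout.
# The real implementation is models_hub_common.utils.get_models_list.
def parse_models_list(path):
    cases = []
    with open(path) as f:
        for raw in f:
            line = raw.strip()
            if not line or line.startswith("#"):  # skip blanks and comment lines
                continue
            # split at most 3 times so a comma-bearing reason stays intact
            parts = line.split(",", 3)
            parts += [None] * (4 - len(parts))  # pad missing mark/reason
            model_type, name, mark, reason = parts
            cases.append((model_type, name, mark, reason))
    return cases

# Example row this would yield:
# ("vilt", "dandelin/vilt-b32-finetuned-vqa", "xfail", "Accuracy validation failed")
```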
diff --git a/tests/model_hub_tests/pytorch/test_hf_transformers.py b/tests/model_hub_tests/pytorch/test_hf_transformers.py
index 5e3f19ad945399..9898e3a2af8dfc 100644
--- a/tests/model_hub_tests/pytorch/test_hf_transformers.py
+++ b/tests/model_hub_tests/pytorch/test_hf_transformers.py
@@ -3,55 +3,26 @@
 import os
+from datasets import Audio, load_dataset
+from huggingface_hub import hf_hub_download, model_info
+from huggingface_hub.utils import HfHubHTTPError, LocalEntryNotFoundError
+from PIL import Image
 import pytest
 import torch
-from huggingface_hub import model_info
-from huggingface_hub.utils import HfHubHTTPError
-from models_hub_common.constants import hf_hub_cache_dir
-from models_hub_common.utils import cleanup_dir, retry
 import transformers
-from transformers import AutoConfig, AutoModel, AutoProcessor, AutoTokenizer, AutoFeatureExtractor, AutoModelForTextToWaveform, \
-    CLIPFeatureExtractor, XCLIPVisionModel, T5Tokenizer, VisionEncoderDecoderModel, ViTImageProcessor, BlipProcessor, BlipForConditionalGeneration, \
-    SpeechT5Processor, SpeechT5ForTextToSpeech, LayoutLMv2Processor, Pix2StructForConditionalGeneration, RetriBertTokenizer, VivitImageProcessor
-
-from torch_utils import TestTorchConvertModel, process_pytest_marks
-
-
-def is_gptq_model(config):
-    config_dict = config.to_dict() if not isinstance(config, dict) else config
-    quantization_config = config_dict.get("quantization_config", None)
-    return quantization_config and quantization_config["quant_method"] == "gptq"
-
-
-def patch_gptq():
-    orig_cuda_check = torch.cuda.is_available
-    orig_post_init_model = None
-    torch.set_default_dtype(torch.float32)
-    torch.cuda.is_available = lambda: True
+from transformers import (
+    AutoConfig, AutoFeatureExtractor, AutoImageProcessor, AutoModel,
+    AutoModelForTextToWaveform, AutoProcessor, AutoTokenizer,
+    BlipForConditionalGeneration, BlipProcessor, CLIPFeatureExtractor,
+    FlavaImageModel, LayoutLMv2Processor, Pix2StructForConditionalGeneration,
+    RetriBertTokenizer, SpeechT5ForTextToSpeech, SpeechT5Processor,
+    T5Tokenizer, ViTImageProcessor, VisionEncoderDecoderModel,
+    VivitImageProcessor, XCLIPVisionModel
+)
-
-    from optimum.gptq import GPTQQuantizer
-
-    orig_post_init_model = GPTQQuantizer.post_init_model
-
-    def post_init_model(self, model):
-        from auto_gptq import exllama_set_max_input_length
-
-        class StoreAttr(object):
-            pass
-
-        model.quantize_config = StoreAttr()
-        model.quantize_config.desc_act = self.desc_act
-        if self.desc_act and not self.disable_exllama and self.max_input_length is not None:
-            model = exllama_set_max_input_length(model, self.max_input_length)
-        return model
-
-    GPTQQuantizer.post_init_model = post_init_model
-    return orig_cuda_check, orig_post_init_model
-
-
-def unpatch_gptq(orig_cuda_check, orig_post_init_model):
-    from optimum.gptq import GPTQQuantizer
-    torch.cuda.is_available = orig_cuda_check
-    GPTQQuantizer.post_init_model = orig_post_init_model
+from models_hub_common.constants import hf_hub_cache_dir
+from models_hub_common.utils import cleanup_dir, get_models_list, retry
+from torch_utils import TestTorchConvertModel


 def flattenize_tuples(list_input):
@@ -72,42 +43,22 @@ def flattenize_outputs(outputs):
     return dict((k, v.numpy(force=True)) for k, v in outputs.items())


-def filter_example(model, example):
-    try:
-        import inspect
-        if isinstance(example, dict):
-            model_params = inspect.signature(model.forward).parameters
-            names_set = {p for p in model_params}
-            new_example = dict()
-            for k, v in example:
-                if k in names_set:
-                    new_example[k] = v
-            return new_example
-    except:
-        return example
-
-
 # To make tests reproducible we seed the random generator
 torch.manual_seed(0)


 class TestTransformersModel(TestTorchConvertModel):
     def setup_class(self):
-        from PIL import Image
         import requests

         self.infer_timeout = 1800

         url = "http://images.cocodataset.org/val2017/000000039769.jpg"
         self.image = Image.open(requests.get(url, stream=True).raw)
-        self.cuda_available, self.gptq_postinit = None, None

-    @retry(3, exceptions=(HfHubHTTPError,), delay=1)
+    @retry(3, exceptions=(HfHubHTTPError, LocalEntryNotFoundError), delay=1)
     def load_model(self, name, type):
-        name_suffix = ''
-        if name.find(':') != -1:
-            name_suffix = name[name.find(':') + 1:]
-            name = name[:name.find(':')]
+        name, _, name_suffix = name.partition(':')
         mi = model_info(name)
         auto_processor = None
@@ -117,12 +68,7 @@ def load_model(self, name, type):
             config = AutoConfig.from_pretrained(name)
         except Exception:
             config = {}
-        is_gptq = is_gptq_model(config)
         model_kwargs = {"torchscript": True}
-        if is_gptq:
-            self.cuda_available, self.gptq_postinit = patch_gptq()
-            model_kwargs["torch_dtype"] = torch.float32
-            self.ov_config = {"DYNAMIC_QUANTIZATION_GROUP_SIZE": "0"}
         if "bart" in mi.tags:
             model_kwargs["attn_implementation"] = "eager"
         try:
@@ -137,10 +83,9 @@ def load_model(self, name, type):
             example = dict(encoded_input)
         elif 'xclip' in mi.tags:
             model = XCLIPVisionModel.from_pretrained(name, **model_kwargs)
-            # needs video as input
-            example = {'pixel_values': torch.randn(*(16, 3, 224, 224), dtype=torch.float32)}
+            example = {'pixel_values': torch.randn(16, 3, 224, 224)}
         elif 'audio-spectrogram-transformer' in mi.tags:
-            example = {'input_values': torch.randn(*(1, 1024, 128), dtype=torch.float32)}
+            example = {'input_values': torch.randn(1, 1024, 128)}
         elif 'mega' in mi.tags:
             model = AutoModel.from_pretrained(name, **model_kwargs)
             model.config.output_attentions = True
@@ -150,33 +95,35 @@ def load_model(self, name, type):
         elif 'bros' in mi.tags:
             processor = AutoProcessor.from_pretrained(name)
             encoding = processor("to the moon!", return_tensors="pt")
-            bbox = torch.randn([1, 6, 8], dtype=torch.float32)
-            example = dict(input_ids=encoding["input_ids"], bbox=bbox, attention_mask=encoding["attention_mask"])
+            bbox = torch.randn([1, 6, 8])
+            example = dict(
+                input_ids=encoding["input_ids"], bbox=bbox, attention_mask=encoding["attention_mask"])
         elif 'upernet' in mi.tags:
             processor = AutoProcessor.from_pretrained(name)
             example = dict(processor(images=self.image, return_tensors="pt"))
         elif 'deformable_detr' in mi.tags or 'oneformer' in mi.tags:
             processor = AutoProcessor.from_pretrained(name)
-            example = dict(processor(images=self.image, task_inputs=["semantic"], return_tensors="pt"))
+            example = dict(processor(images=self.image, task_inputs=[
+                           "semantic"], return_tensors="pt"))
         elif 'clap' in mi.tags:
             example_inputs_map = {
-                'audio_model': {'input_features': torch.randn([1, 1, 1001, 64], dtype=torch.float32)},
-                'audio_projection': {'hidden_states': torch.randn([1, 768], dtype=torch.float32)},
+                'audio_model': {'input_features': torch.randn([1, 1, 1001, 64])},
+                'audio_projection': {'hidden_states': torch.randn([1, 768])},
             }
             example = example_inputs_map[name_suffix]
         elif 'git' in mi.tags:
             processor = AutoProcessor.from_pretrained(name)
-            example = {'pixel_values': torch.randn(*(1, 3, 224, 224), dtype=torch.float32),
+            example = {'pixel_values': torch.randn(1, 3, 224, 224),
                        'input_ids': torch.randint(1, 100, size=(1, 13), dtype=torch.int64)}
         elif 'blip-2' in mi.tags:
             processor = AutoProcessor.from_pretrained(name)
             example = dict(processor(images=self.image, return_tensors="pt"))
             example_inputs_map = {
-                'vision_model' : {'pixel_values': torch.randn([1, 3, 224, 224], dtype=torch.float32)},
-                'qformer': {'query_embeds' : torch.randn([1, 32, 768], dtype=torch.float32),
-                            'encoder_hidden_states' : torch.randn([1, 257, 1408], dtype=torch.float32),
-                            'encoder_attention_mask' : torch.ones([1, 257], dtype=torch.int64)},
-                'language_projection': {'input' : torch.randn([1, 32, 768], dtype=torch.float32)},
+                'vision_model': {'pixel_values': torch.randn([1, 3, 224, 224])},
+                'qformer': {'query_embeds': torch.randn([1, 32, 768]),
+                            'encoder_hidden_states': torch.randn([1, 257, 1408]),
+                            'encoder_attention_mask': torch.ones([1, 257])},
+                'language_projection': {'input': torch.randn([1, 32, 768])},
             }
             example = example_inputs_map[name_suffix]
         elif "t5" in mi.tags:
@@ -198,10 +145,11 @@ def load_model(self, name, type):
             example = dict(encoded_input)
             example["decoder_input_ids"] = torch.randint(0, 1000, [1, 20])
-            example["decoder_attention_mask"] = torch.ones([1, 20], dtype=torch.int64)
+            example["decoder_attention_mask"] = torch.ones(
+                [1, 20], dtype=torch.int64)
         elif 'idefics' in mi.tags:
             processor = AutoProcessor.from_pretrained(name)
-
+
             prompts = [[
                 "User: What is in this image?",
                 "https://upload.wikimedia.org/wikipedia/commons/8/86/Id%C3%A9fix.JPG",
@@ -216,47 +164,59 @@ def load_model(self, name, type):
                 "\nAssistant:",
             ]]

-            inputs = processor(prompts, add_end_of_utterance_token=False, return_tensors="pt")
+            inputs = processor(
+                prompts, add_end_of_utterance_token=False, return_tensors="pt")
             example = dict(inputs)
         elif 'blip' in mi.tags and 'text2text-generation' in mi.tags:
             processor = BlipProcessor.from_pretrained(name)
-            model = BlipForConditionalGeneration.from_pretrained(name, **model_kwargs)
+            model = BlipForConditionalGeneration.from_pretrained(
+                name, **model_kwargs)
             text = "a photography of"
             inputs = processor(self.image, text, return_tensors="pt")
             example = dict(inputs)
         elif 'speecht5' in mi.tags:
-            from datasets import load_dataset
-
             processor = SpeechT5Processor.from_pretrained(name)
-            model = SpeechT5ForTextToSpeech.from_pretrained(name, **model_kwargs)
+            model = SpeechT5ForTextToSpeech.from_pretrained(
+                name, **model_kwargs)

-            inputs = processor(text="Hello, my dog is cute.", return_tensors="pt")
+            inputs = processor(text="Hello, my dog is cute.",
+                               return_tensors="pt")
             # load xvector containing speaker's voice characteristics from a dataset
-            embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
-            speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
+            embeddings_dataset = load_dataset(
+                "Matthijs/cmu-arctic-xvectors", split="validation")
+            speaker_embeddings = torch.tensor(
+                embeddings_dataset[7306]["xvector"]).unsqueeze(0)

             example = dict(inputs)
             example['speaker_embeddings'] = speaker_embeddings
-            example['decoder_input_values'] = torch.randn([1, 20, model.config.num_mel_bins])
+            example['decoder_input_values'] = torch.randn(
+                [1, 20, model.config.num_mel_bins])
         elif 'layoutlmv2' in mi.tags:
             processor = LayoutLMv2Processor.from_pretrained(name)

             question = "What's the content of this image?"
-            encoding = processor(self.image, question, max_length=512, truncation=True, return_tensors="pt")
+            encoding = processor(
+                self.image, question, max_length=512, truncation=True, return_tensors="pt")
             example = dict(encoding)
         elif 'pix2struct' in mi.tags:
-            model = Pix2StructForConditionalGeneration.from_pretrained(name, **model_kwargs)
+            model = Pix2StructForConditionalGeneration.from_pretrained(
+                name, **model_kwargs)
             processor = AutoProcessor.from_pretrained(name)

             import requests
-            from PIL import Image

             image_url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/ai2d-demo.jpg"
             image = Image.open(requests.get(image_url, stream=True).raw)

             question = "What does the label 15 represent? (1) lava (2) core (3) tunnel (4) ash cloud"
-            inputs = processor(images=image, text=question, return_tensors="pt")
+            inputs = processor(images=image, text=question,
+                               return_tensors="pt")
             example = dict(inputs)
             example["decoder_input_ids"] = torch.randint(0, 1000, [1, 20])
-            example["decoder_attention_mask"] = torch.ones([1, 20], dtype=torch.int64)
+            example["decoder_attention_mask"] = torch.ones(
+                [1, 20], dtype=torch.int64)
+        elif "pix2struct_vision_model" in mi.tags:
+            image_processor = AutoProcessor.from_pretrained("google/pix2struct-textcaps-base")
+            inputs = image_processor(images=self.image, return_tensors="pt")
+            example = dict(inputs)
         elif "mms-lid" in name:
             processor = AutoFeatureExtractor.from_pretrained(name)
             input_values = processor(torch.randn(16000).numpy(),
@@ -275,10 +235,11 @@ def load_model(self, name, type):
             encoded_input = processor(images=self.image, return_tensors="pt")
             example = (encoded_input.pixel_values,)
         elif "flava" in mi.tags:
-            processor = AutoProcessor.from_pretrained(name)
-            encoded_input = processor(text=["a photo of a cat", "a photo of a dog"],
-                                      images=[self.image, self.image],
-                                      return_tensors="pt")
+            model = FlavaImageModel.from_pretrained(name, **model_kwargs)
+            feature_extractor = AutoFeatureExtractor.from_pretrained(name)
+
+            encoded_input = feature_extractor(images=[self.image],
+                                              return_tensors="pt")
             example = dict(encoded_input)
         elif "vivit" in mi.tags:
             frames = list(torch.randint(
@@ -311,10 +272,11 @@ def load_model(self, name, type):
             text = "some example text in the English language"
             inputs = tokenizer(text, return_tensors="pt")
             example = dict(inputs)
-        elif 'musicgen' in mi.tags:
+        elif 'musicgen' in mi.tags or "musicgen_melody" in mi.tags:
             processor = AutoProcessor.from_pretrained(name)
-            model = AutoModelForTextToWaveform.from_pretrained(name, **model_kwargs)
-
+            model = AutoModelForTextToWaveform.from_pretrained(
+                name, **model_kwargs)
+
             inputs = processor(
                 text=["80s pop track with bassy drums and synth"],
                 padding=True,
@@ -325,12 +287,92 @@ def load_model(self, name, type):
             pad_token_id = model.generation_config.pad_token_id
             example["decoder_input_ids"] = torch.ones(
                 (inputs.input_ids.shape[0] * model.decoder.num_codebooks, 1), dtype=torch.long) * pad_token_id
-        elif 'kosmos-2' in mi.tags:
+        elif 'kosmos-2' in mi.tags or 'instructblip' in mi.tags:
             processor = AutoProcessor.from_pretrained(name)

             prompt = "An image of"
-            inputs = processor(text=prompt, images=self.image, return_tensors="pt")
+            inputs = processor(
+                text=prompt, images=self.image, return_tensors="pt")
             example = dict(inputs)
+        elif 'vitmatte' in mi.tags:
+            processor = AutoImageProcessor.from_pretrained(name)
+            filepath = hf_hub_download(repo_id="hf-internal-testing/image-matting-fixtures",
+                                       filename="image.png",
+                                       repo_type="dataset")
+            image = Image.open(filepath).convert("RGB")
+            filepath = hf_hub_download(repo_id="hf-internal-testing/image-matting-fixtures",
+                                       filename="trimap.png",
+                                       repo_type="dataset")
+            trimap = Image.open(filepath).convert("L")
+            inputs = processor(images=image, trimaps=trimap,
+                               return_tensors="pt")
+            example = dict(inputs)
+        elif 'sam' in mi.tags:
+            processor = AutoProcessor.from_pretrained(name)
+            input_points = [[[450, 600]]]
+            inputs = processor(self.image,
+                               input_points=input_points,
+                               return_tensors="pt")
+            example = dict(inputs)
+            if "original_sizes" in example:
+                del example["original_sizes"]
+            if "reshaped_input_sizes" in example:
+                del example["reshaped_input_sizes"]
+        elif 'udop' in mi.tags:
+            processor = AutoProcessor.from_pretrained(name, apply_ocr=False)
+            dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
+            example = dataset[0]
+            image = example["image"]
+            words = example["tokens"]
+            boxes = example["bboxes"]
+            inputs = processor(image, words, boxes=boxes, return_tensors="pt")
+            decoder_input_ids = torch.tensor([[config.decoder_start_token_id]])
+            example = dict(decoder_input_ids=decoder_input_ids,
+                           decoder_attention_mask=torch.tensor([[True]]), **inputs)
+        elif 'clvp' in mi.tags:
+            text = "This is an example text."
+            ds = load_dataset("hf-internal-testing/librispeech_asr_dummy",
+                              "clean", split="validation")
+            ds = ds.cast_column("audio", Audio(sampling_rate=22050))
+            sorted_audio = ds.sort("id").select(range(1))[:1]["audio"][0]
+            _, audio, sr = sorted_audio.values()
+            processor = AutoProcessor.from_pretrained(name)
+            inputs = processor(raw_speech=audio, sampling_rate=sr,
+                               text=text, return_tensors="pt")
+            example = dict(inputs)
+        elif "decision_transformer" in mi.tags:
+            states = torch.randn(1, 1, config.state_dim)
+            actions = torch.zeros((1, 1, config.act_dim), dtype=torch.float32)
+            rewards = torch.zeros(1, 1, dtype=torch.float32)
+            target_return = torch.randn(1, 1, 1)
+            timesteps = torch.tensor(0, dtype=torch.long).reshape(1, 1)
+            attention_mask = torch.zeros(1, 1, dtype=torch.float32)
+            example = dict(states=states,
+                           actions=actions,
+                           rewards=rewards,
+                           returns_to_go=target_return,
+                           timesteps=timesteps,
+                           attention_mask=attention_mask)
+        elif "time_series_transformer" in mi.tags or "informer" in mi.tags or "autoformer" in mi.tags:
+            file = hf_hub_download(repo_id="hf-internal-testing/tourism-monthly-batch",
+                                   filename="train-batch.pt", repo_type="dataset")
+            batch = torch.load(file)
+            example = dict(past_values=batch["past_values"],
+                           past_time_features=batch["past_time_features"],
+                           past_observed_mask=batch["past_observed_mask"],
+                           static_categorical_features=batch["static_categorical_features"],
+                           static_real_features=batch["static_real_features"],
+                           future_time_features=batch["future_time_features"]
+                           )
+        elif "seg-ment-tation" in mi.tags:
+            image_processor = AutoImageProcessor.from_pretrained(name)
+            inputs = image_processor(images=self.image, return_tensors="pt")
+            example = dict(inputs)
+        elif "lxmert" in mi.tags:
+            example = {"input_ids": torch.randint(1, 1000, [1, 20]),
+                       "attention_mask": torch.ones([1, 20], dtype=torch.bool),
+                       "visual_feats": torch.randn(1, 10, config.visual_feat_dim),
+                       "visual_pos": torch.randn(1, 10, config.visual_pos_dim)}
         else:
             try:
                 if auto_model == "AutoModelForCausalLM":
@@ -362,14 +404,12 @@ def load_model(self, name, type):
                     example = dict(input_ids=inputs.input_ids,
                                    decoder_input_ids=decoder_inputs.input_ids)
                 elif auto_model == "AutoModelForSpeechSeq2Seq":
-                    from datasets import load_dataset
                     processor = AutoProcessor.from_pretrained(name)
                     inputs = processor(torch.randn(1000).numpy(),
                                        sampling_rate=16000,
                                        return_tensors="pt")
                     example = dict(inputs)
                 elif auto_model == "AutoModelForCTC":
-                    from datasets import load_dataset
                     processor = AutoProcessor.from_pretrained(name)
                     input_values = processor(torch.randn(1000).numpy(),
                                              return_tensors="pt")
@@ -382,8 +422,8 @@ def load_model(self, name, type):
                     queries = ["What is the name of the first actor?",
                               "How many movies has George Clooney played in?",
                               "What is the total number of movies?", ]
-                    answer_coordinates = [[(0, 0)], [(2, 1)], [
-                        (0, 1), (1, 1), (2, 1)]]
+                    answer_coordinates = [[(0, 0)], [(2, 1)],
+                                          [(0, 1), (1, 1), (2, 1)]]
                     answer_text = [["Brad Pitt"], ["69"], ["209"]]
                     table = pd.DataFrame.from_dict(data)
                     encoded_input = tokenizer(table=table, queries=queries, answer_coordinates=answer_coordinates,
@@ -407,14 +447,44 @@ def load_model(self, name, type):
                 model = self.load_model_with_default_class(name, **model_kwargs)
             if hasattr(model, "set_default_language"):
                 model.set_default_language("en_XX")
+        if hasattr(model, "config") and hasattr(model.config, "return_loss"):
+            model.config.return_loss = False
         if name_suffix != '':
             model = model._modules[name_suffix]
         if example is None:
             if "encodec" in mi.tags:
                 example = (torch.randn(1, 1, 100),)
+            elif len({"blip_2_vision_model", "vit-hybrid", "siglip_vision_model", "flava_image_codebook", "superpoint", "donut-swin", "hiera"}.intersection(mi.tags)):
+                image_size = getattr(model.config, "image_size", 384)
+                if not isinstance(image_size, (list, tuple)):
+                    image_size = [image_size, image_size]
+                example = {"pixel_values": torch.randn(1, 3, image_size[0],
+                                                       image_size[1])}
+            elif len({"LanguageBindDepth", "LanguageBindImage", "LanguageBindThermal"}.intersection(mi.tags)):
+                image_size = getattr(model.config.vision_config,
+                                     "image_size", 384)
+                example = {"input_ids": torch.randint(0, 1000, [1, 20]),
+                           "pixel_values": torch.randn(1, 3, image_size, image_size)}
+            elif len({"speech-encoder-decoder", "wav2vec2", "unispeech", "wavlm", "data2vec-audio", "sew"}.intersection(mi.tags)):
+                example = {"input_values": torch.rand(1, 1000)}
+            elif "blip_2_qformer" in mi.tags:
+                example = {"query_embeds": torch.randn(1, 20, model.config.hidden_size),
+                           "attention_mask": torch.ones([1, 20]),
+                           "encoder_hidden_states": torch.randn(1, 20, model.config.encoder_hidden_size),
+                           "encoder_attention_mask": torch.ones([1, 20])}
+            elif "patchtsmixer" in mi.tags or "patchtst" in mi.tags:
+                example = {"past_values": torch.rand(1, model.config.context_length,
+                                                     model.config.num_input_channels)}
             else:
                 example = (torch.randint(1, 1000, [1, 100]),)
-        self.example = filter_example(model, example)
+        if len({"seamless_m4t", "whisper", "speech_to_text", "speech-encoder-decoder"}.intersection(mi.tags)):
+            example["decoder_input_ids"] = torch.randint(0, 1000, [1, 20])
+            example["decoder_attention_mask"] = torch.ones(
+                [1, 20], dtype=torch.int64)
+
+        if "hybridbert" in mi.tags and "token_type_ids" in example:
+            del example["token_type_ids"]
+        self.example = example
         if "vit_mae" in mi.tags:
             # vit-mae by default will generate random noise
             self.example["noise"] = torch.rand(1, 192)
@@ -429,20 +499,19 @@ def load_model(self, name, type):
     def teardown_method(self):
         # remove all downloaded files from cache
         cleanup_dir(hf_hub_cache_dir)
-        # restore after gptq patching
-        if self.cuda_available is not None:
-            unpatch_gptq(self.cuda_available, self.gptq_postinit)
-            self.cuda_available, self.gptq_postinit = None, None
+        super().teardown_method()

     @staticmethod
     def load_model_with_default_class(name, **kwargs):
         try:
             mi = model_info(name)
-            assert len({"owlv2", "owlvit", "vit_mae"}.intersection(mi.tags)) == 0, "TBD: support default classes of these models"
-            assert "architectures" in mi.config and len(mi.config["architectures"]) == 1
+            assert len({"owlv2", "owlvit", "vit_mae"}.intersection(
+                mi.tags)) == 0, "TBD: support default classes of these models"
+            assert "architectures" in mi.config and len(
+                mi.config["architectures"]) == 1
             class_name = mi.config["architectures"][0]
-            model_class = transformers.__getattr__(class_name)
+            model_class = getattr(transformers, class_name)
             return model_class.from_pretrained(name, **kwargs)
         except:
             return AutoModel.from_pretrained(name, **kwargs)
@@ -453,14 +522,19 @@ def load_model_with_default_class(name, **kwargs):
         ("google/tapas-large-finetuned-wtq", "tapas"),
         ("gpt2", "gpt2"),
         ("openai/clip-vit-large-patch14", "clip"),
-        ("katuni4ka/opt-125m-gptq", "opt"),
     ])
     @pytest.mark.precommit
     def test_convert_model_precommit(self, name, type, ie_device):
         self.run(model_name=name, model_link=type, ie_device=ie_device)

-    @pytest.mark.parametrize("name",
-                             process_pytest_marks(os.path.join(os.path.dirname(__file__), "hf_transformers_models")))
+    @pytest.mark.parametrize("type,name,mark,reason",
+                             get_models_list(os.path.join(os.path.dirname(__file__), "hf_transformers_models")))
     @pytest.mark.nightly
-    def test_convert_model_all_models(self, name, ie_device):
+    def test_convert_model_all_models(self, name, type, mark, reason, ie_device):
+        valid_marks = ['skip', 'xfail']
+        assert mark is None or mark in valid_marks, f"Invalid case for {name}"
+        if mark == 'skip':
+            pytest.skip(reason)
+        elif mark == 'xfail':
+            pytest.xfail(reason)
         self.run(model_name=name, model_link=None, ie_device=ie_device)
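For reference, this is how the `mark`/`reason` columns from the manifest drive the nightly run above: a minimal, self-contained sketch (the rows are taken from the list earlier in this patch; `run_conversion` is a placeholder for the real `self.run` conversion-and-compare step):

```python
import pytest

CASES = [
    # (type, name, mark, reason) rows as produced from hf_transformers_models
    ("bert", "sentence-transformers/all-MiniLM-L6-v2", None, None),
    ("vilt", "dandelin/vilt-b32-finetuned-vqa", "xfail", "Accuracy validation failed"),
]

def run_conversion(name):
    # placeholder for the real convert-and-compare step
    assert name

@pytest.mark.parametrize("type,name,mark,reason", CASES)
def test_convert(type, name, mark, reason):
    # skip aborts the test immediately; xfail records an expected failure
    if mark == "skip":
        pytest.skip(reason)
    elif mark == "xfail":
        pytest.xfail(reason)
    run_conversion(name)
```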
diff --git a/tests/model_hub_tests/pytorch/test_llm.py b/tests/model_hub_tests/pytorch/test_llm.py
new file mode 100644
index 00000000000000..43975500455967
--- /dev/null
+++ b/tests/model_hub_tests/pytorch/test_llm.py
@@ -0,0 +1,224 @@
+# Copyright (C) 2018-2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+import copy
+import inspect
+
+import numpy as np
+import pytest
+import torch
+from huggingface_hub.utils import HfHubHTTPError, LocalEntryNotFoundError
+from transformers import AutoConfig, AutoTokenizer, AutoModelForCausalLM
+
+from models_hub_common.utils import retry
+from openvino.frontend.pytorch.patch_model import __make_16bit_traceable as patch
+from openvino.frontend.pytorch.patch_model import unpatch_model as unpatch
+from torch_utils import TestTorchConvertModel
+
+
+def is_gptq_model(config):
+    config_dict = config.to_dict() if not isinstance(config, dict) else config
+    quantization_config = config_dict.get("quantization_config", None)
+    return quantization_config and quantization_config["quant_method"] == "gptq"
+
+
+def patch_gptq():
+    orig_cuda_is_available = torch.cuda.is_available
+    orig_cuda_is_bf16_supported = torch.cuda.is_bf16_supported
+    orig_cuda_get_device_capability = torch.cuda.get_device_capability
+    orig_post_init_model = None
+    torch.set_default_dtype(torch.float32)
+    torch.cuda.is_available = lambda: True
+    torch.cuda.is_bf16_supported = lambda: False
+    torch.cuda.get_device_capability = lambda n: (9, 1)
+
+    from optimum.gptq import GPTQQuantizer
+
+    orig_post_init_model = GPTQQuantizer.post_init_model
+
+    def post_init_model(self, model):
+        from auto_gptq import exllama_set_max_input_length
+
+        class StoreAttr(object):
+            pass
+
+        model.quantize_config = StoreAttr()
+        model.quantize_config.desc_act = self.desc_act
+        if self.desc_act and not self.disable_exllama and self.max_input_length is not None:
+            model = exllama_set_max_input_length(model, self.max_input_length)
+        return model
+
+    GPTQQuantizer.post_init_model = post_init_model
+    return (orig_cuda_is_available, orig_cuda_is_bf16_supported, orig_cuda_get_device_capability), orig_post_init_model
+
+
+def unpatch_gptq(orig_cuda_check, orig_post_init_model):
+    from optimum.gptq import GPTQQuantizer
+    torch.cuda.is_available, torch.cuda.is_bf16_supported, torch.cuda.get_device_capability = orig_cuda_check
+    GPTQQuantizer.post_init_model = orig_post_init_model
+
+
+def to_numpy(t):
+    if t.dtype in [torch.bfloat16, torch.float16]:
+        return t.to(torch.float32).numpy(force=True)
+    return t.numpy(force=True)
+
+
+def flattenize_tuples(list_input):
+    unpacked_pt_res = []
+    for r in list_input:
+        if isinstance(r, (tuple, list)):
+            unpacked_pt_res.extend(flattenize_tuples(r))
+        else:
+            unpacked_pt_res.append(r)
+    return unpacked_pt_res
+
+
+def flattenize_outputs(outputs):
+    if not isinstance(outputs, dict):
+        outputs = flattenize_tuples(outputs)
+        return [to_numpy(i) for i in outputs]
+    else:
+        return dict((k, to_numpy(v)) for k, v in outputs.items())
+
+
+# To make tests reproducible we seed the random generator
+torch.manual_seed(0)
+
+
+class TestLLMModel(TestTorchConvertModel):
+    def setup_class(self):
+        self.infer_timeout = 1800
+        self.cuda_available, self.gptq_postinit = None, None
+
+    @retry(3, exceptions=(HfHubHTTPError, LocalEntryNotFoundError), delay=1)
+    def load_model(self, name, type):
+        model = None
+        example = None
+        try:
+            config = AutoConfig.from_pretrained(name, trust_remote_code=True)
+        except Exception:
+            config = {}
+        model_kwargs = {"torchscript": True, "trust_remote_code": True}
+        is_gptq = is_gptq_model(config)
+        if is_gptq:
+            self.cuda_available, self.gptq_postinit = patch_gptq()
+            model_kwargs["torch_dtype"] = torch.float32
+            self.ov_config = {"DYNAMIC_QUANTIZATION_GROUP_SIZE": "0"}
+        else:
+            model_kwargs["torch_dtype"] = "auto"
+            pass
+
+        t = AutoTokenizer.from_pretrained(name, trust_remote_code=True)
+        self.model = AutoModelForCausalLM.from_pretrained(name, **model_kwargs)
+        if is_gptq:
+            model = self.model
+        else:
+            assert self.model.config.torch_dtype in [
+                torch.float16, torch.bfloat16]
+            model = copy.deepcopy(self.model).float()
+
+        example = t("Some input text to verify that model works.",
+                    return_tensors='pt').__dict__['data']
+        if type != "gptj":
+            pkv, am = self.get_pkv(model, t)
+            example["past_key_values"] = pkv
+            example["attention_mask"] = torch.cat(
+                [example["attention_mask"], am], -1)
+        if type not in ["opt", "falcon"]:
+            ids = torch.cumsum(example["attention_mask"] != 0, dim=1) - 1
+            example["position_ids"] = ids[:, -
+                                          example["input_ids"].shape[1]:]
+        self.example = example
+        return model
+
+    def get_inputs_info(self, model_obj):
+        return list(inspect.signature(getattr(model_obj, "forward", model_obj.__call__)).parameters)
+
+    def prepare_inputs(self, inputs_info):
+        inputs = getattr(self, "inputs", self.example)
+        filtered_keys = [i for i in inputs_info if i in inputs]
+        res = []
+        for k in filtered_keys:
+            v = inputs[k]
+            if isinstance(v, tuple):
+                v_flatten = flattenize_outputs(v)
+                if k == "past_key_values":
+                    v_flatten = [v.astype(np.float32) for v in v_flatten]
+                res.extend(v_flatten)
+            else:
+                res.append(v.numpy())
+        return res
+
+    def infer_fw_model(self, model_obj, inputs):
+        inputs = getattr(self, "inputs", self.example)
+        fw_outputs = model_obj(**inputs)
+        return flattenize_outputs(fw_outputs)
+
+    def convert_model_impl(self, model_obj):
+        is_patched = False
+        if getattr(self.model.config, "torch_dtype", None) in [torch.float16, torch.bfloat16]:
+            patch(self.model)
+            is_patched = True
+        # initialize model after patching
+        self.model(**self.example)
+        with torch.no_grad():
+            ovm = super().convert_model_impl(self.model)
+        if is_patched:
+            unpatch(self.model, "_openvino_module_extension_patch_orig_forward")
+            # model_obj.float()
+        return ovm
+
+    def teardown_method(self):
+        # restore after gptq patching
+        if self.cuda_available is not None:
+            unpatch_gptq(self.cuda_available, self.gptq_postinit)
+            self.cuda_available, self.gptq_postinit = None, None
+        super().teardown_method()
+
+    @staticmethod
+    def get_pkv(model, tokenizer):
+        for_pkv = tokenizer("To get past key values",
+                            return_tensors='pt').__dict__['data']
+        with torch.no_grad():
+            pkv = model(**for_pkv)[1]
+
+        return pkv, for_pkv["attention_mask"]
+
+    @pytest.mark.parametrize("type,name", [
+        ("opt", "katuni4ka/opt-125m-gptq"),
+        ("llama", "TinyLlama/TinyLlama-1.1B-Chat-v1.0"),
+    ])
+    @pytest.mark.precommit
+    @pytest.mark.nightly
+    def test_convert_model_precommit(self, name, type, ie_device):
+        self.run(model_name=name, model_link=type, ie_device=ie_device)
+
+    @pytest.mark.parametrize("type,name", [
+        ("baichuan", "baichuan-inc/Baichuan2-7B-Base"),
+        pytest.param("chatglm", "THUDM/chatglm3-6b",
+                     marks=pytest.mark.xfail(reason="Accuracy validation failed")),
+        ("falcon", "tiiuae/falcon-7b-instruct"),
+        ("gemma", "beomi/gemma-ko-7b"),
+        ("gpt_neox", "EleutherAI/gpt-neox-20b"),
+        ("gpt_neox", "togethercomputer/RedPajama-INCITE-7B-Instruct"),
+        ("gpt_neox_japanese", "rinna/japanese-gpt-neox-3.6b"),
+        #pytest.param("gptj", "databricks/dolly-v1-6b",marks=pytest.mark.xfail(reason="prim::Constant")),
+        ("llama", "lmsys/vicuna-7b-v1.5"),
+        ("llama-2", "TheBloke/Llama-2-7B-GPTQ"),
+        pytest.param("llama-3.1", "hugging-quants/Meta-Llama-3.1-8B-Instruct-GPTQ-INT4",
+                     marks=pytest.mark.xfail(reason="Accuracy validation failed")),
+        pytest.param("mpt", "mosaicml/mpt-7b",
+                     marks=pytest.mark.xfail(reason="tuple index out of range")),
+        ("opt", "facebook/opt-1.3b"),
+        ("persimmon", "adept/persimmon-8b-base"),
+        ("phi", "microsoft/phi-2"),
+        ("phi3", "microsoft/Phi-3-mini-4k-instruct"),
+        pytest.param("qwen", "TheBloke/Qwen-7B-Chat-GPTQ",
+                     marks=pytest.mark.xfail(reason="Accuracy validation failed")),
+        ("qwen2", "Qwen/Qwen2-0.5B-Instruct"),
+        ("stablelm", "stabilityai/stablelm-3b-4e1t"),
+    ])
+    @pytest.mark.nightly
+    def test_convert_model_nightly(self, name, type, ie_device):
+        self.run(model_name=name, model_link=type, ie_device=ie_device)
TestTimmConvertModel(TestTorchConvertModel): + @retry(3, exceptions=(HfHubHTTPError, LocalEntryNotFoundError), delay=1) def load_model(self, model_name, model_link): m = timm.create_model(model_name, pretrained=True) cfg = timm.get_pretrained_cfg(model_name) From 6b4b54edadb8af1889c8ce78b3586052ae4e1c16 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 30 Jul 2024 13:04:50 +0400 Subject: [PATCH 07/10] Bump actions/upload-artifact from 4.3.3 to 4.3.4 (#25788) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 4.3.3 to 4.3.4.
Release notes (sourced from actions/upload-artifact's releases):

Full Changelog for v4.3.4: https://github.com/actions/upload-artifact/compare/v4.3.3...v4.3.4

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=actions/upload-artifact&package-manager=github_actions&previous-version=4.3.3&new-version=4.3.4)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)
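For reference, the change below replaces the action's tag reference with a pinned full commit SHA, keeping the human-readable version in a trailing comment. A minimal sketch of the resulting step, reusing the step and artifact names from this workflow (the `path` value is illustrative, not taken from the diff):

```yaml
- name: Upload build logs
  uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4
  if: always()
  with:
    name: build_logs
    path: build/logs
```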
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/linux_cpu_dev.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/linux_cpu_dev.yml b/.github/workflows/linux_cpu_dev.yml index 94a8d308f54fe3..faf0c74934d169 100644 --- a/.github/workflows/linux_cpu_dev.yml +++ b/.github/workflows/linux_cpu_dev.yml @@ -167,7 +167,7 @@ jobs: # Upload build artifacts and logs # - name: Upload build logs - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 if: always() with: name: build_logs @@ -176,7 +176,7 @@ jobs: - name: Upload openvino package if: ${{ always() }} - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_package path: ${{ env.BUILD_DIR }}/openvino_package.tar.gz @@ -184,7 +184,7 @@ jobs: - name: Upload openvino tests package if: ${{ always() }} - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_tests path: ${{ env.BUILD_DIR }}/openvino_tests.tar.gz @@ -268,7 +268,7 @@ jobs: timeout-minutes: 25 - name: Upload Test Results - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 if: always() with: name: test-results-functional-cpu From c67dce1089e8e1005234d674102dd5226580a0da Mon Sep 17 00:00:00 2001 From: Aleksandr Voron Date: Tue, 30 Jul 2024 11:13:31 +0200 Subject: [PATCH 08/10] [ARM] Enable OpenMP on ARM platforms (#25329) ### Details: Apple's `llvm` does not support `-fopenmp`. Brew's `llvm` avoids this issue on OpenVINO build, however ACL build fails with the error `clang: error: unsupported argument 'libomp' to option '-fopenmp='`. The solution is to use `gcc` on Apple: ``` CXX=/opt/homebrew/Cellar/gcc/14.1.0_1/bin/g++-14 CC=/opt/homebrew/Cellar/gcc/14.1.0_1/bin/gcc-14 cmake -DTHREADING=OMP .. ``` On Linux compiler issues were not observed. `ACLScheduler` uses only 1 thread if OpenMP is used because `parallel_get_num_threads()` returns 1 in non-parallel section. The fix is suggested in https://github.com/openvinotoolkit/openvino/pull/25335 ### Tickets: - *ticket-id* --- cmake/dependencies.cmake | 15 ++++++++++++++- cmake/developer_package/compile_flags/sdl.cmake | 2 +- cmake/developer_package/plugins/plugins.cmake | 6 +++++- cmake/features.cmake | 5 +---- src/cmake/ov_parallel.cmake | 11 ++++++++++- src/plugins/intel_cpu/thirdparty/ACLConfig.cmake | 15 +++++++++++++-- 6 files changed, 44 insertions(+), 10 deletions(-) diff --git a/cmake/dependencies.cmake b/cmake/dependencies.cmake index 6edda8136b338f..117cf5d2765e6f 100644 --- a/cmake/dependencies.cmake +++ b/cmake/dependencies.cmake @@ -35,7 +35,20 @@ if(THREADING STREQUAL "OMP") SHA256 "591ea4a7e08bbe0062648916f42bded71d24c27f00af30a8f31a29b5878ea0cc" USE_NEW_LOCATION TRUE) else() - message(FATAL_ERROR "Intel OMP is not available on current platform") + message(WARNING "Pre-built Intel OMP is not available on current platform. 
System OMP will be used.") + find_package(OpenMP) + if(OpenMP_CXX_FOUND) + foreach(OpenMP_LIB ${OpenMP_CXX_LIBRARIES}) + string(FIND ${OpenMP_LIB} "omp" OpenMP_LIB_OMP_INDEX) + if(NOT OpenMP_LIB_OMP_INDEX EQUAL -1) + cmake_path(GET OpenMP_LIB PARENT_PATH OpenMP_LIB_DIR) + set(OMP_LIB ${OpenMP_LIB} CACHE FILEPATH "Path to OMP library") + set(OMP ${OpenMP_LIB_DIR} CACHE FILEPATH "Path to OMP root folder") + return() + endif() + endforeach() + endif() + message(FATAL_ERROR "System OpenMP has not been found") endif() update_deps_cache(OMP "${OMP}" "Path to OMP root folder") debug_message(STATUS "intel_omp=" ${OMP}) diff --git a/cmake/developer_package/compile_flags/sdl.cmake b/cmake/developer_package/compile_flags/sdl.cmake index d399ec4f62fe0d..4bc0edd33c4975 100644 --- a/cmake/developer_package/compile_flags/sdl.cmake +++ b/cmake/developer_package/compile_flags/sdl.cmake @@ -29,7 +29,7 @@ if(CMAKE_COMPILER_IS_GNUCXX OR OV_COMPILER_IS_CLANG OR OV_COMPILER_IS_INTEL_LLVM # Remove all symbol table and relocation information from the executable set(OV_C_CXX_FLAGS "${OV_C_CXX_FLAGS} -s") endif() - if(NOT MINGW) + if(NOT MINGW AND NOT APPLE) set(OV_LINKER_FLAGS "${OV_LINKER_FLAGS} -z noexecstack -z relro -z now") endif() elseif(OV_COMPILER_IS_CLANG OR OV_COMPILER_IS_INTEL_LLVM) diff --git a/cmake/developer_package/plugins/plugins.cmake b/cmake/developer_package/plugins/plugins.cmake index c76ed82388f3cc..4be67e24cdb6c7 100644 --- a/cmake/developer_package/plugins/plugins.cmake +++ b/cmake/developer_package/plugins/plugins.cmake @@ -93,7 +93,11 @@ function(ov_add_plugin) endif() if(CMAKE_COMPILER_IS_GNUCXX AND NOT CMAKE_CROSSCOMPILING) - target_link_options(${OV_PLUGIN_NAME} PRIVATE -Wl,--unresolved-symbols=ignore-in-shared-libs) + if (APPLE) + target_link_options(${OV_PLUGIN_NAME} PRIVATE -Wl,-undefined,dynamic_lookup) + else() + target_link_options(${OV_PLUGIN_NAME} PRIVATE -Wl,--unresolved-symbols=ignore-in-shared-libs) + endif() endif() set(custom_filter "") diff --git a/cmake/features.cmake b/cmake/features.cmake index 4063e2f8545ced..59dd3b286f0cc1 100644 --- a/cmake/features.cmake +++ b/cmake/features.cmake @@ -92,10 +92,7 @@ else() set(THREADING_DEFAULT "TBB") endif() -set(THREADING_OPTIONS "TBB" "TBB_AUTO" "SEQ") -if(NOT APPLE) - list(APPEND THREADING_OPTIONS "OMP") -endif() +set(THREADING_OPTIONS "TBB" "TBB_AUTO" "SEQ" "OMP") set(THREADING "${THREADING_DEFAULT}" CACHE STRING "Threading") set_property(CACHE THREADING PROPERTY STRINGS ${THREADING_OPTIONS}) diff --git a/src/cmake/ov_parallel.cmake b/src/cmake/ov_parallel.cmake index 0f5be2ed43518a..cfb69ce7b1445f 100644 --- a/src/cmake/ov_parallel.cmake +++ b/src/cmake/ov_parallel.cmake @@ -329,6 +329,11 @@ function(ov_set_threading_interface_for TARGET_NAME) elseif (THREADING STREQUAL "OMP") if (WIN32) set(omp_lib_name libiomp5md) + elseif (ARM OR AARCH64) + get_filename_component(OpenMP_CXX_LIB_NAME ${OMP_LIB} NAME) + string(REGEX REPLACE "^lib" "" OpenMP_CXX_LIB_NAME ${OpenMP_CXX_LIB_NAME}) + string(REGEX REPLACE "\\.[^.]*$" "" OpenMP_CXX_LIB_NAME ${OpenMP_CXX_LIB_NAME}) + set(omp_lib_name ${OpenMP_CXX_LIB_NAME}) else () set(omp_lib_name iomp5) endif () @@ -343,7 +348,11 @@ function(ov_set_threading_interface_for TARGET_NAME) set(lib_dbg_path ${lib_rel_path}) endif () else () - set(lib_rel_path ${OMP}/lib) + if (ARM OR AARCH64) + set(lib_rel_path ${OMP}) + else() + set(lib_rel_path ${OMP}/lib) + endif () set(lib_dbg_path ${lib_rel_path}) endif () diff --git a/src/plugins/intel_cpu/thirdparty/ACLConfig.cmake 
b/src/plugins/intel_cpu/thirdparty/ACLConfig.cmake index 09774aa4bec493..a142b5277202e5 100644 --- a/src/plugins/intel_cpu/thirdparty/ACLConfig.cmake +++ b/src/plugins/intel_cpu/thirdparty/ACLConfig.cmake @@ -264,6 +264,16 @@ elseif(NOT TARGET arm_compute::arm_compute) get_filename_component(toolchain_prefix "${CMAKE_CXX_COMPILER}" DIRECTORY) list(APPEND ARM_COMPUTE_OPTIONS toolchain_prefix="${toolchain_prefix}/") elseif(APPLE) + # we need to bypass this information in case of custom compiler is passed + # to cmake call. Such compiler and compiler prefix need to be passed to scons + get_filename_component(cxx_compiler "${CMAKE_CXX_COMPILER}" NAME) + get_filename_component(c_compiler "${CMAKE_C_COMPILER}" NAME) + get_filename_component(compiler_prefix "${CMAKE_CXX_COMPILER}" DIRECTORY) + + set(cmake_build_env + CC=${c_compiler} + CXX=${cxx_compiler}) + if(CMAKE_OSX_DEPLOYMENT_TARGET) set(extra_cxx_flags "${extra_cxx_flags} -mmacosx-version-min=${CMAKE_OSX_DEPLOYMENT_TARGET}") set(minos_added ON) @@ -275,8 +285,9 @@ elseif(NOT TARGET arm_compute::arm_compute) endif() set(extra_cxx_flags "${extra_cxx_flags} --sysroot ${CMAKE_OSX_SYSROOT}") endif() - - set(extra_cxx_flags "${extra_cxx_flags} -Wno-error=return-stack-address") + if(OV_COMPILER_IS_CLANG) + set(extra_cxx_flags "${extra_cxx_flags} -Wno-error=return-stack-address") + endif() get_filename_component(compiler_prefix "${CMAKE_CXX_COMPILER}" DIRECTORY) list(APPEND ARM_COMPUTE_OPTIONS compiler_prefix="${compiler_prefix}/") From dd2f6141b0e162789332bfccc7dfaa90d6584d2a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 30 Jul 2024 10:15:44 +0000 Subject: [PATCH 09/10] Bump actions/download-artifact from 4.1.7 to 4.1.8 (#25789) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [actions/download-artifact](https://github.com/actions/download-artifact) from 4.1.7 to 4.1.8.
Release notes (sourced from actions/download-artifact's releases):

Full Changelog for v4.1.8: https://github.com/actions/download-artifact/compare/v4...v4.1.8

Commits:
- fa0a91b Merge pull request #341 from actions/robherley/bump-pkgs
- b54d088 Update @actions/artifact version, bump dependencies

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=actions/download-artifact&package-manager=github_actions&previous-version=4.1.7&new-version=4.1.8)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Ilya Lavrenov --- .github/workflows/linux_cpu_dev.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/linux_cpu_dev.yml b/.github/workflows/linux_cpu_dev.yml index faf0c74934d169..447a8c52968044 100644 --- a/.github/workflows/linux_cpu_dev.yml +++ b/.github/workflows/linux_cpu_dev.yml @@ -209,13 +209,13 @@ jobs: PARALLEL_TEST_SCRIPT: ${{ github.workspace }}/install/tests/functional_test_utils/layer_tests_summary/run_parallel.py steps: - name: Download OpenVINO package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_package path: ${{ env.INSTALL_DIR }} - name: Download OpenVINO tests package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_tests path: ${{ env.INSTALL_TEST_DIR }} From d253f4fd89c1a77a68cac0fa3d97e627a8ed4467 Mon Sep 17 00:00:00 2001 From: Ivan Tikhonov Date: Tue, 30 Jul 2024 14:52:55 +0400 Subject: [PATCH 10/10] PassManager refactoring and new debug caps (#25637) ### Details: - Simplified run_passes logic in pass::Manager class - Moved debugging logic to a separate Profiler class - Added a name for pass::Manager - Extended debug caps: added serialization by env variable, added filtring and collection perf statistics in a file New debug output format: ![image_2024-07-25_17-26-54](https://github.com/user-attachments/assets/2504fa83-3496-47e7-89a3-27c66b3e97b1) ### Tickets: - *CVS-147285* --- .../src/align_quantization_intervals.cpp | 2 +- .../src/align_quantization_parameters.cpp | 2 +- .../src/low_precision.cpp | 4 +- .../markup_avg_pool_precision_preserved.cpp | 2 +- .../src/propagate_precisions.cpp | 2 +- .../src/pass/common_optimizations.cpp | 2 +- .../snippets/src/pass/fq_decomposition.cpp | 2 +- src/common/snippets/src/pass/tokenization.cpp | 2 +- .../common_optimizations.cpp | 2 +- .../convert_nms_gather_path_to_unsigned.cpp | 2 +- .../moc_legacy_transformations.cpp | 2 +- .../moc_transformations.cpp | 2 +- .../optimize_strided_slice.cpp | 2 +- .../common_optimizations/ric_fusion.cpp | 2 +- .../simplify_shape_of_sub_graph.cpp | 2 +- .../src/transformations/convert_precision.cpp | 4 +- .../convert_compression_only_to_legacy.cpp | 2 +- ...k_subgraphs_to_keep_in_mixed_precision.cpp | 2 +- .../convert_opset2_to_opset1.cpp | 2 +- .../convert_opset3_to_opset2.cpp | 2 +- .../smart_reshape/smart_reshape.cpp | 4 +- .../symbolic_optimizations.cpp | 2 +- src/core/include/openvino/pass/manager.hpp | 15 +- src/core/src/graph_util.cpp | 4 +- src/core/src/model.cpp | 2 +- src/core/src/pass/convert_fp32_to_fp16.cpp | 2 +- src/core/src/pass/manager.cpp | 300 +++++++++++++----- src/core/src/pass/sdpa_to_paged_attention.cpp | 2 +- src/frontends/ir/src/frontend.cpp | 2 +- src/frontends/onnx/frontend/src/frontend.cpp | 6 +- src/frontends/paddle/src/frontend.cpp | 8 +- src/frontends/pytorch/src/frontend.cpp | 2 +- src/frontends/tensorflow/src/frontend.cpp | 4 +- .../tensorflow_lite/src/frontend.cpp | 6 +- .../tflite_quantize_resolver.cpp | 2 +- src/plugins/auto_batch/src/plugin.cpp | 2 +- .../convert_to_cpu_specific_opset.hpp | 2 +- .../transformation_pipeline.cpp | 12 +- .../transformations/convert_convolution.cpp | 2 +- 
.../src/plugin/transformations_pipeline.cpp | 8 +- .../compiler/src/graph_transformations.cpp | 2 +- src/plugins/template/src/plugin.cpp | 2 +- 42 files changed, 287 insertions(+), 148 deletions(-) diff --git a/src/common/low_precision_transformations/src/align_quantization_intervals.cpp b/src/common/low_precision_transformations/src/align_quantization_intervals.cpp index e8f8bb0250d8af..2d28adfe7f8570 100644 --- a/src/common/low_precision_transformations/src/align_quantization_intervals.cpp +++ b/src/common/low_precision_transformations/src/align_quantization_intervals.cpp @@ -20,7 +20,7 @@ ov::pass::low_precision::AlignQuantizationIntervals::AlignQuantizationIntervals( bool ov::pass::low_precision::AlignQuantizationIntervals::run_on_model(const std::shared_ptr& f) { RUN_ON_FUNCTION_SCOPE(AlignQuantizationIntervals); - ov::pass::Manager manager; + ov::pass::Manager manager("LPT:AlignQuantizationIntervals"); manager.set_per_pass_validation(false); std::shared_ptr intervalsAlignment = manager.register_pass(); intervalsAlignment->add_matcher>( diff --git a/src/common/low_precision_transformations/src/align_quantization_parameters.cpp b/src/common/low_precision_transformations/src/align_quantization_parameters.cpp index 236cc802d4f9a3..88729c63a6faf7 100644 --- a/src/common/low_precision_transformations/src/align_quantization_parameters.cpp +++ b/src/common/low_precision_transformations/src/align_quantization_parameters.cpp @@ -21,7 +21,7 @@ ov::pass::low_precision::AlignQuantizationParameters::AlignQuantizationParameter bool ov::pass::low_precision::AlignQuantizationParameters::run_on_model(const std::shared_ptr& f) { RUN_ON_FUNCTION_SCOPE(AlignQuantizationParameters); - ov::pass::Manager manager; + ov::pass::Manager manager("LPT:AlignQuantizationParameters"); manager.set_per_pass_validation(false); std::shared_ptr propagation = manager.register_pass(); propagation->add_matcher>(); diff --git a/src/common/low_precision_transformations/src/low_precision.cpp b/src/common/low_precision_transformations/src/low_precision.cpp index 6435f47d12ffec..e58374ed3e2b1a 100644 --- a/src/common/low_precision_transformations/src/low_precision.cpp +++ b/src/common/low_precision_transformations/src/low_precision.cpp @@ -190,7 +190,7 @@ MarkupOptimizations::MarkupOptimizations( bool ov::pass::low_precision::MarkupOptimizations::run_on_model(const std::shared_ptr& f) { RUN_ON_FUNCTION_SCOPE(MarkupOptimizations); - ov::pass::Manager markup(get_pass_config()); + ov::pass::Manager markup(get_pass_config(), "LPT:MarkupOptimizations"); markup.set_per_pass_validation(false); markup.register_pass(params.defaultPrecisions); if (!precisionRestrictions.empty()) { @@ -217,7 +217,7 @@ bool ov::pass::low_precision::LowPrecision::run_on_model(const std::shared_ptr(); const std::vector supportedTypes = {ov::element::i8, ov::element::u8}; diff --git a/src/common/low_precision_transformations/src/markup_avg_pool_precision_preserved.cpp b/src/common/low_precision_transformations/src/markup_avg_pool_precision_preserved.cpp index ef5675bd764b5f..9dfe0c39caa419 100644 --- a/src/common/low_precision_transformations/src/markup_avg_pool_precision_preserved.cpp +++ b/src/common/low_precision_transformations/src/markup_avg_pool_precision_preserved.cpp @@ -19,7 +19,7 @@ ov::pass::low_precision::MarkupAvgPoolPrecisionPreserved::MarkupAvgPoolPrecision bool ov::pass::low_precision::MarkupAvgPoolPrecisionPreserved::run_on_model(const std::shared_ptr& f) { RUN_ON_FUNCTION_SCOPE(MarkupAvgPoolPrecisionPreserved); - ov::pass::Manager manager; + 
ov::pass::Manager manager("LPT:MarkupAvgPoolPrecisionPreserved"); manager.set_per_pass_validation(false); std::shared_ptr markupAvgPoolPrecision = manager.register_pass(); markupAvgPoolPrecision->add_matcher>(); diff --git a/src/common/low_precision_transformations/src/propagate_precisions.cpp b/src/common/low_precision_transformations/src/propagate_precisions.cpp index f849dbfe55263c..a899b6939a6039 100644 --- a/src/common/low_precision_transformations/src/propagate_precisions.cpp +++ b/src/common/low_precision_transformations/src/propagate_precisions.cpp @@ -21,7 +21,7 @@ ov::pass::low_precision::PropagatePrecisions::PropagatePrecisions(const Attribut bool ov::pass::low_precision::PropagatePrecisions::run_on_model(const std::shared_ptr& f) { RUN_ON_FUNCTION_SCOPE(PropagatePrecisions); - ov::pass::Manager manager; + ov::pass::Manager manager("LPT:PropagatePrecisions"); manager.set_per_pass_validation(false); std::shared_ptr precisionsPropagation = manager.register_pass(); precisionsPropagation->add_matcher>(params, AttributeSource::OutputPort); diff --git a/src/common/snippets/src/pass/common_optimizations.cpp b/src/common/snippets/src/pass/common_optimizations.cpp index 516737f621051c..d9cc6f7f819201 100644 --- a/src/common/snippets/src/pass/common_optimizations.cpp +++ b/src/common/snippets/src/pass/common_optimizations.cpp @@ -45,7 +45,7 @@ CommonOptimizations::CommonOptimizations(const SnippetsTokenization::Config& con // Firstly, we should transform all original Converts inside body to ConvertTruncation to save original behavior. // Then if Subgraph contains FakeQuantize we enable specific transformation for quantized subgraphs. - ov::pass::Manager manager(get_pass_config()); + ov::pass::Manager manager(get_pass_config(), "Snippets:CommonOptimizations"); REGISTER_SNIPPETS_PASS(manager, ov::snippets::pass::TransformConvertToConvertTruncation, true); REGISTER_SNIPPETS_PASS(manager, ov::snippets::pass::ExplicitTransposeMatMulInputs, is_domain_sensitive); REGISTER_SNIPPETS_PASS(manager, ov::snippets::pass::CommonFakeQuantizeDecomposition, is_quantized); diff --git a/src/common/snippets/src/pass/fq_decomposition.cpp b/src/common/snippets/src/pass/fq_decomposition.cpp index 2328e7f12c1681..fe5e98e8a8a4c9 100644 --- a/src/common/snippets/src/pass/fq_decomposition.cpp +++ b/src/common/snippets/src/pass/fq_decomposition.cpp @@ -375,7 +375,7 @@ bool ov::snippets::pass::CommonFakeQuantizeDecomposition::is_supported_fq(const bool ov::snippets::pass::CommonFakeQuantizeDecomposition::run_on_model(const std::shared_ptr& f) { RUN_ON_FUNCTION_SCOPE(CommonFakeQuantizeDecomposition); - ov::pass::Manager manager; + ov::pass::Manager manager("Snippets:CommonFakeQuantizeDecomposition"); manager.set_per_pass_validation(false); manager.register_pass(); manager.register_pass(); diff --git a/src/common/snippets/src/pass/tokenization.cpp b/src/common/snippets/src/pass/tokenization.cpp index 643f169f8b8416..43733fc196ee83 100644 --- a/src/common/snippets/src/pass/tokenization.cpp +++ b/src/common/snippets/src/pass/tokenization.cpp @@ -76,7 +76,7 @@ bool EnumerateNodes::run_on_model(const std::shared_ptr &m) { bool SnippetsTokenization::run_on_model(const std::shared_ptr& m) { RUN_ON_FUNCTION_SCOPE(SnippetsTokenization); - ov::pass::Manager manager(get_pass_config()); + ov::pass::Manager manager(get_pass_config(), "Snippets:Tokenization"); manager.set_per_pass_validation(false); manager.register_pass(); diff --git a/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp 
b/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp index 9da4340c2423f4..d7ca44e7ddad34 100644 --- a/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp @@ -121,7 +121,7 @@ bool ov::pass::CommonOptimizations::run_on_model(const std::shared_ptr& f) { RUN_ON_FUNCTION_SCOPE(CommonOptimizations); - ov::pass::Manager manager(get_pass_config()); + ov::pass::Manager manager(get_pass_config(), "CommonOptimizations"); manager.set_per_pass_validation(false); using namespace ov::pass; diff --git a/src/common/transformations/src/transformations/common_optimizations/convert_nms_gather_path_to_unsigned.cpp b/src/common/transformations/src/transformations/common_optimizations/convert_nms_gather_path_to_unsigned.cpp index 3252882472ffec..7c22dbdfeac53d 100644 --- a/src/common/transformations/src/transformations/common_optimizations/convert_nms_gather_path_to_unsigned.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/convert_nms_gather_path_to_unsigned.cpp @@ -102,7 +102,7 @@ class PropagateNMSPath : public pass::MatcherPass { for (size_t body_idx = 0; body_idx < models.size(); ++body_idx) { handle_params(multi_subgraph_op, models[body_idx], static_cast(body_idx)); - ov::pass::Manager manager; + ov::pass::Manager manager("PropagateNMSPath"); manager.register_pass(); manager.run_passes(models[body_idx]); handle_results(multi_subgraph_op, models[body_idx], static_cast(body_idx)); diff --git a/src/common/transformations/src/transformations/common_optimizations/moc_legacy_transformations.cpp b/src/common/transformations/src/transformations/common_optimizations/moc_legacy_transformations.cpp index 83cf163555c327..7c21f98439d9a4 100644 --- a/src/common/transformations/src/transformations/common_optimizations/moc_legacy_transformations.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/moc_legacy_transformations.cpp @@ -12,7 +12,7 @@ bool ov::pass::MOCLegacyTransformations::run_on_model(const std::shared_ptr& f) { RUN_ON_MODEL_SCOPE(MOCLegacyTransformations); - ov::pass::Manager manager(get_pass_config()); + ov::pass::Manager manager(get_pass_config(), "MOCLegacyTransformations"); using namespace ov::pass; REGISTER_PASS(manager, ChangePlaceholderTypes, m_params_with_custom_types) manager.run_passes(f); diff --git a/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp b/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp index 44c227623f444d..3cf542377d5adc 100644 --- a/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp @@ -125,7 +125,7 @@ bool ov::pass::MOCTransformations::run_on_model(const std::shared_ptr f->validate_nodes_and_infer_types(); } - ov::pass::Manager manager(get_pass_config()); + ov::pass::Manager manager(get_pass_config(), "MOC"); manager.set_per_pass_validation(false); using namespace ov::pass; REGISTER_PASS(manager, InitNodeInfo) diff --git a/src/common/transformations/src/transformations/common_optimizations/optimize_strided_slice.cpp b/src/common/transformations/src/transformations/common_optimizations/optimize_strided_slice.cpp index ace7e544bc994c..8d093878ff0b93 100644 --- 
a/src/common/transformations/src/transformations/common_optimizations/optimize_strided_slice.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/optimize_strided_slice.cpp @@ -425,7 +425,7 @@ ov::pass::StridedSliceOptimization::StridedSliceOptimization(bool use_shapes) { bool ov::pass::StridedSliceOptimization::run_on_model(const std::shared_ptr& f) { RUN_ON_FUNCTION_SCOPE(StridedSliceOptimization); - ov::pass::Manager manager; + ov::pass::Manager manager("StridedSliceOptimization"); manager.set_per_pass_validation(false); if (m_use_shapes) { manager.register_pass(); diff --git a/src/common/transformations/src/transformations/common_optimizations/ric_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/ric_fusion.cpp index 7c095fbe89fe1f..6e44692b5f169c 100644 --- a/src/common/transformations/src/transformations/common_optimizations/ric_fusion.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/ric_fusion.cpp @@ -844,7 +844,7 @@ bool ov::pass::ReverseInputChannelsFusion::run_on_model(const std::shared_ptr(); ADD_MATCHER(ric_init, SplitConcat, nodes_to_fuse) diff --git a/src/common/transformations/src/transformations/common_optimizations/simplify_shape_of_sub_graph.cpp b/src/common/transformations/src/transformations/common_optimizations/simplify_shape_of_sub_graph.cpp index d4bb02227c56ac..a225f0655f98ee 100644 --- a/src/common/transformations/src/transformations/common_optimizations/simplify_shape_of_sub_graph.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/simplify_shape_of_sub_graph.cpp @@ -353,7 +353,7 @@ pass::SimplifySecondInputOfReshape::SimplifySecondInputOfReshape() { bool pass::SimplifyShapeOfSubGraph::run_on_model(const std::shared_ptr& f) { RUN_ON_FUNCTION_SCOPE(SimplifyShapeOfSubGraph); - Manager manager(get_pass_config()); + Manager manager(get_pass_config(), "SimplifyShapeOfSubGraph"); manager.set_per_pass_validation(false); REGISTER_PASS(manager, PrepareShapeOpsForEliminationAroundBE) diff --git a/src/common/transformations/src/transformations/convert_precision.cpp b/src/common/transformations/src/transformations/convert_precision.cpp index 6d9d11ff52bcba..3c819d481bacee 100644 --- a/src/common/transformations/src/transformations/convert_precision.cpp +++ b/src/common/transformations/src/transformations/convert_precision.cpp @@ -413,7 +413,7 @@ bool ov::pass::ConvertPrecision::run_on_model(const std::shared_ptr& bool has_fp16_compression = m_precisions.count(element::f32) > 0 && m_precisions[element::f32] == element::f16; if (m_keep_precision_sensitive_in_fp32 && has_fp16_compression) { - pass::Manager manager(get_pass_config()); + pass::Manager manager(get_pass_config(), "KeepPrecisionSensitiveInFP32"); // Mark subgraphs with disable_fp16_compression to keep them in FP32 manager.register_pass(); manager.register_pass(); @@ -494,7 +494,7 @@ bool ov::pass::ConvertPrecision::run_on_model(const std::shared_ptr& // to remove extra converts if (m_keep_precision_sensitive_in_fp32) { - pass::Manager manager(get_pass_config()); + pass::Manager manager(get_pass_config(), "KeepPrecisionSensitiveInFP32:RemoveConverts"); manager.register_pass(); manager.register_pass(); manager.run_passes(f); diff --git a/src/common/transformations/src/transformations/fp16_compression/convert_compression_only_to_legacy.cpp b/src/common/transformations/src/transformations/fp16_compression/convert_compression_only_to_legacy.cpp index ebeb900a31f2b8..b471424aeb9e65 100644 --- 
a/src/common/transformations/src/transformations/fp16_compression/convert_compression_only_to_legacy.cpp +++ b/src/common/transformations/src/transformations/fp16_compression/convert_compression_only_to_legacy.cpp @@ -17,7 +17,7 @@ using namespace ov; bool ov::pass::ConvertCompressedOnlyToLegacy::run_on_model(const std::shared_ptr& f) { RUN_ON_MODEL_SCOPE(ConvertCompressedOnlyToLegacy); if (ov::op::util::has_decompression_converts(f)) { - Manager manager(get_pass_config()); + Manager manager(get_pass_config(), "ConvertCompressedOnlyToLegacy"); const precisions_map convert_precision_map{{ov::element::f32, ov::element::f16}}; manager.register_pass(convert_precision_map); diff --git a/src/common/transformations/src/transformations/fp16_compression/mark_subgraphs_to_keep_in_mixed_precision.cpp b/src/common/transformations/src/transformations/fp16_compression/mark_subgraphs_to_keep_in_mixed_precision.cpp index c5e768fa687bc2..45c455fd61e87c 100644 --- a/src/common/transformations/src/transformations/fp16_compression/mark_subgraphs_to_keep_in_mixed_precision.cpp +++ b/src/common/transformations/src/transformations/fp16_compression/mark_subgraphs_to_keep_in_mixed_precision.cpp @@ -423,7 +423,7 @@ class PropagateDownDisableSensitivityForQuantized : public pass::MatcherPass { bool MarkSugraphsToKeepInMixedPrecision::run_on_model(const shared_ptr& m) { RUN_ON_MODEL_SCOPE(MarkSugraphsToKeepInMixedPrecision); - Manager manager(get_pass_config()); + Manager manager(get_pass_config(), "MarkSugraphsToKeepInMixedPrecision"); // Mark root of Division with eps pattern to keep in FP32 REGISTER_PASS(manager, MarkDivWithEps) REGISTER_PASS(manager, MarkExpInReduceOpPath) diff --git a/src/common/transformations/src/transformations/opset_conversions/convert_opset2_to_opset1.cpp b/src/common/transformations/src/transformations/opset_conversions/convert_opset2_to_opset1.cpp index ab042a4cbdaaf4..3279fcb3e6cb55 100644 --- a/src/common/transformations/src/transformations/opset_conversions/convert_opset2_to_opset1.cpp +++ b/src/common/transformations/src/transformations/opset_conversions/convert_opset2_to_opset1.cpp @@ -14,7 +14,7 @@ bool ov::pass::ConvertOpSet2ToOpSet1::run_on_model(const std::shared_ptr& f) { RUN_ON_FUNCTION_SCOPE(ConvertOpSet2ToOpSet1); - ov::pass::Manager manager(get_pass_config()); + ov::pass::Manager manager(get_pass_config(), "ConvertOpSet2ToOpSet1"); manager.set_per_pass_validation(false); manager.register_pass(); diff --git a/src/common/transformations/src/transformations/opset_conversions/convert_opset3_to_opset2.cpp b/src/common/transformations/src/transformations/opset_conversions/convert_opset3_to_opset2.cpp index 4c9ef1950ace84..045ad23a506220 100644 --- a/src/common/transformations/src/transformations/opset_conversions/convert_opset3_to_opset2.cpp +++ b/src/common/transformations/src/transformations/opset_conversions/convert_opset3_to_opset2.cpp @@ -17,7 +17,7 @@ bool ov::pass::ConvertOpSet3ToOpSet2::run_on_model(const std::shared_ptr& f) { RUN_ON_FUNCTION_SCOPE(ConvertOpSet3ToOpSet2); - ov::pass::Manager manager(get_pass_config()); + ov::pass::Manager manager(get_pass_config(), "ConvertOpSet3ToOpSet2"); manager.set_per_pass_validation(false); manager.register_pass(); diff --git a/src/common/transformations/src/transformations/smart_reshape/smart_reshape.cpp b/src/common/transformations/src/transformations/smart_reshape/smart_reshape.cpp index 67e7084380375e..e1ddcdc3540e56 100644 --- a/src/common/transformations/src/transformations/smart_reshape/smart_reshape.cpp +++ 
b/src/common/transformations/src/transformations/smart_reshape/smart_reshape.cpp @@ -21,7 +21,7 @@ bool ov::pass::SmartReshape::run_on_model(const std::shared_ptr& f) { RUN_ON_FUNCTION_SCOPE(SmartReshape); - ov::pass::Manager static_manager; + ov::pass::Manager static_manager("SmartReshape:static"); // This pass must be called first in pipeline static_manager.register_pass(); static_manager.register_pass(); @@ -37,7 +37,7 @@ bool ov::pass::SmartReshape::run_on_model(const std::shared_ptr& f) { static_manager.register_pass(); static_manager.run_passes(f); - ov::pass::Manager dynamic_manager; + ov::pass::Manager dynamic_manager("SmartReshape:dynamic"); // function revalidation will cause "fake" dynamism due to ShapeOf ops insertions // we turn it off to have access to originally static shapes dynamic_manager.set_per_pass_validation(false); diff --git a/src/common/transformations/src/transformations/symbolic_transformations/symbolic_optimizations.cpp b/src/common/transformations/src/transformations/symbolic_transformations/symbolic_optimizations.cpp index 197644ffca6d84..d6629f326a2a70 100644 --- a/src/common/transformations/src/transformations/symbolic_transformations/symbolic_optimizations.cpp +++ b/src/common/transformations/src/transformations/symbolic_transformations/symbolic_optimizations.cpp @@ -172,7 +172,7 @@ ov::pass::LabelResolvingThroughSelect::LabelResolvingThroughSelect() { } ov::pass::SymbolicOptimizations::SymbolicOptimizations(bool full_run) { - m_manager = std::make_shared(); + m_manager = std::make_shared("Symbolic"); m_manager->set_per_pass_validation(false); #define REGISTER_SYMBOLIC(region, ...) m_manager->register_pass(__VA_ARGS__); diff --git a/src/core/include/openvino/pass/manager.hpp b/src/core/include/openvino/pass/manager.hpp index 8ca9ce354eeb5c..a026957697f2db 100644 --- a/src/core/include/openvino/pass/manager.hpp +++ b/src/core/include/openvino/pass/manager.hpp @@ -23,8 +23,11 @@ class OPENVINO_API Manager { Manager(); virtual ~Manager(); + //// \brief Construct Manager with a provided name. + explicit Manager(std::string name); + //// \brief Construct Manager with shared PassConfig instance - explicit Manager(std::shared_ptr pass_config); + explicit Manager(std::shared_ptr pass_config, std::string name = "UnnamedManager"); /// \brief Register given transformation class type to execution list /// Example below show the basic usage of pass::Manager @@ -66,11 +69,8 @@ class OPENVINO_API Manager { /// /// \return Returns true if the model was changed by transformations, /// false otherwise. 
- bool run_passes(std::shared_ptr model); + bool run_passes(const std::shared_ptr& model); - void set_pass_visualization(bool new_state) { - m_visualize = new_state; - } /// \brief Set flag to enable/disable running Validate pass after executing /// each registered pass /// \param new_state Value "true" enables Validate pass run; "false", otherwise @@ -97,8 +97,11 @@ class OPENVINO_API Manager { std::shared_ptr m_pass_config; std::vector> m_pass_list; - bool m_visualize = false; bool m_per_pass_validation = true; + std::string m_name = "UnnamedManager"; + +private: + bool run_pass(const std::shared_ptr& pass, const std::shared_ptr& model, bool needs_validate); }; } // namespace pass } // namespace ov diff --git a/src/core/src/graph_util.cpp b/src/core/src/graph_util.cpp index fa36c61cb38d84..17780f7751d52e 100644 --- a/src/core/src/graph_util.cpp +++ b/src/core/src/graph_util.cpp @@ -326,7 +326,7 @@ void serialize(const std::shared_ptr& m, const std::string& xml_path, const std::string& bin_path, ov::pass::Serialize::Version version) { - ov::pass::Manager manager; + ov::pass::Manager manager("Serialize"); manager.register_pass(xml_path, bin_path, version); manager.run_passes(std::const_pointer_cast(m)); } @@ -339,7 +339,7 @@ void save_model(const std::shared_ptr& m, const std::string& ou ov::pass::compress_model_to_f16(cloned, postponed); } - ov::pass::Manager manager; + ov::pass::Manager manager("SaveModel"); manager.register_pass(); manager.register_pass(output_model, ""); manager.run_passes(std::move(cloned)); diff --git a/src/core/src/model.cpp b/src/core/src/model.cpp index 88609222885f05..1493d950cd78ef 100644 --- a/src/core/src/model.cpp +++ b/src/core/src/model.cpp @@ -870,7 +870,7 @@ void ov::Model::reshape(const std::map, ov::PartialShape>& }; try { - ov::pass::Manager ssr_manager; + ov::pass::Manager ssr_manager("SmartReshape"); ssr_manager.register_pass(); ssr_manager.run_passes(shared_from_this()); diff --git a/src/core/src/pass/convert_fp32_to_fp16.cpp b/src/core/src/pass/convert_fp32_to_fp16.cpp index 4ccd024e1cf830..aab1180ff47dfb 100644 --- a/src/core/src/pass/convert_fp32_to_fp16.cpp +++ b/src/core/src/pass/convert_fp32_to_fp16.cpp @@ -10,7 +10,7 @@ bool ov::pass::ConvertFP32ToFP16::run_on_model(const std::shared_ptr& f) { RUN_ON_MODEL_SCOPE(ConvertFP32ToFP16); - ov::pass::Manager m(get_pass_config()); + ov::pass::Manager m(get_pass_config(), "ConvertFP32ToFP16"); m.register_pass(precisions_map{{ov::element::f32, ov::element::f16}}); m.run_passes(f); return false; diff --git a/src/core/src/pass/manager.cpp b/src/core/src/pass/manager.cpp index 13404562a4318d..4b01c09a077bde 100644 --- a/src/core/src/pass/manager.cpp +++ b/src/core/src/pass/manager.cpp @@ -5,15 +5,18 @@ #include "openvino/pass/manager.hpp" #include +#include #include #include #include #include #include +#include #include "itt.hpp" #include "openvino/pass/graph_rewrite.hpp" #include "openvino/pass/visualize_tree.hpp" +#include "openvino/util/common_util.hpp" #include "openvino/util/env_util.hpp" #include "openvino/util/log.hpp" #include "perf_counters.hpp" @@ -36,35 +39,73 @@ PerfCounters& perf_counters() { #endif // ENABLE_PROFILING_ITT namespace { -bool getenv_visualize_tracing() { - return ov::util::getenv_bool("OV_ENABLE_VISUALIZE_TRACING"); -} -} // namespace -ov::pass::Manager::Manager() : m_pass_config(std::make_shared()), m_visualize(getenv_visualize_tracing()) {} +/** + * @brief EnvVar gets the environment variable value by name. 
+ * It tries to interpret the value as boolean, if it fails then + * the original string value is stored. This behavior helps us to reduce the number + * of the additional env variables. + * + * Example of usage: + * if OV_ENABLE_PROFILE_PASS is true, it enables console output. + * if OV_ENABLE_PROFILE_PASS contains a path to file (string), the out logs + * will be re-directed to the file. + */ +class EnvVar { +public: + explicit EnvVar(const std::string& var) { + const auto& val = ov::util::getenv_string(var.c_str()); + std::set off = {"0", "false", "off"}; + std::set on = {"1", "true", "on"}; -ov::pass::Manager::~Manager() = default; + const auto& val_lower = ov::util::to_lower(var); + if (off.count(val_lower)) { + m_is_bool = true; + } else if (on.count(val_lower)) { + m_is_bool = true; + b_value = true; + } else { + s_value = val; + } + } -ov::pass::Manager::Manager(std::shared_ptr pass_config) - : m_pass_config(std::move(pass_config)), - m_visualize(getenv_visualize_tracing()) {} + /** + * @brief This ctor helps to activate/deactivate EnvVar from the code. + */ + explicit EnvVar(const std::string& var, bool activate) { + m_is_bool = true; + b_value = activate; + } -void ov::pass::Manager::set_per_pass_validation(bool new_state) { - m_per_pass_validation = new_state; -} + bool is_enabled() const { + return b_value || !s_value.empty(); + } + + bool is_bool() const { + return m_is_bool; + } + + const std::string& get_str() const { + return s_value; + } + +private: + bool m_is_bool = false; + bool b_value = false; + std::string s_value; +}; -namespace { class stopwatch { public: void start() { - if (m_active == false) { + if (!m_active) { m_active = true; m_start_time = m_clock.now(); } } void stop() { - if (m_active == true) { + if (m_active) { auto end_time = m_clock.now(); m_last_time = end_time - m_start_time; m_active = false; @@ -89,89 +130,184 @@ class stopwatch { bool m_active = false; std::chrono::nanoseconds m_last_time = std::chrono::high_resolution_clock::duration::zero(); }; -} // namespace -bool ov::pass::Manager::run_passes(shared_ptr func) { - OV_ITT_SCOPED_TASK(ov::itt::domains::core, "pass::Manager::run_passes"); - - static bool profile_enabled = ov::util::getenv_bool("OV_PROFILE_PASS_ENABLE"); - - size_t index = 0; - stopwatch pass_timer; - stopwatch overall_timer; - overall_timer.start(); - bool pass_applied = false; - bool function_changed = false; - bool needs_validate = false; - for (auto& pass : m_pass_list) { - if (m_pass_config->is_disabled(pass->get_type_info())) { - OPENVINO_DEBUG("Pass ", pass->get_name(), " is disabled"); - continue; +class Profiler { +public: + explicit Profiler(std::string manager_name) + : m_visualize("OV_ENABLE_VISUALIZE_TRACING"), + m_serialize("OV_ENABLE_SERIALIZE_TRACING"), + m_profile_pass("OV_ENABLE_PROFILE_PASS"), + m_manager_name(std::move(manager_name)) { + if (m_profile_pass.is_enabled() && !m_profile_pass.is_bool()) { + m_file.open(m_profile_pass.get_str(), std::ios_base::app); } + } - OV_ITT_SCOPE(FIRST_INFERENCE, ov::itt::domains::ov_pass, ov::pass::perf_counters()[pass->get_type_info()]); + ~Profiler() { + if (m_file.is_open()) { + m_file.close(); + } + } - pass_timer.start(); + void start_timer(const std::string& name) { + if (m_profile_pass.is_enabled()) { + stopwatches[name] = stopwatch(); + stopwatches[name].start(); - if (auto matcher_pass = dynamic_pointer_cast(pass)) { - // This checks is to skip the graph transformation when the graph pass relies on - // static shape but the function state is dynamic. 
- if (matcher_pass->get_property(PassProperty::REQUIRE_STATIC_SHAPE) && func->is_dynamic()) { - OPENVINO_DEBUG("Pass ", - pass->get_name(), - " requires static shape but the " - "model is dynamic. Skipping this transformation"); - continue; - } - // GraphRewrite is a temporary container for MatcherPass to make execution - // on on entire ov::Model - pass_applied = GraphRewrite(matcher_pass).run_on_model(func); - } else if (auto function_pass = dynamic_pointer_cast(pass)) { - // This checks is to skip the graph transformation when the graph pass relies on - // static shape but the function state is dynamic. - if (function_pass->get_property(PassProperty::REQUIRE_STATIC_SHAPE) && func->is_dynamic()) { - OPENVINO_DEBUG("Pass ", - pass->get_name(), - " requires static shape but the " - "model is dynamic. Skipping this transformation"); - continue; + bool is_pass_manager = name == m_manager_name; + if (is_pass_manager) { + std::cout << std::setw(25) << left; + std::cout << "PassManager started: " << m_manager_name << std::endl; } + } + } + + void stop_timer(const std::string& name, bool applied) { + if (m_profile_pass.is_enabled()) { + auto& stopwatch = stopwatches.at(name); + stopwatch.stop(); - if (dynamic_pointer_cast(pass)) { - if (needs_validate) { - function_pass->run_on_model(func); - needs_validate = false; + bool is_pass_manager = name == m_manager_name; + if (m_profile_pass.is_bool()) { + std::cout << std::setw(25) << left; + if (is_pass_manager) { + std::cout << "PassManager finished: "; + } else { + std::cout << " "; + } + std::cout << std::setw(60) << left << name; + std::cout << std::setw(5) << right << stopwatch.get_milliseconds() << "ms " << (applied ? "+" : "-") + << std::endl; + } else if (m_file.is_open()) { + if (is_pass_manager) { + m_file << "m;" << name << ";" << stopwatch.get_timer_value().count() << ";" << (applied ? "1" : "0") + << std::endl; + } else { + m_file << "t;" << name << ";" << m_manager_name << ";" << stopwatch.get_timer_value().count() << ";" + << (applied ? "1" : "0") << std::endl; } } else { - pass_applied = function_pass->run_on_model(func); + OPENVINO_THROW("The output file for logging transformation statistics is closed. " + "Recording of statistics is not possible."); } } + } - if (m_visualize) { - // visualizations and serializations will be named after the outermost function - const size_t num_digits_in_pass_index = 3; - std::string index_str = std::to_string(index); - index_str = std::string(num_digits_in_pass_index - index_str.length(), '0') + index_str; - auto base_filename = func->get_name() + std::string("_") + index_str + std::string("_") + pass->get_name(); - - if (m_visualize) { - auto file_ext = "svg"; - pass::VisualizeTree vt(base_filename + std::string(".") + file_ext); - vt.run_on_model(func); + void visualize(const shared_ptr& model, const std::string& pass_name) const { + static size_t viz_index = 0; + if (m_visualize.is_enabled()) { + const auto& filter = m_visualize.get_str(); + if (m_visualize.is_bool() || (pass_name.find(filter) != std::string::npos)) { + const auto& file_name = gen_file_name(model->get_name(), pass_name, viz_index++); + ov::pass::VisualizeTree vt(file_name + ".svg"); + vt.run_on_model(model); } } - index++; - pass_timer.stop(); - if (profile_enabled) { - cout << setw(7) << pass_timer.get_milliseconds() << "ms" << (pass_applied ? 
" + " : " ") - << pass->get_name() << "\n"; + } + + void serialize(const shared_ptr& model, const std::string& pass_name) const { + static size_t serialize_index = 0; + if (m_serialize.is_enabled()) { + const auto& filter = m_serialize.get_str(); + if (m_serialize.is_bool() || (pass_name.find(filter) != std::string::npos)) { + const auto& file_name = gen_file_name(model->get_name(), pass_name, serialize_index++); + ov::pass::Serialize serialize(file_name + ".xml", file_name + ".bin"); + serialize.run_on_model(model); + } } - function_changed = function_changed || pass_applied; - needs_validate = pass_applied; } - if (profile_enabled) { - cout << "passes done in " << overall_timer.get_milliseconds() << "ms\n"; + +private: + static std::string gen_file_name(const std::string& model_name, const std::string& pass_name, const size_t idx) { + std::stringstream name; + // visualizations and serializations will be named after the outermost function + const size_t num_digits_in_pass_index = 3; + std::string index_str = std::to_string(idx); + index_str = std::string(num_digits_in_pass_index - index_str.length(), '0') + index_str; + + name << model_name << std::string("_") << index_str << std::string("_") << pass_name; + return name.str(); } - return function_changed; + std::unordered_map stopwatches; + + EnvVar m_visualize; + EnvVar m_serialize; + EnvVar m_profile_pass; + + std::string m_manager_name; + std::fstream m_file; +}; + +} // namespace + +ov::pass::Manager::Manager() : m_pass_config(std::make_shared()) {} + +ov::pass::Manager::~Manager() = default; + +ov::pass::Manager::Manager(std::string name) : m_pass_config(std::make_shared()), m_name(std::move(name)) {} + +ov::pass::Manager::Manager(std::shared_ptr pass_config, std::string name) + : m_pass_config(std::move(pass_config)), + m_name(std::move(name)) {} + +void ov::pass::Manager::set_per_pass_validation(bool new_state) { + m_per_pass_validation = new_state; +} + +bool ov::pass::Manager::run_passes(const shared_ptr& model) { + OV_ITT_SCOPED_TASK(ov::itt::domains::core, "pass::Manager::run_passes"); + Profiler profiler(m_name); + + bool model_changed = false; + bool pass_changed_model = false; + + profiler.start_timer(m_name); + for (const auto& pass : m_pass_list) { + const auto& pass_name = pass->get_name(); + + profiler.start_timer(pass_name); + pass_changed_model = run_pass(pass, model, pass_changed_model); + profiler.stop_timer(pass_name, pass_changed_model); + + model_changed = model_changed || pass_changed_model; + + profiler.visualize(model, pass_name); + profiler.serialize(model, pass_name); + } + profiler.stop_timer(m_name, model_changed); + + return model_changed; +} + +bool ov::pass::Manager::run_pass(const std::shared_ptr& pass, + const std::shared_ptr& model, + bool needs_validate) { + if (m_pass_config->is_disabled(pass->get_type_info())) { + OPENVINO_DEBUG("Pass ", pass->get_name(), " is disabled."); + return false; + } + + // This checks if we need to skip the graph transformation when the graph pass relies on + // static shape but the model state is dynamic. + if (pass->get_property(PassProperty::REQUIRE_STATIC_SHAPE) && model->is_dynamic()) { + OPENVINO_DEBUG("Pass ", + pass->get_name(), + " requires static shape but the ", + "model is dynamic. 
Skipping this transformation."); + return false; + } + + OV_ITT_SCOPE(FIRST_INFERENCE, ov::itt::domains::ov_pass, ov::pass::perf_counters()[pass->get_type_info()]); + + if (auto matcher_pass = dynamic_pointer_cast(pass)) { + // GraphRewrite is a temporary container for MatcherPass to make execution on entire ov::Model + return GraphRewrite(matcher_pass).run_on_model(model); + } else if (auto model_pass = dynamic_pointer_cast(pass)) { + if (dynamic_pointer_cast(model_pass) && !needs_validate) { + return false; + } + return model_pass->run_on_model(model); + } + return false; } diff --git a/src/core/src/pass/sdpa_to_paged_attention.cpp b/src/core/src/pass/sdpa_to_paged_attention.cpp index 8630fbefe728ec..8b19b07f2f5d76 100644 --- a/src/core/src/pass/sdpa_to_paged_attention.cpp +++ b/src/core/src/pass/sdpa_to_paged_attention.cpp @@ -101,7 +101,7 @@ bool ov::pass::SDPAToPagedAttention::run_on_model(const std::shared_ptr(position_ids); // it is not always required, so will be disposed if not needed - ov::pass::Manager manager; + ov::pass::Manager manager("SDPA to PA"); manager.set_per_pass_validation(false); manager.register_pass(kv_parameters, model_remaining_params, diff --git a/src/frontends/ir/src/frontend.cpp b/src/frontends/ir/src/frontend.cpp index 74553520410cda..b8e92e8b37b291 100644 --- a/src/frontends/ir/src/frontend.cpp +++ b/src/frontends/ir/src/frontend.cpp @@ -252,7 +252,7 @@ std::string FrontEnd::get_name() const { } void FrontEnd::normalize(const std::shared_ptr& model) const { - ov::pass::Manager manager; + ov::pass::Manager manager("Frontend:IR:normalize"); manager.register_pass(); manager.run_passes(model); } diff --git a/src/frontends/onnx/frontend/src/frontend.cpp b/src/frontends/onnx/frontend/src/frontend.cpp index 529869a7d936d0..d4b83fee20db82 100644 --- a/src/frontends/onnx/frontend/src/frontend.cpp +++ b/src/frontends/onnx/frontend/src/frontend.cpp @@ -93,7 +93,7 @@ std::shared_ptr FrontEnd::convert_partially(const InputModel::Ptr& in if (!m_transformation_extensions.empty()) { auto model = decode(input_model); - ov::pass::Manager manager; + ov::pass::Manager manager("Frontend:ONNX:convert_partially"); for (const auto& transformation : m_transformation_extensions) { transformation->register_pass(manager); } @@ -113,7 +113,7 @@ std::shared_ptr FrontEnd::convert_partially(const InputModel::Ptr& in void FrontEnd::normalize(const std::shared_ptr& model) const { // Here, you can register transformations as a second step of importing process // In particular, you can operate on not supported ops (it allows to N:N ONNX->OV mapping). 
diff --git a/src/frontends/onnx/frontend/src/frontend.cpp b/src/frontends/onnx/frontend/src/frontend.cpp
index 529869a7d936d0..d4b83fee20db82 100644
--- a/src/frontends/onnx/frontend/src/frontend.cpp
+++ b/src/frontends/onnx/frontend/src/frontend.cpp
@@ -93,7 +93,7 @@ std::shared_ptr<ov::Model> FrontEnd::convert_partially(const InputModel::Ptr& input_model) const {
     if (!m_transformation_extensions.empty()) {
         auto model = decode(input_model);
 
-        ov::pass::Manager manager;
+        ov::pass::Manager manager("Frontend:ONNX:convert_partially");
         for (const auto& transformation : m_transformation_extensions) {
             transformation->register_pass(manager);
         }
@@ -113,7 +113,7 @@ std::shared_ptr<ov::Model> FrontEnd::convert_partially(const InputModel::Ptr& input_model) const {
 void FrontEnd::normalize(const std::shared_ptr<ov::Model>& model) const {
     // Here, you can register transformations as a second step of importing process
     // In particular, you can operate on not supported ops (it allows to N:N ONNX->OV mapping).
-    ov::pass::Manager manager;
+    ov::pass::Manager manager("Frontend:ONNX:normalize");
     manager.register_pass(true);
     manager.run_passes(model);
 }
@@ -125,7 +125,7 @@ std::shared_ptr<ov::Model> FrontEnd::convert(const InputModel::Ptr& input_model) const {
     if (!m_transformation_extensions.empty()) {
         auto model = decode(input_model);
 
-        ov::pass::Manager manager;
+        ov::pass::Manager manager("Frontend:ONNX:convert");
         for (const auto& transformation : m_transformation_extensions) {
             transformation->register_pass(manager);
         }
diff --git a/src/frontends/paddle/src/frontend.cpp b/src/frontends/paddle/src/frontend.cpp
index 0b6ab0d4eef331..c6febe08437b5d 100644
--- a/src/frontends/paddle/src/frontend.cpp
+++ b/src/frontends/paddle/src/frontend.cpp
@@ -343,7 +343,7 @@ std::map<...> FrontEnd::convert_each_node_recurs
 
 void FrontEnd::try_remove_internal_ops(const std::vector<std::shared_ptr<Model>>& models) const {
     for (auto& model : models) {
-        ov::pass::Manager manager;
+        ov::pass::Manager manager("Frontend:Paddle:try_remove_internal_ops");
         manager.register_pass(models);
         manager.register_pass(models);
         manager.register_pass(models);
@@ -357,7 +357,7 @@ void FrontEnd::try_remove_internal_ops(const std::vector<std::shared_ptr<Model>>& models) const {
 
 void FrontEnd::fuse_fakequantize_ops(const std::vector<std::shared_ptr<Model>>& models) const {
     for (auto& model : models) {
-        ov::pass::Manager manager;
+        ov::pass::Manager manager("Frontend:Paddle:fuse_fakequantize_ops");
         manager.register_pass();
         manager.run_passes(model);
     }
@@ -506,7 +506,7 @@ std::shared_ptr<ov::Model> FrontEnd::convert_partially(const InputModel::Ptr& model) const {
     if (!m_transformation_extensions.empty()) {
         auto function = decode(model);
 
-        ov::pass::Manager manager;
+        ov::pass::Manager manager("Frontend:Paddle:convert_partially");
         for (const auto& transformation : m_transformation_extensions) {
             transformation->register_pass(manager);
         }
@@ -572,7 +572,7 @@ void FrontEnd::add_extension(const std::shared_ptr<ov::Extension>& extension) {
 }
 
 void FrontEnd::normalize(const std::shared_ptr<ov::Model>& model) const {
-    ov::pass::Manager manager;
+    ov::pass::Manager manager("Frontend:Paddle:normalize");
     manager.register_pass(true);
     manager.run_passes(model);
 }
diff --git a/src/frontends/pytorch/src/frontend.cpp b/src/frontends/pytorch/src/frontend.cpp
index 7e63f3c4aeb456..2cbb5c4d6bc96e 100644
--- a/src/frontends/pytorch/src/frontend.cpp
+++ b/src/frontends/pytorch/src/frontend.cpp
@@ -240,7 +240,7 @@ std::shared_ptr<ov::Model> FrontEnd::decode(const InputModel::Ptr& model) const {
 }
 
 void FrontEnd::normalize(const std::shared_ptr<ov::Model>& model) const {
-    ov::pass::Manager manager;
+    ov::pass::Manager manager("Frontend:Pytorch:normalize");
 
     // GPTQ transformations need to be executed before other passes
     // Once the GPTQ patterns are modified by other transformations,
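A side effect worth documenting for frontend authors: because `run_pass()` wraps a bare `MatcherPass` in a temporary `GraphRewrite`, matcher passes can be registered on a named `Manager` directly. A minimal sketch under that assumption; the pattern and pass are invented for illustration (Relu is idempotent, so `Relu(Relu(x))` folds to `Relu(x)`):

```cpp
#include "openvino/core/graph_util.hpp"
#include "openvino/core/model.hpp"
#include "openvino/op/relu.hpp"
#include "openvino/pass/graph_rewrite.hpp"
#include "openvino/pass/manager.hpp"
#include "openvino/pass/pattern/op/wrap_type.hpp"

// Hypothetical pass: collapses Relu(Relu(x)) into a single Relu.
class FoldDoubleRelu : public ov::pass::MatcherPass {
public:
    OPENVINO_RTTI("FoldDoubleRelu", "0");
    FoldDoubleRelu() {
        auto inner = ov::pass::pattern::wrap_type<ov::op::v0::Relu>();
        auto outer = ov::pass::pattern::wrap_type<ov::op::v0::Relu>({inner});
        register_matcher(std::make_shared<ov::pass::pattern::Matcher>(outer, "FoldDoubleRelu"),
                         [](ov::pass::pattern::Matcher& m) {
                             auto root = m.get_match_root();  // the outer Relu
                             // Rewire consumers of the outer Relu to the inner one.
                             return ov::replace_output_update_name(root->output(0),
                                                                   root->input_value(0));
                         });
    }
};

void fold_double_relu(const std::shared_ptr<ov::Model>& model) {
    ov::pass::Manager manager("Example:FoldDoubleRelu");
    manager.register_pass<FoldDoubleRelu>();  // executed via a temporary GraphRewrite
    manager.run_passes(model);
}
```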
diff --git a/src/frontends/tensorflow/src/frontend.cpp b/src/frontends/tensorflow/src/frontend.cpp
index e2e49355e914f0..aac5811223e135 100644
--- a/src/frontends/tensorflow/src/frontend.cpp
+++ b/src/frontends/tensorflow/src/frontend.cpp
@@ -488,7 +488,7 @@ std::shared_ptr<ov::Model> FrontEnd::convert_partially(const ov::frontend::InputModel::Ptr& model) const {
     if (!m_transformation_extensions.empty()) {
         auto function = decode(model);
 
-        ov::pass::Manager manager;
+        ov::pass::Manager manager("Frontend:TF:convert_partially");
         for (const auto& transformation : m_transformation_extensions) {
             transformation->register_pass(manager);
         }
@@ -557,7 +557,7 @@ void FrontEnd::convert(const std::shared_ptr<ov::Model>& partiallyConverted) const {
 }
 
 void FrontEnd::normalize(const std::shared_ptr<ov::Model>& model) const {
-    ov::pass::Manager manager;
+    ov::pass::Manager manager("Frontend:TF:normalize");
     // Mark quantized and f16/bf16 compressed constants to prevent CF for them,
     // so that not extra memory is used for intermediate decompressed constants.
diff --git a/src/frontends/tensorflow_lite/src/frontend.cpp b/src/frontends/tensorflow_lite/src/frontend.cpp
index 62d75d9c2513c6..15e89301983811 100644
--- a/src/frontends/tensorflow_lite/src/frontend.cpp
+++ b/src/frontends/tensorflow_lite/src/frontend.cpp
@@ -111,7 +111,7 @@ std::shared_ptr<ov::Model> FrontEnd::convert(const ov::frontend::InputModel::Ptr& model) const {
     if (!m_transformation_extensions.empty()) {
         auto ov_model = decode(model);
 
-        ov::pass::Manager manager;
+        ov::pass::Manager manager("Frontend:TFLite:convert");
         for (const auto& transformation : m_transformation_extensions) {
             transformation->register_pass(manager);
         }
@@ -153,7 +153,7 @@ void FrontEnd::convert(const std::shared_ptr<ov::Model>& partiallyConverted) const {
 std::shared_ptr<ov::Model> FrontEnd::convert_partially(const ov::frontend::InputModel::Ptr& model) const {
     if (!m_transformation_extensions.empty()) {
         auto function = decode(model);
 
-        ov::pass::Manager manager;
+        ov::pass::Manager manager("Frontend:TFLite:convert_partially");
         for (const auto& transformation : m_transformation_extensions) {
             transformation->register_pass(manager);
         }
@@ -293,7 +293,7 @@ std::shared_ptr<ov::Model> FrontEnd::decode(const InputModel::Ptr& model) const {
 }
 
 void FrontEnd::normalize(const std::shared_ptr<ov::Model>& function) const {
-    ov::pass::Manager manager;
+    ov::pass::Manager manager("Frontend:TFLite:normalize");
     // Mark quantized and f16/bf16 compressed constants to prevent CF for them,
     // so that not extra memory is used for intermediate decompressed constants.
     manager.register_pass();
diff --git a/src/frontends/tensorflow_lite/src/tflite_transformations/tflite_quantize_resolver.cpp b/src/frontends/tensorflow_lite/src/tflite_transformations/tflite_quantize_resolver.cpp
index aa38ed72d839e5..ffa8438817bb77 100644
--- a/src/frontends/tensorflow_lite/src/tflite_transformations/tflite_quantize_resolver.cpp
+++ b/src/frontends/tensorflow_lite/src/tflite_transformations/tflite_quantize_resolver.cpp
@@ -198,7 +198,7 @@ pass::TFLQuantizeReplacer::TFLQuantizeReplacer() {
 }
 
 bool pass::TFLQuantizeResolver::run_on_model(const std::shared_ptr<ov::Model>& m) {
-    ov::pass::Manager manager;
+    ov::pass::Manager manager("Frontend:TFLite:TFLQuantizeResolver");
     manager.register_pass();
     manager.register_pass();
     manager.run_passes(m);
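`TFLQuantizeResolver` above illustrates a pattern this naming makes easier to trace: a `ModelPass` that drives its own nested `Manager`. With `run_passes()` now returning whether the model changed, the inner result can be propagated directly to the outer manager. A sketch of that shape (the class and inner pass are illustrative, not from this patch):

```cpp
#include "openvino/core/model.hpp"
#include "openvino/pass/constant_folding.hpp"
#include "openvino/pass/manager.hpp"
#include "openvino/pass/pass.hpp"

// Hypothetical composite pass running an internal pipeline under its own label.
class MyCompositePass : public ov::pass::ModelPass {
public:
    OPENVINO_RTTI("MyCompositePass", "0");
    bool run_on_model(const std::shared_ptr<ov::Model>& model) override {
        ov::pass::Manager manager("Example:MyCompositePass");
        manager.register_pass<ov::pass::ConstantFolding>();
        // Propagate "model changed" to the outer manager's bookkeeping.
        return manager.run_passes(model);
    }
};
```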
diff --git a/src/plugins/auto_batch/src/plugin.cpp b/src/plugins/auto_batch/src/plugin.cpp
index d97987bea6f39a..2e3e17cd43086d 100644
--- a/src/plugins/auto_batch/src/plugin.cpp
+++ b/src/plugins/auto_batch/src/plugin.cpp
@@ -162,7 +162,7 @@ std::shared_ptr<ov::ICompiledModel> Plugin::compile_model(const std::shared_ptr<const ov::Model>& model,
     const bool check_dims = (enable_tput_plugin || enable_tput_cfg);
     // find the batch dim
     auto cloned_model = model->clone();
-    ov::pass::Manager pass_manager;
+    ov::pass::Manager pass_manager("Plugin:AutoBatch");
     pass_manager.register_pass();
     pass_manager.register_pass(false, check_dims);
     pass_manager.run_passes(cloned_model);
diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/convert_to_cpu_specific_opset.hpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/convert_to_cpu_specific_opset.hpp
index 934a86bbc8b30e..8667f85e380449 100644
--- a/src/plugins/intel_cpu/src/transformations/cpu_opset/convert_to_cpu_specific_opset.hpp
+++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/convert_to_cpu_specific_opset.hpp
@@ -29,7 +29,7 @@ namespace intel_cpu {
 
 inline void ConvertToCPUSpecificOpset(std::shared_ptr<ov::Model> &nGraphFunc, int subStreamNum) {
     RUN_ON_FUNCTION_SCOPE(ConvertToCPUSpecificOpset);
-    ov::pass::Manager manager;
+    ov::pass::Manager manager("CPU:ConvertToCPUSpecificOpset");
     manager.set_per_pass_validation(false);
     CPU_REGISTER_PASS_COMMON(manager, ConvertMatMulToFC);
     CPU_REGISTER_PASS_X64(manager, MoveFCReshapeToWeights);
diff --git a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp
index e8ac0bbd9748ad..cd8ce3062c815b 100644
--- a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp
+++ b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp
@@ -307,7 +307,7 @@ void Transformations::PreLpt(const std::vector<ov::element::Type>& defaultPrecis
     // Decompression handling related transformations must be run separately from common preLPT pipeline
     // since there is used the same transformations as in LPT related transformations, but with the specific settings.
     // This must be done in order to keep compressed MatMul weights with decompression operations as is
-    ov::pass::Manager decompression_handling_manager;
+    ov::pass::Manager decompression_handling_manager("CPU:DecompressionHandling");
     decompression_handling_manager.set_per_pass_validation(false);
     CPU_REGISTER_PASS_COMMON(decompression_handling_manager, ov::pass::InitNodeInfo);
     const bool useLpt = !defaultPrecisions.empty();
@@ -346,7 +346,7 @@ void Transformations::PreLpt(const std::vector<ov::element::Type>& defaultPrecis
                      ov::pass::ConvertGatherToGatherCompressed);
     decompression_handling_manager.run_passes(model);
 
-    ov::pass::Manager manager;
+    ov::pass::Manager manager("Plugin:CPU");
     manager.set_per_pass_validation(false);
     if (useLpt)
         CPU_REGISTER_PASS_COMMON(manager, ov::pass::MarkDequantizationSubgraph, defaultPrecisions);
@@ -732,7 +732,7 @@ void Transformations::Lpt(const std::vector<ov::element::Type>& defaultPrecision
             QuantizationGranularityRestriction::create({0})
         });
 
-    ov::pass::Manager lptManager;
+    ov::pass::Manager lptManager("CPU:LPT");
     CPU_REGISTER_PASS_COMMON(lptManager,
                              LowPrecision,
                              supportedPrecisions,
                              quantizationRestrictions,
@@ -781,7 +781,7 @@ void Transformations::Lpt(const std::vector<ov::element::Type>& defaultPrecision
 
 void Transformations::PostLpt() {
     CPU_DEBUG_CAP_TRANSFORMATION_SCOPE(this, PostLpt);
 
-    ov::pass::Manager postLPTPassManager;
+    ov::pass::Manager postLPTPassManager("CPU:PostLPT");
     postLPTPassManager.set_per_pass_validation(false);
     CPU_REGISTER_PASS_COMMON(postLPTPassManager, ov::pass::UnrollTensorIterator);
     CPU_REGISTER_PASS_COMMON(postLPTPassManager, ov::pass::ReshapePRelu);
@@ -904,7 +904,7 @@ void Transformations::MainSnippets(void) {
                                                 mha_token_enable_transpose_on_output,
                                                 is_dynamic_mha_token_enabled,
                                                 mha_supported_transpose_ranks);
-    ov::pass::Manager snippetsManager;
+    ov::pass::Manager snippetsManager("CPU:Snippets");
     snippetsManager.set_per_pass_validation(false);
     if (!ignoreCallback) {
 #if defined(OPENVINO_ARCH_ARM64)
@@ -1106,7 +1106,7 @@ void Transformations::MainSnippets(void) {
 }
 
 void Transformations::PostSnippets(void) {
-    ov::pass::Manager postSnippetsManager;
+    ov::pass::Manager postSnippetsManager("CPU:PostSnippets");
     postSnippetsManager.set_per_pass_validation(false);
     CPU_REGISTER_PASS_COMMON(postSnippetsManager, ov::pass::FakeQuantizeDecomposition);
     CPU_SET_CALLBACK_COMMON(postSnippetsManager,
diff --git a/src/plugins/intel_gpu/src/plugin/transformations/convert_convolution.cpp b/src/plugins/intel_gpu/src/plugin/transformations/convert_convolution.cpp
index 6a61eae3ed4996..656b4c6fd99c20 100644
--- a/src/plugins/intel_gpu/src/plugin/transformations/convert_convolution.cpp
+++ b/src/plugins/intel_gpu/src/plugin/transformations/convert_convolution.cpp
@@ -227,7 +227,7 @@ ConvolutionMatcher::ConvolutionMatcher() {
 }
 
 bool ConvertConvolutionToInternal::run_on_model(const std::shared_ptr<ov::Model>& m) {
-    ov::pass::Manager manager;
+    ov::pass::Manager manager("ConvertConvolutionToInternal");
     auto pass_config = manager.get_pass_config();
     manager.set_per_pass_validation(false);
     manager.register_pass();
diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
index dbe7e858c1e6fe..e99d3851974629 100644
--- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
+++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
@@ -208,7 +208,7 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
     bool enableInt8;
     bool unroll_loop = config.get_property(ov::intel_gpu::enable_loop_unrolling);
     {
-        ov::pass::Manager manager;
+        ov::pass::Manager manager("Plugin:GPU");
         auto pass_config = manager.get_pass_config();
         manager.set_per_pass_validation(false);
@@ -667,7 +667,7 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
             QuantizationGranularityRestriction::create({0}),
         });
 
-        ov::pass::Manager lptManager;
+        ov::pass::Manager lptManager("GPU:LPT");
 
         auto lptPassConfig = lptManager.get_pass_config();
         // quantized LSTMSequence / GPUSequence are not supported yet. Avoid extra transformation
@@ -756,7 +756,7 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
     {
         OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "TransformationsPipeline::apply::run_passes");
-        ov::pass::Manager manager;
+        ov::pass::Manager manager("GPU:UnrollTensorIterator");
         // This ConstantFolding pass is added to fold reshapes added for constant inputs on NMS internal operation which prevents upper-bound calculation
         // TODO: check why we have these reshapes
         manager.register_pass<ov::pass::ConstantFolding>();
@@ -776,7 +776,7 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
     }
 
     {
-        ov::pass::Manager manager;
+        ov::pass::Manager manager("GPU:PostLPT");
 
         // Other ops support eltwise fusions
         const std::vector<ov::DiscreteTypeInfo> allowed_data_movement_ops = {
diff --git a/src/plugins/intel_npu/src/compiler/src/graph_transformations.cpp b/src/plugins/intel_npu/src/compiler/src/graph_transformations.cpp
index c0881d75cd7be9..cc9655a38dd3ff 100644
--- a/src/plugins/intel_npu/src/compiler/src/graph_transformations.cpp
+++ b/src/plugins/intel_npu/src/compiler/src/graph_transformations.cpp
@@ -33,7 +33,7 @@ IRSerializer::IRSerializer(const std::shared_ptr<const ov::Model>& origModel, const uint32_t supportedOpset)
 
 void IRSerializer::serializeModelToStream(std::ostream& xml, std::ostream& weights) {
     _logger.debug("serializeModelToStream");
     const auto passConfig = std::make_shared<ov::pass::PassConfig>();
-    ov::pass::Manager manager(passConfig);
+    ov::pass::Manager manager(passConfig, "NPU:serializeModelToStream");
 
     if (_supportedOpset < 11) {
         // Downgrade to opset10
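The NPU change above exercises the new two-argument constructor, pairing an externally created `PassConfig` with a trace-friendly name. Sharing a config this way lets the caller enable or disable passes before the manager runs; a hedged sketch (disabling ConstantFolding is just an example, not what the NPU serializer does):

```cpp
#include "openvino/core/model.hpp"
#include "openvino/pass/constant_folding.hpp"
#include "openvino/pass/manager.hpp"
#include "openvino/pass/pass_config.hpp"

void run_with_shared_config(const std::shared_ptr<ov::Model>& model) {
    auto pass_config = std::make_shared<ov::pass::PassConfig>();
    pass_config->disable<ov::pass::ConstantFolding>();  // vetoed for this pipeline

    // Two-argument constructor from this patch: shared config plus a name.
    ov::pass::Manager manager(pass_config, "Example:SharedConfig");
    manager.register_pass<ov::pass::ConstantFolding>();  // registered, but disabled above
    manager.run_passes(model);
}
```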
diff --git a/src/plugins/template/src/plugin.cpp b/src/plugins/template/src/plugin.cpp
index b0abb1c232e52b..ee885f67e188b5 100644
--- a/src/plugins/template/src/plugin.cpp
+++ b/src/plugins/template/src/plugin.cpp
@@ -64,7 +64,7 @@ ov::SoPtr<ov::IRemoteContext> ov::template_plugin::Plugin::get_default_context(
 // ! [plugin:transform_model]
 void transform_model(const std::shared_ptr<ov::Model>& model) {
     // Perform common optimizations and device-specific transformations
-    ov::pass::Manager passManager;
+    ov::pass::Manager passManager("Plugin:Template");
     // Example: register CommonOptimizations transformation from transformations library
     passManager.register_pass<ov::pass::CommonOptimizations>();
     // Disable some transformations