From 6ab3bf4a70149fae5620924d18a53151694af89f Mon Sep 17 00:00:00 2001 From: Nadezhda Ageeva Date: Wed, 6 Sep 2023 18:09:24 +0400 Subject: [PATCH] [NVIDIA] Enable clang format (#716) * [NVIDIA] Enable clang format * [NVIDIA] Format code --- modules/nvidia_plugin/src/CMakeLists.txt | 16 +- modules/nvidia_plugin/src/cuda/graph.hpp | 21 +- .../src/cuda_async_infer_request.hpp | 3 +- .../src/cuda_eager_topology_runner.hpp | 3 +- .../nvidia_plugin/src/cuda_graph_context.hpp | 1 + .../nvidia_plugin/src/cuda_infer_request.hpp | 16 +- .../src/cuda_inference_request_context.hpp | 2 +- .../src/cuda_op_buffers_extractor.hpp | 2 +- .../src/cuda_operation_registry.hpp | 15 +- modules/nvidia_plugin/src/cuda_plugin.hpp | 7 +- modules/nvidia_plugin/src/cuda_profiler.hpp | 6 +- modules/nvidia_plugin/src/kernels/abs.hpp | 2 - modules/nvidia_plugin/src/kernels/cos.hpp | 2 - modules/nvidia_plugin/src/kernels/cosh.hpp | 2 - .../src/kernels/details/typed_functor.hpp | 8 +- modules/nvidia_plugin/src/kernels/exp.hpp | 2 - modules/nvidia_plugin/src/kernels/gelu.hpp | 1 - modules/nvidia_plugin/src/kernels/hswish.hpp | 2 - modules/nvidia_plugin/src/kernels/log.hpp | 2 - .../nvidia_plugin/src/kernels/logical_not.cuh | 5 +- modules/nvidia_plugin/src/kernels/mish.hpp | 2 - modules/nvidia_plugin/src/kernels/sin.hpp | 2 - modules/nvidia_plugin/src/kernels/sinh.hpp | 2 - modules/nvidia_plugin/src/kernels/sqrt.hpp | 2 - .../memory_manager/cuda_device_mem_block.hpp | 6 +- modules/nvidia_plugin/src/ops/abs.hpp | 2 +- modules/nvidia_plugin/src/ops/add_cuda.hpp | 3 +- modules/nvidia_plugin/src/ops/converters.hpp | 2 +- .../convolution_cudnn_components.hpp | 6 +- modules/nvidia_plugin/src/ops/cos.hpp | 2 +- modules/nvidia_plugin/src/ops/cosh.hpp | 2 +- modules/nvidia_plugin/src/ops/divide_cuda.hpp | 3 +- .../src/ops/elementwise_binary.hpp | 4 +- .../src/ops/elementwise_unary.hpp | 4 +- modules/nvidia_plugin/src/ops/elu.hpp | 9 +- modules/nvidia_plugin/src/ops/exp.hpp | 2 +- .../nvidia_plugin/src/ops/fake_quantize.hpp | 2 +- modules/nvidia_plugin/src/ops/floor.hpp | 2 +- modules/nvidia_plugin/src/ops/floor_mod.hpp | 3 +- modules/nvidia_plugin/src/ops/gelu.hpp | 2 +- modules/nvidia_plugin/src/ops/hswish.hpp | 2 +- modules/nvidia_plugin/src/ops/log.hpp | 2 +- modules/nvidia_plugin/src/ops/matmul.hpp | 3 +- modules/nvidia_plugin/src/ops/maximum.hpp | 3 +- modules/nvidia_plugin/src/ops/minimum.hpp | 3 +- modules/nvidia_plugin/src/ops/mish.hpp | 2 +- modules/nvidia_plugin/src/ops/mod.hpp | 3 +- .../nvidia_plugin/src/ops/multiply_cuda.hpp | 3 +- modules/nvidia_plugin/src/ops/mvn.hpp | 3 +- modules/nvidia_plugin/src/ops/prelu.hpp | 2 +- modules/nvidia_plugin/src/ops/reduce.hpp | 1 + modules/nvidia_plugin/src/ops/sin.hpp | 2 +- modules/nvidia_plugin/src/ops/sinh.hpp | 2 +- modules/nvidia_plugin/src/ops/sqrt.hpp | 2 +- .../src/ops/squared_difference.hpp | 3 +- .../nvidia_plugin/src/ops/strided_slice.hpp | 2 +- modules/nvidia_plugin/src/ops/subgraph.hpp | 1 + modules/nvidia_plugin/src/ops/subtract.hpp | 3 +- ...idirectional_lstm_sequence_composition.hpp | 3 +- .../src/transformer/concat_transformation.hpp | 3 +- .../transformer/cuda_graph_transformer.hpp | 30 +- .../src/transformer/nodes/activation_type.hpp | 3 +- .../nodes/lstm_sequence_optimized.hpp | 39 +- ...emove_redundant_convert_transformation.hpp | 1 - .../src/transformer/rt_info/cuda_node_id.hpp | 10 +- .../nvidia_plugin/src/utils/perf_timing.hpp | 13 +- .../tests/functional/CMakeLists.txt | 3 +- .../tests/functional/core_config.cpp | 11 +- .../executable_network/get_metric.cpp | 10 +- .../behavior/infer_request/infer_request.cpp | 2 +- .../ov_executable_network/properties.cpp | 20 +- .../behavior/plugin/config.cpp | 2 +- .../single_layer_tests/activation.cpp | 59 ++- .../single_layer_tests/activation_slt.cpp | 10 +- .../convolution_backprop_data.cpp | 11 +- .../convolution_backprop_data_add.cpp | 11 +- .../convolution_biasadd_activation.hpp | 2 +- .../single_layer_tests/cuda_eltwise.cpp | 2 +- .../single_layer_tests/detection_output.cpp | 10 +- .../single_layer_tests/finite_comparer.cpp | 11 +- .../single_layer_tests/fully_connected.cpp | 27 +- .../single_layer_tests/gather.cpp | 10 +- .../group_convolution_autogenerated.cpp | 4 +- .../group_convolution_biasadd_activation.cpp | 42 +- .../single_layer_tests/lstm_cell.cpp | 10 +- .../single_layer_tests/lstm_sequence.cpp | 145 ++++-- .../single_layer_tests/reduce.cpp | 414 ++++++++---------- .../single_layer_tests/tensor_iterator.cpp | 16 +- .../nvidia_plugin/tests/unit/CMakeLists.txt | 3 +- .../tests/unit/compile_model.cpp | 21 +- modules/nvidia_plugin/tests/unit/concat.cpp | 3 +- .../tests/unit/convert_benchmark.cpp | 10 +- .../tests/unit/cuda/wrappers/graph.cpp | 39 +- .../unit/cuda_graph_topology_runner_test.cpp | 46 +- .../tests/unit/is_cuda_graph_compatible.cpp | 10 +- modules/nvidia_plugin/tests/unit/limits.cpp | 15 +- ...da_immutable_memory_block_builder_test.cpp | 8 +- .../cuda_memory_manager_test.cpp | 13 +- ...da_immutable_memory_model_builder_test.cpp | 3 +- .../model/cuda_memory_model_builder_test.cpp | 12 +- .../tests/unit/op_buffers_extractor_test.cpp | 7 +- modules/nvidia_plugin/tests/unit/plugin.cpp | 14 +- modules/nvidia_plugin/tests/unit/result.cpp | 20 +- .../tests/unit/select_benchmark.cpp | 10 +- .../tests/unit/test_networks.hpp | 2 +- ...idirectional_lstm_sequence_composition.cpp | 317 +++++++------- .../transformations/concat_transformation.cpp | 8 +- .../fuse_conv_biasadd_activation.cpp | 376 +++++++++------- .../unit/transformations/fuse_matmul_add.cpp | 16 +- .../transformations/reduce_transformation.cpp | 25 +- ...emove_redundant_convert_transformation.cpp | 3 +- 111 files changed, 1101 insertions(+), 1036 deletions(-) diff --git a/modules/nvidia_plugin/src/CMakeLists.txt b/modules/nvidia_plugin/src/CMakeLists.txt index 5c3e6fd0d..9402ced16 100644 --- a/modules/nvidia_plugin/src/CMakeLists.txt +++ b/modules/nvidia_plugin/src/CMakeLists.txt @@ -10,15 +10,14 @@ set(HEADER_MASKS *.hpp *.cuh) file(GLOB_RECURSE SOURCES - RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" CONFIGURE_DEPENDS ${SOURCE_MASKS} ) -list(REMOVE_ITEM SOURCES cuda_create_plugin.cpp cuda_create_extensions.cpp) -list(FILTER SOURCES EXCLUDE REGEX "^ops/examples/.*$") +list(FILTER SOURCES EXCLUDE REGEX ".*cuda_create_plugin\\.cpp$") +list(FILTER SOURCES EXCLUDE REGEX ".*cuda_create_extensions\\.cpp$") +list(FILTER SOURCES EXCLUDE REGEX ".*ops/examples/.*$") file(GLOB_RECURSE HEADERS - RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" ${HEADER_MASKS} ) @@ -32,14 +31,15 @@ if(ENABLE_PROXY) endif() # Adds a shared library with plugin -ie_add_plugin(NAME ${TARGET_NAME} +ov_add_plugin(NAME ${TARGET_NAME} DEVICE_NAME "NVIDIA" SOURCES ${HEADERS} cuda_create_plugin.cpp cuda_create_extensions.cpp DEFAULT_CONFIG ${PLUGIN_DEFAULT_CONFIG} - VERSION_DEFINES_FOR cuda_create_plugin.cpp) + VERSION_DEFINES_FOR cuda_create_plugin.cpp + ADD_CLANG_FORMAT) # Enable support of CC for the plugin -ie_mark_target_as_cc(${TARGET_NAME}) +ov_mark_target_as_cc(${TARGET_NAME}) set_property(TARGET ${OBJ_NAME} PROPERTY CUDA_ARCHITECTURES ${CMAKE_CUDA_ARCHITECTURES}) @@ -77,6 +77,6 @@ target_link_libraries(${OBJ_NAME} # ATTENTION: uncomment to register a plugin in the plugins.xml file if(ENABLE_NVIDIA) - ie_register_plugins(MAIN_TARGET ${TARGET_NAME} + ov_register_plugins(MAIN_TARGET ${TARGET_NAME} POSSIBLE_PLUGINS ${TARGET_NAME}) endif() diff --git a/modules/nvidia_plugin/src/cuda/graph.hpp b/modules/nvidia_plugin/src/cuda/graph.hpp index 220c79820..2bbef1d08 100644 --- a/modules/nvidia_plugin/src/cuda/graph.hpp +++ b/modules/nvidia_plugin/src/cuda/graph.hpp @@ -4,15 +4,16 @@ #pragma once -#include "runtime.hpp" #include +#include "runtime.hpp" + namespace CUDA { class GraphCapture; class CaptureInfo; -class Graph: public Handle { +class Graph : public Handle { public: Graph(unsigned int flags); @@ -23,14 +24,14 @@ class Graph: public Handle { private: Graph(cudaGraph_t graph); - static cudaError_t createFromNative(cudaGraph_t *pGraph, cudaGraph_t anotherGraph); + static cudaError_t createFromNative(cudaGraph_t* pGraph, cudaGraph_t anotherGraph); static cudaGraph_t createNativeWithFlags(unsigned int flags); }; bool operator==(const Graph& rhs, const Graph& lhs); -class GraphExec: public Handle { +class GraphExec : public Handle { public: GraphExec(const Graph& g); @@ -73,16 +74,18 @@ class GraphCapture { private: Stream stream_; - cudaGraph_t cudaGraph_ {}; - cudaError_t capturedError_ {cudaSuccess}; - std::optional graph_ {}; + cudaGraph_t cudaGraph_{}; + cudaError_t capturedError_{cudaSuccess}; + std::optional graph_{}; }; class UploadNode { friend CaptureInfo; + public: void update_src(const GraphExec& exec, const void* src); bool operator==(const UploadNode& rhs) const; + private: UploadNode(cudaGraphNode_t node, CUDA::DevicePointer dst, const void* src, std::size_t size); cudaGraphNode_t node_; @@ -93,9 +96,11 @@ class UploadNode { class DownloadNode { friend CaptureInfo; + public: void update_dst(const GraphExec& exec, void* dst); bool operator==(const DownloadNode& rhs) const; + private: DownloadNode(cudaGraphNode_t node, void* dst, CUDA::DevicePointer src, std::size_t size); cudaGraphNode_t node_; @@ -118,4 +123,4 @@ class CaptureInfo { size_t depCount_; }; -}// namespace CUDA +} // namespace CUDA diff --git a/modules/nvidia_plugin/src/cuda_async_infer_request.hpp b/modules/nvidia_plugin/src/cuda_async_infer_request.hpp index 893e48332..6be416159 100644 --- a/modules/nvidia_plugin/src/cuda_async_infer_request.hpp +++ b/modules/nvidia_plugin/src/cuda_async_infer_request.hpp @@ -4,11 +4,10 @@ #pragma once +#include "cuda_infer_request.hpp" #include "openvino/runtime/iasync_infer_request.hpp" #include "openvino/runtime/iinfer_request.hpp" -#include "cuda_infer_request.hpp" - namespace ov { namespace nvidia_gpu { diff --git a/modules/nvidia_plugin/src/cuda_eager_topology_runner.hpp b/modules/nvidia_plugin/src/cuda_eager_topology_runner.hpp index a5419d2c2..67230df6b 100644 --- a/modules/nvidia_plugin/src/cuda_eager_topology_runner.hpp +++ b/modules/nvidia_plugin/src/cuda_eager_topology_runner.hpp @@ -5,6 +5,7 @@ #pragma once #include + #include "cuda_itopology_runner.hpp" namespace ov { @@ -16,7 +17,7 @@ class EagerTopologyRunner final : public SubGraph, public ITopologyRunner { ~EagerTopologyRunner() override = default; void Run(const InferenceRequestContext& context, const DeviceMemBlock& memoryBlock) const override; - void UpdateContext(InferenceRequestContext& context, const DeviceMemBlock& memoryBlock) const override {}; + void UpdateContext(InferenceRequestContext& context, const DeviceMemBlock& memoryBlock) const override{}; const SubGraph& GetSubGraph() const override; }; diff --git a/modules/nvidia_plugin/src/cuda_graph_context.hpp b/modules/nvidia_plugin/src/cuda_graph_context.hpp index 682e18e70..c0ca01e18 100644 --- a/modules/nvidia_plugin/src/cuda_graph_context.hpp +++ b/modules/nvidia_plugin/src/cuda_graph_context.hpp @@ -5,6 +5,7 @@ #pragma once #include + #include "cuda_tensor_mapping_context.hpp" namespace ov { diff --git a/modules/nvidia_plugin/src/cuda_infer_request.hpp b/modules/nvidia_plugin/src/cuda_infer_request.hpp index 0010620f9..b01e2890c 100644 --- a/modules/nvidia_plugin/src/cuda_infer_request.hpp +++ b/modules/nvidia_plugin/src/cuda_infer_request.hpp @@ -13,18 +13,15 @@ #include #include - -#include "openvino/runtime/tensor.hpp" -#include "openvino/runtime/isync_infer_request.hpp" -#include "openvino/itt.hpp" - - #include "cancellation_token.hpp" #include "cuda_config.hpp" -#include "cuda_operation_base.hpp" #include "cuda_iexecution_delegator.hpp" +#include "cuda_operation_base.hpp" #include "memory_manager/cuda_memory_manager.hpp" #include "memory_manager/cuda_memory_pool.hpp" +#include "openvino/itt.hpp" +#include "openvino/runtime/isync_infer_request.hpp" +#include "openvino/runtime/tensor.hpp" #include "utils/perf_timing.hpp" namespace ov { @@ -33,7 +30,7 @@ namespace nvidia_gpu { class CompiledModel; // ! [infer_request:header] -class CudaInferRequest : public ov::ISyncInferRequest { +class CudaInferRequest : public ov::ISyncInferRequest { public: using Ptr = std::shared_ptr; @@ -52,7 +49,8 @@ class CudaInferRequest : public ov::ISyncInferRequest { void infer_postprocess(); void cancel(); - void set_tensors_impl(const ov::Output port, const std::vector>& tensors) override; + void set_tensors_impl(const ov::Output port, + const std::vector>& tensors) override; private: std::shared_ptr get_nvidia_model(); diff --git a/modules/nvidia_plugin/src/cuda_inference_request_context.hpp b/modules/nvidia_plugin/src/cuda_inference_request_context.hpp index 6eea4f1e6..3e2bb26fb 100644 --- a/modules/nvidia_plugin/src/cuda_inference_request_context.hpp +++ b/modules/nvidia_plugin/src/cuda_inference_request_context.hpp @@ -7,9 +7,9 @@ #include #include "cancellation_token.hpp" +#include "cuda_graph_context.hpp" #include "cuda_tensor_mapping_context.hpp" #include "cuda_thread_context.hpp" -#include "cuda_graph_context.hpp" namespace ov { namespace nvidia_gpu { diff --git a/modules/nvidia_plugin/src/cuda_op_buffers_extractor.hpp b/modules/nvidia_plugin/src/cuda_op_buffers_extractor.hpp index 814c74c56..5f7604808 100644 --- a/modules/nvidia_plugin/src/cuda_op_buffers_extractor.hpp +++ b/modules/nvidia_plugin/src/cuda_op_buffers_extractor.hpp @@ -14,8 +14,8 @@ #include #include -#include "openvino/core/node.hpp" #include "memory_manager/cuda_workbuffers.hpp" +#include "openvino/core/node.hpp" namespace ov { namespace nvidia_gpu { diff --git a/modules/nvidia_plugin/src/cuda_operation_registry.hpp b/modules/nvidia_plugin/src/cuda_operation_registry.hpp index 83d5b7495..89e5518e3 100644 --- a/modules/nvidia_plugin/src/cuda_operation_registry.hpp +++ b/modules/nvidia_plugin/src/cuda_operation_registry.hpp @@ -129,13 +129,14 @@ class OperationRegistry::Register { * where NodeOp is a type's inner alias for a concrete OpenVINO Node class * @param name - a textual operator's name */ -#define OPERATION_REGISTER(type, name) \ - extern "C" { \ - [[maybe_unused]] const ::ov::nvidia_gpu::OperationRegistry::Register openvino_cuda_op_register_##name{#name}; \ +#define OPERATION_REGISTER(type, name) \ + extern "C" { \ + [[maybe_unused]] const ::ov::nvidia_gpu::OperationRegistry::Register openvino_cuda_op_register_##name{ \ + #name}; \ } -#define OPERATION_REGISTER_FACTORY(factory, name) \ - extern "C" { \ - [[maybe_unused]] const ::ov::nvidia_gpu::OperationRegistry::Register openvino_cuda_op_register_##name{ \ - #name, factory}; \ +#define OPERATION_REGISTER_FACTORY(factory, name) \ + extern "C" { \ + [[maybe_unused]] const ::ov::nvidia_gpu::OperationRegistry::Register \ + openvino_cuda_op_register_##name{#name, factory}; \ } diff --git a/modules/nvidia_plugin/src/cuda_plugin.hpp b/modules/nvidia_plugin/src/cuda_plugin.hpp index f46b47069..44ac356a2 100644 --- a/modules/nvidia_plugin/src/cuda_plugin.hpp +++ b/modules/nvidia_plugin/src/cuda_plugin.hpp @@ -4,13 +4,12 @@ #pragma once +#include "cuda_compiled_model.hpp" +#include "cuda_config.hpp" +#include "cuda_thread_pool.hpp" #include "openvino/runtime/icompiled_model.hpp" #include "openvino/runtime/iplugin.hpp" #include "openvino/runtime/threading/itask_executor.hpp" - -#include "cuda_config.hpp" -#include "cuda_compiled_model.hpp" -#include "cuda_thread_pool.hpp" #include "transformer/cuda_graph_transformer.hpp" namespace ov { diff --git a/modules/nvidia_plugin/src/cuda_profiler.hpp b/modules/nvidia_plugin/src/cuda_profiler.hpp index 5eaa61dd0..cea8b53c7 100644 --- a/modules/nvidia_plugin/src/cuda_profiler.hpp +++ b/modules/nvidia_plugin/src/cuda_profiler.hpp @@ -96,8 +96,8 @@ class Profiler : public IExecutionDelegator { void collect_subgraphs(const SubGraph& graph, std::vector& vector); void collect_node_visitor(const OperationBase::Ptr& execStep, - std::vector& perfSteps, - std::vector& allExecSequence); + std::vector& perfSteps, + std::vector& allExecSequence); const CUDA::Stream* active_stream_ = nullptr; std::vector>> subgraph_perf_steps_map_; @@ -109,7 +109,7 @@ class Profiler : public IExecutionDelegator { std::array(PerfStages::NumOfStages)> durations_; Time::time_point start_{}; size_t infer_count_{}; - CUDA::Event::RecordMode cuda_event_record_mode_ {CUDA::Event::RecordMode::Default}; + CUDA::Event::RecordMode cuda_event_record_mode_{CUDA::Event::RecordMode::Default}; }; class Profiler::ProfileExecStep { diff --git a/modules/nvidia_plugin/src/kernels/abs.hpp b/modules/nvidia_plugin/src/kernels/abs.hpp index bfbbf2f90..82d8aae66 100644 --- a/modules/nvidia_plugin/src/kernels/abs.hpp +++ b/modules/nvidia_plugin/src/kernels/abs.hpp @@ -29,5 +29,3 @@ class Abs { } // namespace kernel } // namespace nvidia_gpu } // namespace ov - - diff --git a/modules/nvidia_plugin/src/kernels/cos.hpp b/modules/nvidia_plugin/src/kernels/cos.hpp index f87cacda9..f4c849c70 100644 --- a/modules/nvidia_plugin/src/kernels/cos.hpp +++ b/modules/nvidia_plugin/src/kernels/cos.hpp @@ -29,5 +29,3 @@ class Cos { } // namespace kernel } // namespace nvidia_gpu } // namespace ov - - diff --git a/modules/nvidia_plugin/src/kernels/cosh.hpp b/modules/nvidia_plugin/src/kernels/cosh.hpp index 7047c0e6a..e3669876f 100644 --- a/modules/nvidia_plugin/src/kernels/cosh.hpp +++ b/modules/nvidia_plugin/src/kernels/cosh.hpp @@ -29,5 +29,3 @@ class Cosh { } // namespace kernel } // namespace nvidia_gpu } // namespace ov - - diff --git a/modules/nvidia_plugin/src/kernels/details/typed_functor.hpp b/modules/nvidia_plugin/src/kernels/details/typed_functor.hpp index 591093188..751bbab5a 100644 --- a/modules/nvidia_plugin/src/kernels/details/typed_functor.hpp +++ b/modules/nvidia_plugin/src/kernels/details/typed_functor.hpp @@ -68,8 +68,8 @@ struct TypedFunctor : private std::array(type) - type_t_first_value; if (type_idx >= this->size()) { throw_ov_exception(fmt::format("TypedFunctor[Dimension={}]: Type = {} is not supported by TypedFunctor !!", - DIM_1D, - static_cast(type_idx))); + DIM_1D, + static_cast(type_idx))); } return std::array, type_count>::operator[](type_idx); } @@ -92,8 +92,8 @@ struct TypedFunctor : private std::array(type) - type_t_first_value; if (type_idx >= this->size()) { throw_ov_exception(fmt::format("TypedFunctor[Dimension={}]: Type = {} is not supported by TypedFunctor !!", - DIM_1D, - static_cast(type_idx))); + DIM_1D, + static_cast(type_idx))); } return std::array::operator[](type_idx); } diff --git a/modules/nvidia_plugin/src/kernels/exp.hpp b/modules/nvidia_plugin/src/kernels/exp.hpp index 299f5de2c..147f2addb 100644 --- a/modules/nvidia_plugin/src/kernels/exp.hpp +++ b/modules/nvidia_plugin/src/kernels/exp.hpp @@ -29,5 +29,3 @@ class Exp { } // namespace kernel } // namespace nvidia_gpu } // namespace ov - - diff --git a/modules/nvidia_plugin/src/kernels/gelu.hpp b/modules/nvidia_plugin/src/kernels/gelu.hpp index 5a0434d25..5b9a3528a 100644 --- a/modules/nvidia_plugin/src/kernels/gelu.hpp +++ b/modules/nvidia_plugin/src/kernels/gelu.hpp @@ -46,4 +46,3 @@ class GeluTanh { } // namespace kernel } // namespace nvidia_gpu } // namespace ov - diff --git a/modules/nvidia_plugin/src/kernels/hswish.hpp b/modules/nvidia_plugin/src/kernels/hswish.hpp index a9ded3cab..6f465d533 100644 --- a/modules/nvidia_plugin/src/kernels/hswish.hpp +++ b/modules/nvidia_plugin/src/kernels/hswish.hpp @@ -29,5 +29,3 @@ class HSwish { } // namespace kernel } // namespace nvidia_gpu } // namespace ov - - diff --git a/modules/nvidia_plugin/src/kernels/log.hpp b/modules/nvidia_plugin/src/kernels/log.hpp index 93472d44d..976264e14 100644 --- a/modules/nvidia_plugin/src/kernels/log.hpp +++ b/modules/nvidia_plugin/src/kernels/log.hpp @@ -29,5 +29,3 @@ class Log { } // namespace kernel } // namespace nvidia_gpu } // namespace ov - - diff --git a/modules/nvidia_plugin/src/kernels/logical_not.cuh b/modules/nvidia_plugin/src/kernels/logical_not.cuh index 07720870f..715cbc63c 100644 --- a/modules/nvidia_plugin/src/kernels/logical_not.cuh +++ b/modules/nvidia_plugin/src/kernels/logical_not.cuh @@ -16,7 +16,10 @@ namespace kernel { class LogicalNot { public: - LogicalNot(Type_t element_type, const eltwise::KernelExecAttrs& kernelExecAttrs, std::size_t payloadRank, std::size_t len); + LogicalNot(Type_t element_type, + const eltwise::KernelExecAttrs& kernelExecAttrs, + std::size_t payloadRank, + std::size_t len); void operator()(cudaStream_t stream, const bool* src, bool* dst) const; diff --git a/modules/nvidia_plugin/src/kernels/mish.hpp b/modules/nvidia_plugin/src/kernels/mish.hpp index 95c61bd80..f3ea679c8 100644 --- a/modules/nvidia_plugin/src/kernels/mish.hpp +++ b/modules/nvidia_plugin/src/kernels/mish.hpp @@ -29,5 +29,3 @@ class Mish { } // namespace kernel } // namespace nvidia_gpu } // namespace ov - - diff --git a/modules/nvidia_plugin/src/kernels/sin.hpp b/modules/nvidia_plugin/src/kernels/sin.hpp index ce71eaef6..10f0b6b9e 100644 --- a/modules/nvidia_plugin/src/kernels/sin.hpp +++ b/modules/nvidia_plugin/src/kernels/sin.hpp @@ -29,5 +29,3 @@ class Sin { } // namespace kernel } // namespace nvidia_gpu } // namespace ov - - diff --git a/modules/nvidia_plugin/src/kernels/sinh.hpp b/modules/nvidia_plugin/src/kernels/sinh.hpp index 9e1c528ec..50826bb78 100644 --- a/modules/nvidia_plugin/src/kernels/sinh.hpp +++ b/modules/nvidia_plugin/src/kernels/sinh.hpp @@ -29,5 +29,3 @@ class Sinh { } // namespace kernel } // namespace nvidia_gpu } // namespace ov - - diff --git a/modules/nvidia_plugin/src/kernels/sqrt.hpp b/modules/nvidia_plugin/src/kernels/sqrt.hpp index 4d772b11c..5a5f9b9f1 100644 --- a/modules/nvidia_plugin/src/kernels/sqrt.hpp +++ b/modules/nvidia_plugin/src/kernels/sqrt.hpp @@ -29,5 +29,3 @@ class Sqrt { } // namespace kernel } // namespace nvidia_gpu } // namespace ov - - diff --git a/modules/nvidia_plugin/src/memory_manager/cuda_device_mem_block.hpp b/modules/nvidia_plugin/src/memory_manager/cuda_device_mem_block.hpp index 666d94162..f6f9986e2 100644 --- a/modules/nvidia_plugin/src/memory_manager/cuda_device_mem_block.hpp +++ b/modules/nvidia_plugin/src/memory_manager/cuda_device_mem_block.hpp @@ -4,8 +4,8 @@ #pragma once -#include #include +#include #include #include "memory_manager/model/cuda_memory_model.hpp" @@ -56,9 +56,7 @@ class DeviceMemBlock { MemoryModel::Ptr memoryModel() const { return model_; } - CudaGraphContext& cudaGraphContext() { - return cuda_graph_context_; - } + CudaGraphContext& cudaGraphContext() { return cuda_graph_context_; } private: MemoryModel::Ptr model_; diff --git a/modules/nvidia_plugin/src/ops/abs.hpp b/modules/nvidia_plugin/src/ops/abs.hpp index 26bdd5f8a..4e5b22e39 100644 --- a/modules/nvidia_plugin/src/ops/abs.hpp +++ b/modules/nvidia_plugin/src/ops/abs.hpp @@ -5,10 +5,10 @@ #pragma once #include -#include "openvino/op/abs.hpp" #include "elementwise_unary.hpp" #include "kernels/abs.hpp" +#include "openvino/op/abs.hpp" namespace ov { namespace nvidia_gpu { diff --git a/modules/nvidia_plugin/src/ops/add_cuda.hpp b/modules/nvidia_plugin/src/ops/add_cuda.hpp index 181deef41..cbaf195ad 100644 --- a/modules/nvidia_plugin/src/ops/add_cuda.hpp +++ b/modules/nvidia_plugin/src/ops/add_cuda.hpp @@ -4,10 +4,9 @@ #pragma once -#include "openvino/op/add.hpp" - #include "elementwise_binary.hpp" #include "kernels/add.hpp" +#include "openvino/op/add.hpp" namespace ov { namespace nvidia_gpu { diff --git a/modules/nvidia_plugin/src/ops/converters.hpp b/modules/nvidia_plugin/src/ops/converters.hpp index 688371358..ac593cf9f 100644 --- a/modules/nvidia_plugin/src/ops/converters.hpp +++ b/modules/nvidia_plugin/src/ops/converters.hpp @@ -10,8 +10,8 @@ #include #include #include -#include #include +#include #include #include diff --git a/modules/nvidia_plugin/src/ops/convolution_components/convolution_cudnn_components.hpp b/modules/nvidia_plugin/src/ops/convolution_components/convolution_cudnn_components.hpp index cc9cf989e..ff2d1859c 100644 --- a/modules/nvidia_plugin/src/ops/convolution_components/convolution_cudnn_components.hpp +++ b/modules/nvidia_plugin/src/ops/convolution_components/convolution_cudnn_components.hpp @@ -74,7 +74,8 @@ class ConvolutionDescriptorsCuDnn { public: ConvolutionDescriptorsCuDnn(const CreationContext& context, const Convolution::Details::ConvolutionParamsCuDnn& params, - const std::vector half_desc_types = {CUDNN_DATA_HALF, CUDNN_DATA_FLOAT}); + const std::vector half_desc_types = {CUDNN_DATA_HALF, + CUDNN_DATA_FLOAT}); cudnnDataType_t ElementType() const { return tensor_element_type_; } cudnnDataType_t DescType() const { return conv_desc_type_; } @@ -121,7 +122,8 @@ class ConvolutionBackpropDataDescriptorCuDnn { public: ConvolutionBackpropDataDescriptorCuDnn(const CreationContext& context, const Convolution::Details::ConvolutionBackpropDataParamsCuDnn& params, - const std::vector half_desc_types = {CUDNN_DATA_HALF, CUDNN_DATA_FLOAT}); + const std::vector half_desc_types = {CUDNN_DATA_HALF, + CUDNN_DATA_FLOAT}); cudnnDataType_t ElementType() const { return tensor_element_type_; } cudnnDataType_t DescType() const { return conv_desc_type_; } diff --git a/modules/nvidia_plugin/src/ops/cos.hpp b/modules/nvidia_plugin/src/ops/cos.hpp index ed19064a3..4633a5ecd 100644 --- a/modules/nvidia_plugin/src/ops/cos.hpp +++ b/modules/nvidia_plugin/src/ops/cos.hpp @@ -5,10 +5,10 @@ #pragma once #include -#include "openvino/op/cos.hpp" #include "elementwise_unary.hpp" #include "kernels/cos.hpp" +#include "openvino/op/cos.hpp" namespace ov { namespace nvidia_gpu { diff --git a/modules/nvidia_plugin/src/ops/cosh.hpp b/modules/nvidia_plugin/src/ops/cosh.hpp index aea09acfe..be1a3c83e 100644 --- a/modules/nvidia_plugin/src/ops/cosh.hpp +++ b/modules/nvidia_plugin/src/ops/cosh.hpp @@ -5,10 +5,10 @@ #pragma once #include -#include "openvino/op/cosh.hpp" #include "elementwise_unary.hpp" #include "kernels/cosh.hpp" +#include "openvino/op/cosh.hpp" namespace ov { namespace nvidia_gpu { diff --git a/modules/nvidia_plugin/src/ops/divide_cuda.hpp b/modules/nvidia_plugin/src/ops/divide_cuda.hpp index 18d6074d6..33b7bd524 100644 --- a/modules/nvidia_plugin/src/ops/divide_cuda.hpp +++ b/modules/nvidia_plugin/src/ops/divide_cuda.hpp @@ -4,10 +4,9 @@ #pragma once -#include "openvino/op/divide.hpp" - #include "elementwise_binary.hpp" #include "kernels/divide.hpp" +#include "openvino/op/divide.hpp" namespace ov { namespace nvidia_gpu { diff --git a/modules/nvidia_plugin/src/ops/elementwise_binary.hpp b/modules/nvidia_plugin/src/ops/elementwise_binary.hpp index 8660ad2c2..3ada107a8 100644 --- a/modules/nvidia_plugin/src/ops/elementwise_binary.hpp +++ b/modules/nvidia_plugin/src/ops/elementwise_binary.hpp @@ -38,8 +38,8 @@ class ElementwiseBinaryOp : public OperationBase { const size_t max_threads_per_block = context.device().props().maxThreadsPerBlock; const size_t out_num_elements = ov::shape_size(node.get_output_shape(0)); - kernel_ = - Kernel{convertDataType(element_type), out_num_elements, max_threads_per_block}; + kernel_ = Kernel{ + convertDataType(element_type), out_num_elements, max_threads_per_block}; } void Execute(const InferenceRequestContext& context, diff --git a/modules/nvidia_plugin/src/ops/elementwise_unary.hpp b/modules/nvidia_plugin/src/ops/elementwise_unary.hpp index 2ad57732f..bbdac1cfb 100644 --- a/modules/nvidia_plugin/src/ops/elementwise_unary.hpp +++ b/modules/nvidia_plugin/src/ops/elementwise_unary.hpp @@ -31,8 +31,8 @@ class ElementwiseUnaryOp : public OperationBase { OPENVINO_ASSERT(input_shape == output_shape, "Node name: ", GetName()); size_t num_elements = ov::shape_size(input_shape); const size_t max_threads_per_block = context.device().props().maxThreadsPerBlock; - kernel_ = - Kernel{convertDataType(input_element_type), max_threads_per_block, num_elements}; + kernel_ = Kernel{ + convertDataType(input_element_type), max_threads_per_block, num_elements}; } void Execute(const InferenceRequestContext& context, diff --git a/modules/nvidia_plugin/src/ops/elu.hpp b/modules/nvidia_plugin/src/ops/elu.hpp index a939e276e..57e9f1ea2 100644 --- a/modules/nvidia_plugin/src/ops/elu.hpp +++ b/modules/nvidia_plugin/src/ops/elu.hpp @@ -5,9 +5,8 @@ #pragma once #include "cuda_operation_base.hpp" -#include "openvino/op/elu.hpp" - #include "kernels/elu.hpp" +#include "openvino/op/elu.hpp" namespace ov { namespace nvidia_gpu { @@ -15,9 +14,9 @@ namespace nvidia_gpu { class EluOp : public OperationBase { public: EluOp(const CreationContext& context, - const ov::Node& node, - IndexCollection&& inputIds, - IndexCollection&& outputIds); + const ov::Node& node, + IndexCollection&& inputIds, + IndexCollection&& outputIds); void Execute(const InferenceRequestContext& context, Inputs inputTensors, diff --git a/modules/nvidia_plugin/src/ops/exp.hpp b/modules/nvidia_plugin/src/ops/exp.hpp index 49b6ebb80..544a28e86 100644 --- a/modules/nvidia_plugin/src/ops/exp.hpp +++ b/modules/nvidia_plugin/src/ops/exp.hpp @@ -5,10 +5,10 @@ #pragma once #include -#include "openvino/op/exp.hpp" #include "elementwise_unary.hpp" #include "kernels/exp.hpp" +#include "openvino/op/exp.hpp" namespace ov { namespace nvidia_gpu { diff --git a/modules/nvidia_plugin/src/ops/fake_quantize.hpp b/modules/nvidia_plugin/src/ops/fake_quantize.hpp index 8cce38858..a054f4520 100644 --- a/modules/nvidia_plugin/src/ops/fake_quantize.hpp +++ b/modules/nvidia_plugin/src/ops/fake_quantize.hpp @@ -4,10 +4,10 @@ #pragma once -#include "openvino/op/fake_quantize.hpp" #include "components/numpy_broadcast_params.h" #include "cuda_operation_base.hpp" #include "kernels/fake_quantize.hpp" +#include "openvino/op/fake_quantize.hpp" namespace ov { namespace nvidia_gpu { diff --git a/modules/nvidia_plugin/src/ops/floor.hpp b/modules/nvidia_plugin/src/ops/floor.hpp index 5c86f37b9..71bca1019 100644 --- a/modules/nvidia_plugin/src/ops/floor.hpp +++ b/modules/nvidia_plugin/src/ops/floor.hpp @@ -5,10 +5,10 @@ #pragma once #include -#include "openvino/op/floor.hpp" #include "elementwise_unary.hpp" #include "kernels/floor.hpp" +#include "openvino/op/floor.hpp" namespace ov { namespace nvidia_gpu { diff --git a/modules/nvidia_plugin/src/ops/floor_mod.hpp b/modules/nvidia_plugin/src/ops/floor_mod.hpp index f41e49137..ac3f6baf8 100644 --- a/modules/nvidia_plugin/src/ops/floor_mod.hpp +++ b/modules/nvidia_plugin/src/ops/floor_mod.hpp @@ -4,10 +4,9 @@ #pragma once -#include "openvino/op/floor_mod.hpp" - #include "elementwise_binary.hpp" #include "kernels/floor_mod.hpp" +#include "openvino/op/floor_mod.hpp" namespace ov { namespace nvidia_gpu { diff --git a/modules/nvidia_plugin/src/ops/gelu.hpp b/modules/nvidia_plugin/src/ops/gelu.hpp index 8e2aef198..8274a2991 100644 --- a/modules/nvidia_plugin/src/ops/gelu.hpp +++ b/modules/nvidia_plugin/src/ops/gelu.hpp @@ -5,10 +5,10 @@ #pragma once #include -#include "openvino/op/gelu.hpp" #include "elementwise_unary.hpp" #include "kernels/gelu.hpp" +#include "openvino/op/gelu.hpp" namespace ov { namespace nvidia_gpu { diff --git a/modules/nvidia_plugin/src/ops/hswish.hpp b/modules/nvidia_plugin/src/ops/hswish.hpp index f902a0973..93e2f8ec9 100644 --- a/modules/nvidia_plugin/src/ops/hswish.hpp +++ b/modules/nvidia_plugin/src/ops/hswish.hpp @@ -5,10 +5,10 @@ #pragma once #include -#include "openvino/op/hswish.hpp" #include "elementwise_unary.hpp" #include "kernels/hswish.hpp" +#include "openvino/op/hswish.hpp" namespace ov { namespace nvidia_gpu { diff --git a/modules/nvidia_plugin/src/ops/log.hpp b/modules/nvidia_plugin/src/ops/log.hpp index 139c09a4d..09b0cd59d 100644 --- a/modules/nvidia_plugin/src/ops/log.hpp +++ b/modules/nvidia_plugin/src/ops/log.hpp @@ -5,10 +5,10 @@ #pragma once #include -#include "openvino/op/log.hpp" #include "elementwise_unary.hpp" #include "kernels/log.hpp" +#include "openvino/op/log.hpp" namespace ov { namespace nvidia_gpu { diff --git a/modules/nvidia_plugin/src/ops/matmul.hpp b/modules/nvidia_plugin/src/ops/matmul.hpp index 90313836d..10dbe2884 100644 --- a/modules/nvidia_plugin/src/ops/matmul.hpp +++ b/modules/nvidia_plugin/src/ops/matmul.hpp @@ -4,13 +4,12 @@ #pragma once -#include "openvino/op/matmul.hpp" - #include #include #include #include "cuda/constant_factory.hpp" +#include "openvino/op/matmul.hpp" namespace ov { namespace nvidia_gpu { diff --git a/modules/nvidia_plugin/src/ops/maximum.hpp b/modules/nvidia_plugin/src/ops/maximum.hpp index d772bb9ca..a8a0223a0 100644 --- a/modules/nvidia_plugin/src/ops/maximum.hpp +++ b/modules/nvidia_plugin/src/ops/maximum.hpp @@ -4,10 +4,9 @@ #pragma once -#include "openvino/op/maximum.hpp" - #include "elementwise_binary.hpp" #include "kernels/maximum.hpp" +#include "openvino/op/maximum.hpp" namespace ov { namespace nvidia_gpu { diff --git a/modules/nvidia_plugin/src/ops/minimum.hpp b/modules/nvidia_plugin/src/ops/minimum.hpp index b18355ae2..1f2f294fa 100644 --- a/modules/nvidia_plugin/src/ops/minimum.hpp +++ b/modules/nvidia_plugin/src/ops/minimum.hpp @@ -4,10 +4,9 @@ #pragma once -#include "openvino/op/minimum.hpp" - #include "elementwise_binary.hpp" #include "kernels/minimum.hpp" +#include "openvino/op/minimum.hpp" namespace ov { namespace nvidia_gpu { diff --git a/modules/nvidia_plugin/src/ops/mish.hpp b/modules/nvidia_plugin/src/ops/mish.hpp index 2a3572407..67cb4b185 100644 --- a/modules/nvidia_plugin/src/ops/mish.hpp +++ b/modules/nvidia_plugin/src/ops/mish.hpp @@ -5,10 +5,10 @@ #pragma once #include -#include "openvino/op/mish.hpp" #include "elementwise_unary.hpp" #include "kernels/mish.hpp" +#include "openvino/op/mish.hpp" namespace ov { namespace nvidia_gpu { diff --git a/modules/nvidia_plugin/src/ops/mod.hpp b/modules/nvidia_plugin/src/ops/mod.hpp index 622ecd463..fcb35e6b9 100644 --- a/modules/nvidia_plugin/src/ops/mod.hpp +++ b/modules/nvidia_plugin/src/ops/mod.hpp @@ -4,10 +4,9 @@ #pragma once -#include "openvino/op/mod.hpp" - #include "elementwise_binary.hpp" #include "kernels/mod.hpp" +#include "openvino/op/mod.hpp" namespace ov { namespace nvidia_gpu { diff --git a/modules/nvidia_plugin/src/ops/multiply_cuda.hpp b/modules/nvidia_plugin/src/ops/multiply_cuda.hpp index e32abc55a..8eafe71a7 100644 --- a/modules/nvidia_plugin/src/ops/multiply_cuda.hpp +++ b/modules/nvidia_plugin/src/ops/multiply_cuda.hpp @@ -4,10 +4,9 @@ #pragma once -#include "openvino/op/multiply.hpp" - #include "elementwise_binary.hpp" #include "kernels/multiply.hpp" +#include "openvino/op/multiply.hpp" namespace ov { namespace nvidia_gpu { diff --git a/modules/nvidia_plugin/src/ops/mvn.hpp b/modules/nvidia_plugin/src/ops/mvn.hpp index f1d4a9c3e..bb2c3e228 100644 --- a/modules/nvidia_plugin/src/ops/mvn.hpp +++ b/modules/nvidia_plugin/src/ops/mvn.hpp @@ -6,10 +6,9 @@ #include -#include "openvino/op/mvn.hpp" - #include "converters.hpp" #include "kernels/variance_normalization_factor.hpp" +#include "openvino/op/mvn.hpp" namespace ov { namespace nvidia_gpu { diff --git a/modules/nvidia_plugin/src/ops/prelu.hpp b/modules/nvidia_plugin/src/ops/prelu.hpp index 487b8ca3f..9d8384a75 100644 --- a/modules/nvidia_plugin/src/ops/prelu.hpp +++ b/modules/nvidia_plugin/src/ops/prelu.hpp @@ -5,10 +5,10 @@ #pragma once #include -#include "openvino/op/prelu.hpp" #include "elementwise_binary.hpp" #include "kernels/prelu.hpp" +#include "openvino/op/prelu.hpp" namespace ov { namespace nvidia_gpu { diff --git a/modules/nvidia_plugin/src/ops/reduce.hpp b/modules/nvidia_plugin/src/ops/reduce.hpp index 73113d4f5..35d9b4822 100644 --- a/modules/nvidia_plugin/src/ops/reduce.hpp +++ b/modules/nvidia_plugin/src/ops/reduce.hpp @@ -26,6 +26,7 @@ class ReduceOp : public OperationCuDnn { WorkbufferRequest GetWorkBufferRequest() const override; static cudnnDataType_t reduceCompType(const ov::Node& node); + private: cudnnDataType_t comp_type_; CUDA::DnnReduceTensorDescriptor reduce_desc_; diff --git a/modules/nvidia_plugin/src/ops/sin.hpp b/modules/nvidia_plugin/src/ops/sin.hpp index bb28adbeb..a18af3002 100644 --- a/modules/nvidia_plugin/src/ops/sin.hpp +++ b/modules/nvidia_plugin/src/ops/sin.hpp @@ -5,10 +5,10 @@ #pragma once #include -#include "openvino/op/sin.hpp" #include "elementwise_unary.hpp" #include "kernels/sin.hpp" +#include "openvino/op/sin.hpp" namespace ov { namespace nvidia_gpu { diff --git a/modules/nvidia_plugin/src/ops/sinh.hpp b/modules/nvidia_plugin/src/ops/sinh.hpp index 5dcbc9f8c..d8868cbc8 100644 --- a/modules/nvidia_plugin/src/ops/sinh.hpp +++ b/modules/nvidia_plugin/src/ops/sinh.hpp @@ -5,10 +5,10 @@ #pragma once #include -#include "openvino/op/sinh.hpp" #include "elementwise_unary.hpp" #include "kernels/sinh.hpp" +#include "openvino/op/sinh.hpp" namespace ov { namespace nvidia_gpu { diff --git a/modules/nvidia_plugin/src/ops/sqrt.hpp b/modules/nvidia_plugin/src/ops/sqrt.hpp index 3194c989c..b72954013 100644 --- a/modules/nvidia_plugin/src/ops/sqrt.hpp +++ b/modules/nvidia_plugin/src/ops/sqrt.hpp @@ -5,10 +5,10 @@ #pragma once #include -#include "openvino/op/sqrt.hpp" #include "elementwise_unary.hpp" #include "kernels/sqrt.hpp" +#include "openvino/op/sqrt.hpp" namespace ov { namespace nvidia_gpu { diff --git a/modules/nvidia_plugin/src/ops/squared_difference.hpp b/modules/nvidia_plugin/src/ops/squared_difference.hpp index a131e661c..2d7c046c6 100644 --- a/modules/nvidia_plugin/src/ops/squared_difference.hpp +++ b/modules/nvidia_plugin/src/ops/squared_difference.hpp @@ -4,10 +4,9 @@ #pragma once -#include "openvino/op/squared_difference.hpp" - #include "elementwise_binary.hpp" #include "kernels/squared_difference.hpp" +#include "openvino/op/squared_difference.hpp" namespace ov { namespace nvidia_gpu { diff --git a/modules/nvidia_plugin/src/ops/strided_slice.hpp b/modules/nvidia_plugin/src/ops/strided_slice.hpp index c4578081d..f154e8967 100644 --- a/modules/nvidia_plugin/src/ops/strided_slice.hpp +++ b/modules/nvidia_plugin/src/ops/strided_slice.hpp @@ -11,7 +11,7 @@ namespace ov { namespace nvidia_gpu { -template +template class StridedSliceOp : public OperationBase { public: using NodeOp = ov::op::v1::StridedSlice; diff --git a/modules/nvidia_plugin/src/ops/subgraph.hpp b/modules/nvidia_plugin/src/ops/subgraph.hpp index 87d03cccc..443aeea96 100644 --- a/modules/nvidia_plugin/src/ops/subgraph.hpp +++ b/modules/nvidia_plugin/src/ops/subgraph.hpp @@ -8,6 +8,7 @@ #include #include #include + #include "openvino/op/util/sub_graph_base.hpp" namespace ov { diff --git a/modules/nvidia_plugin/src/ops/subtract.hpp b/modules/nvidia_plugin/src/ops/subtract.hpp index e09c650db..526777234 100644 --- a/modules/nvidia_plugin/src/ops/subtract.hpp +++ b/modules/nvidia_plugin/src/ops/subtract.hpp @@ -4,10 +4,9 @@ #pragma once -#include "openvino/op/subtract.hpp" - #include "elementwise_binary.hpp" #include "kernels/subtract.hpp" +#include "openvino/op/subtract.hpp" namespace ov { namespace nvidia_gpu { diff --git a/modules/nvidia_plugin/src/transformer/bidirectional_lstm_sequence_composition.hpp b/modules/nvidia_plugin/src/transformer/bidirectional_lstm_sequence_composition.hpp index ccc346c99..00a150af7 100644 --- a/modules/nvidia_plugin/src/transformer/bidirectional_lstm_sequence_composition.hpp +++ b/modules/nvidia_plugin/src/transformer/bidirectional_lstm_sequence_composition.hpp @@ -4,9 +4,10 @@ #pragma once -#include "openvino/pass/graph_rewrite.hpp" #include +#include "openvino/pass/graph_rewrite.hpp" + namespace ov::nvidia_gpu::pass { class Convert2LSTMSequenceToBidirectionalLSTMSequence : public ov::pass::ModelPass { diff --git a/modules/nvidia_plugin/src/transformer/concat_transformation.hpp b/modules/nvidia_plugin/src/transformer/concat_transformation.hpp index 9bcb89ec7..7de2c5540 100644 --- a/modules/nvidia_plugin/src/transformer/concat_transformation.hpp +++ b/modules/nvidia_plugin/src/transformer/concat_transformation.hpp @@ -4,9 +4,10 @@ #pragma once -#include "openvino/pass/graph_rewrite.hpp" #include +#include "openvino/pass/graph_rewrite.hpp" + namespace ov::nvidia_gpu::pass { class ConcatTransformation : public ov::pass::MatcherPass { diff --git a/modules/nvidia_plugin/src/transformer/cuda_graph_transformer.hpp b/modules/nvidia_plugin/src/transformer/cuda_graph_transformer.hpp index 402625590..92c0f37d3 100644 --- a/modules/nvidia_plugin/src/transformer/cuda_graph_transformer.hpp +++ b/modules/nvidia_plugin/src/transformer/cuda_graph_transformer.hpp @@ -5,31 +5,29 @@ #pragma once #include -#include "openvino/core/model.hpp" #include "cpp/ie_cnn_network.h" #include "cuda_config.hpp" +#include "openvino/core/model.hpp" namespace ov { namespace nvidia_gpu { class GraphTransformer { public: - /** - * @brief Transform takes an ov::Model and applies all the necessary - * CUDA-specific transformations to achieve the maximum optimization of the - * model for execution on a CUDA device. The transformations may - * include CUDA-specific op fusions and some common OpenVino - * transformations as well. - * @param function a valid shared ptr to a model, represented as an - * ov::Model instance. - * @param config a string-string map of configuration for loading an - * executable network (e.g. a model); this config influences on what exact - * transformations are being applied to the original graph. - */ - void transform(const CUDA::Device& device, - std::shared_ptr& model, - const Configuration& config) const; + /** + * @brief Transform takes an ov::Model and applies all the necessary + * CUDA-specific transformations to achieve the maximum optimization of the + * model for execution on a CUDA device. The transformations may + * include CUDA-specific op fusions and some common OpenVino + * transformations as well. + * @param function a valid shared ptr to a model, represented as an + * ov::Model instance. + * @param config a string-string map of configuration for loading an + * executable network (e.g. a model); this config influences on what exact + * transformations are being applied to the original graph. + */ + void transform(const CUDA::Device& device, std::shared_ptr& model, const Configuration& config) const; }; } // namespace nvidia_gpu diff --git a/modules/nvidia_plugin/src/transformer/nodes/activation_type.hpp b/modules/nvidia_plugin/src/transformer/nodes/activation_type.hpp index c6cd68560..8dff0fe23 100644 --- a/modules/nvidia_plugin/src/transformer/nodes/activation_type.hpp +++ b/modules/nvidia_plugin/src/transformer/nodes/activation_type.hpp @@ -22,7 +22,8 @@ template <> class AttributeAdapter : public EnumAttributeAdapterBase { public: - AttributeAdapter(nvidia_gpu::nodes::ActivationMode& value): EnumAttributeAdapterBase(value) {} + AttributeAdapter(nvidia_gpu::nodes::ActivationMode& value) + : EnumAttributeAdapterBase(value) {} OPENVINO_RTTI("AttributeAdapter"); }; diff --git a/modules/nvidia_plugin/src/transformer/nodes/lstm_sequence_optimized.hpp b/modules/nvidia_plugin/src/transformer/nodes/lstm_sequence_optimized.hpp index fa05d7f9f..882d430e9 100644 --- a/modules/nvidia_plugin/src/transformer/nodes/lstm_sequence_optimized.hpp +++ b/modules/nvidia_plugin/src/transformer/nodes/lstm_sequence_optimized.hpp @@ -17,9 +17,7 @@ class LSTMSequenceOptimized : public ov::op::util::RNNCellBase { LSTMSequenceOptimized() = default; ~LSTMSequenceOptimized() = default; - size_t get_default_output_index() const override { - return no_default_index(); - } + size_t get_default_output_index() const override { return no_default_index(); } explicit LSTMSequenceOptimized(const ov::Output& X, const ov::Output& initial_hidden_state, @@ -44,27 +42,13 @@ class LSTMSequenceOptimized : public ov::op::util::RNNCellBase { void validate_and_infer_types() override; - std::vector get_activations_alpha() const { - return m_activations_alpha; - } - std::vector get_activations_beta() const { - return m_activations_beta; - } - std::vector get_activations() const { - return m_activations; - } - float get_clip_threshold() const { - return m_clip; - } - ov::op::RecurrentSequenceDirection get_direction() const { - return m_direction; - } - void set_direction(const ov::op::RecurrentSequenceDirection& dir) { - m_direction = dir; - } - std::size_t get_hidden_size() const { - return m_hidden_size; - } + std::vector get_activations_alpha() const { return m_activations_alpha; } + std::vector get_activations_beta() const { return m_activations_beta; } + std::vector get_activations() const { return m_activations; } + float get_clip_threshold() const { return m_clip; } + ov::op::RecurrentSequenceDirection get_direction() const { return m_direction; } + void set_direction(const ov::op::RecurrentSequenceDirection& dir) { m_direction = dir; } + std::size_t get_hidden_size() const { return m_hidden_size; } private: ov::op::RecurrentSequenceDirection m_direction; @@ -72,16 +56,15 @@ class LSTMSequenceOptimized : public ov::op::util::RNNCellBase { }; } // namespace ov::nvidia_gpu::nodes - namespace ov { template <> class AttributeAdapter : public EnumAttributeAdapterBase { public: - AttributeAdapter(nvidia_gpu::nodes::LSTMSequenceOptimized::MajorFormat& value): EnumAttributeAdapterBase(value) {} + AttributeAdapter(nvidia_gpu::nodes::LSTMSequenceOptimized::MajorFormat& value) + : EnumAttributeAdapterBase(value) {} OPENVINO_RTTI("AttributeAdapter"); }; - -} // namespace ov +} // namespace ov diff --git a/modules/nvidia_plugin/src/transformer/remove_redundant_convert_transformation.hpp b/modules/nvidia_plugin/src/transformer/remove_redundant_convert_transformation.hpp index 54869b483..e76e7a6c4 100644 --- a/modules/nvidia_plugin/src/transformer/remove_redundant_convert_transformation.hpp +++ b/modules/nvidia_plugin/src/transformer/remove_redundant_convert_transformation.hpp @@ -12,7 +12,6 @@ class RemoveRedundantConvertTransformation : public ov::pass::ModelPass { public: OPENVINO_RTTI("RemoveRedundantConvertTransformation", "0"); bool run_on_model(const std::shared_ptr& m) override; - }; class MergeSubsequentConvertTransformation : public ov::pass::MatcherPass { diff --git a/modules/nvidia_plugin/src/transformer/rt_info/cuda_node_id.hpp b/modules/nvidia_plugin/src/transformer/rt_info/cuda_node_id.hpp index ea27c7fd1..83095b9d1 100644 --- a/modules/nvidia_plugin/src/transformer/rt_info/cuda_node_id.hpp +++ b/modules/nvidia_plugin/src/transformer/rt_info/cuda_node_id.hpp @@ -28,10 +28,8 @@ class CudaNodeId : public RuntimeAttribute { CudaNodeId() = default; - bool is_copyable() const override { - return false; - } + bool is_copyable() const override { return false; } }; -} // namespace rt_info -} // namespace nvidia_gpu -} // namespace ov +} // namespace rt_info +} // namespace nvidia_gpu +} // namespace ov diff --git a/modules/nvidia_plugin/src/utils/perf_timing.hpp b/modules/nvidia_plugin/src/utils/perf_timing.hpp index 6493fe8e7..b01614bd4 100644 --- a/modules/nvidia_plugin/src/utils/perf_timing.hpp +++ b/modules/nvidia_plugin/src/utils/perf_timing.hpp @@ -16,9 +16,16 @@ namespace ov::nvidia_gpu::utils { class PerformaceTiming { public: PerformaceTiming() = default; - PerformaceTiming(const CUDA::Stream& stream, CUDA::Event::RecordMode mode = CUDA::Event::RecordMode::Default) : start_{CUDA::Event{}} { start_->record(stream, mode); } - void setStart(const CUDA::Stream& stream, CUDA::Event::RecordMode mode = CUDA::Event::RecordMode::Default) { start_.emplace(CUDA::Event{}.record(stream, mode)); } - void setStop(const CUDA::Stream& stream, CUDA::Event::RecordMode mode = CUDA::Event::RecordMode::Default) { stop_.emplace(CUDA::Event{}.record(stream, mode)); } + PerformaceTiming(const CUDA::Stream& stream, CUDA::Event::RecordMode mode = CUDA::Event::RecordMode::Default) + : start_{CUDA::Event{}} { + start_->record(stream, mode); + } + void setStart(const CUDA::Stream& stream, CUDA::Event::RecordMode mode = CUDA::Event::RecordMode::Default) { + start_.emplace(CUDA::Event{}.record(stream, mode)); + } + void setStop(const CUDA::Stream& stream, CUDA::Event::RecordMode mode = CUDA::Event::RecordMode::Default) { + stop_.emplace(CUDA::Event{}.record(stream, mode)); + } float measure() { if (start_.has_value() && stop_.has_value()) { auto elapsed = stop_->elapsedSince(*start_); diff --git a/modules/nvidia_plugin/tests/functional/CMakeLists.txt b/modules/nvidia_plugin/tests/functional/CMakeLists.txt index 7033c5892..087070d47 100644 --- a/modules/nvidia_plugin/tests/functional/CMakeLists.txt +++ b/modules/nvidia_plugin/tests/functional/CMakeLists.txt @@ -12,7 +12,7 @@ else(ENABLE_INTEL_CPU) set(OPENVINO_ADDITIONAL_DEPENDENCIES) endif (ENABLE_INTEL_CPU) -addIeTargetTest( +ov_add_test_target( NAME ${TARGET_NAME} ROOT @@ -27,6 +27,7 @@ addIeTargetTest( gmock INCLUDES ${CMAKE_CURRENT_SOURCE_DIR}/../include + ADD_CLANG_FORMAT LABELS CUDA ) diff --git a/modules/nvidia_plugin/tests/functional/core_config.cpp b/modules/nvidia_plugin/tests/functional/core_config.cpp index aece7fd6a..274bd4894 100644 --- a/modules/nvidia_plugin/tests/functional/core_config.cpp +++ b/modules/nvidia_plugin/tests/functional/core_config.cpp @@ -3,9 +3,9 @@ // #include "functional_test_utils/core_config.hpp" -#include "shared_test_classes/base/ov_subgraph.hpp" #include "cuda_test_constants.hpp" +#include "shared_test_classes/base/ov_subgraph.hpp" void CoreConfiguration(LayerTestsUtils::LayerTestsCommon* test) { std::shared_ptr core = PluginCache::get().ie(); @@ -16,7 +16,8 @@ void CoreConfiguration(LayerTestsUtils::LayerTestsCommon* test) { break; } } - // Set inference_precision hint to run fp32 model in fp32 runtime precision as default plugin execution precision may vary + // Set inference_precision hint to run fp32 model in fp32 runtime precision as default plugin execution precision + // may vary std::map config = {{ov::hint::inference_precision.name(), hint.get_type_name()}}; core->SetConfig(config, ov::test::utils::DEVICE_NVIDIA); } @@ -32,8 +33,10 @@ void core_configuration(ov::test::SubgraphBaseTest* test) { break; } } - // Set inference_precision hint to run fp32 model in fp32 runtime precision as default plugin execution precision may vary - test->core->set_property(ov::test::utils::DEVICE_NVIDIA, {{ov::hint::inference_precision.name(), hint.get_type_name()}}); + // Set inference_precision hint to run fp32 model in fp32 runtime precision as default plugin execution precision + // may vary + test->core->set_property(ov::test::utils::DEVICE_NVIDIA, + {{ov::hint::inference_precision.name(), hint.get_type_name()}}); } } // namespace test diff --git a/modules/nvidia_plugin/tests/functional/shared_tests_instances/behavior/executable_network/get_metric.cpp b/modules/nvidia_plugin/tests/functional/shared_tests_instances/behavior/executable_network/get_metric.cpp index e797596f6..81212d835 100644 --- a/modules/nvidia_plugin/tests/functional/shared_tests_instances/behavior/executable_network/get_metric.cpp +++ b/modules/nvidia_plugin/tests/functional/shared_tests_instances/behavior/executable_network/get_metric.cpp @@ -18,23 +18,23 @@ namespace { INSTANTIATE_TEST_SUITE_P(smoke_IEClassExecutableNetworkGetMetricTest, IEClassExecutableNetworkGetMetricTest_SUPPORTED_CONFIG_KEYS, - ::testing::Values(ov::test::utils::DEVICE_NVIDIA, /*"MULTI:NVIDIA", */"HETERO:NVIDIA")); + ::testing::Values(ov::test::utils::DEVICE_NVIDIA, /*"MULTI:NVIDIA", */ "HETERO:NVIDIA")); INSTANTIATE_TEST_SUITE_P(smoke_IEClassExecutableNetworkGetMetricTest, IEClassExecutableNetworkGetMetricTest_SUPPORTED_METRICS, - ::testing::Values(ov::test::utils::DEVICE_NVIDIA, /*"MULTI:NVIDIA", */"HETERO:NVIDIA")); + ::testing::Values(ov::test::utils::DEVICE_NVIDIA, /*"MULTI:NVIDIA", */ "HETERO:NVIDIA")); INSTANTIATE_TEST_SUITE_P(smoke_IEClassExecutableNetworkGetMetricTest, IEClassExecutableNetworkGetMetricTest_NETWORK_NAME, - ::testing::Values(ov::test::utils::DEVICE_NVIDIA, /*""MULTI:NVIDIA", */"HETERO:NVIDIA")); + ::testing::Values(ov::test::utils::DEVICE_NVIDIA, /*""MULTI:NVIDIA", */ "HETERO:NVIDIA")); INSTANTIATE_TEST_SUITE_P(smoke_IEClassExecutableNetworkGetMetricTest, IEClassExecutableNetworkGetMetricTest_OPTIMAL_NUMBER_OF_INFER_REQUESTS, - ::testing::Values(ov::test::utils::DEVICE_NVIDIA, /*"MULTI:NVIDIA", */"HETERO:NVIDIA")); + ::testing::Values(ov::test::utils::DEVICE_NVIDIA, /*"MULTI:NVIDIA", */ "HETERO:NVIDIA")); INSTANTIATE_TEST_SUITE_P(smoke_IEClassExecutableNetworkGetMetricTest_ThrowsUnsupported, IEClassExecutableNetworkGetMetricTest, - ::testing::Values(ov::test::utils::DEVICE_NVIDIA, /*"MULTI:NVIDIA", */"HETERO:NVIDIA")); + ::testing::Values(ov::test::utils::DEVICE_NVIDIA, /*"MULTI:NVIDIA", */ "HETERO:NVIDIA")); // // Executable Network GetConfig / SetConfig // diff --git a/modules/nvidia_plugin/tests/functional/shared_tests_instances/behavior/infer_request/infer_request.cpp b/modules/nvidia_plugin/tests/functional/shared_tests_instances/behavior/infer_request/infer_request.cpp index 788b190c1..e76935f10 100644 --- a/modules/nvidia_plugin/tests/functional/shared_tests_instances/behavior/infer_request/infer_request.cpp +++ b/modules/nvidia_plugin/tests/functional/shared_tests_instances/behavior/infer_request/infer_request.cpp @@ -345,7 +345,7 @@ TEST_F(smoke_InferenceRequestTest, ParameterResult) { InferenceEngine::Core ie{}; InferenceEngine::Blob::Ptr a{}; auto testNet = ie.ReadNetwork(model10, a); - auto execNet = ie.LoadNetwork(testNet, "NVIDIA", {{ "INFERENCE_PRECISION_HINT", "f32"}}); + auto execNet = ie.LoadNetwork(testNet, "NVIDIA", {{"INFERENCE_PRECISION_HINT", "f32"}}); InferenceEngine::InferRequest request{execNet.CreateInferRequest()}; const InferenceEngine::ConstInputsDataMap inputsInfo{execNet.GetInputsInfo()}; diff --git a/modules/nvidia_plugin/tests/functional/shared_tests_instances/behavior/ov_executable_network/properties.cpp b/modules/nvidia_plugin/tests/functional/shared_tests_instances/behavior/ov_executable_network/properties.cpp index 7d7607f53..095c30b3e 100644 --- a/modules/nvidia_plugin/tests/functional/shared_tests_instances/behavior/ov_executable_network/properties.cpp +++ b/modules/nvidia_plugin/tests/functional/shared_tests_instances/behavior/ov_executable_network/properties.cpp @@ -6,8 +6,8 @@ #include -#include "openvino/runtime/properties.hpp" #include "nvidia/properties.hpp" +#include "openvino/runtime/properties.hpp" using namespace ov::test::behavior; @@ -65,16 +65,14 @@ INSTANTIATE_TEST_SUITE_P(smoke_AutoBatch_BehaviorTests, ::testing::ValuesIn(auto_batch_inproperties)), OVCompiledModelPropertiesIncorrectTests::getTestCaseName); -const std::vector default_properties = { - {ov::num_streams(1)}, - {ov::hint::num_requests(0)}, - {ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY)}, - {ov::hint::execution_mode(ov::hint::ExecutionMode::PERFORMANCE)}, - {ov::enable_profiling(false)}, - {ov::device::id("0")}, - {ov::nvidia_gpu::operation_benchmark(false)}, - {ov::nvidia_gpu::use_cuda_graph(true)} -}; +const std::vector default_properties = {{ov::num_streams(1)}, + {ov::hint::num_requests(0)}, + {ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY)}, + {ov::hint::execution_mode(ov::hint::ExecutionMode::PERFORMANCE)}, + {ov::enable_profiling(false)}, + {ov::device::id("0")}, + {ov::nvidia_gpu::operation_benchmark(false)}, + {ov::nvidia_gpu::use_cuda_graph(true)}}; INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTests, OVCompiledModelPropertiesDefaultTests, diff --git a/modules/nvidia_plugin/tests/functional/shared_tests_instances/behavior/plugin/config.cpp b/modules/nvidia_plugin/tests/functional/shared_tests_instances/behavior/plugin/config.cpp index eda10ce4f..e72c33b78 100644 --- a/modules/nvidia_plugin/tests/functional/shared_tests_instances/behavior/plugin/config.cpp +++ b/modules/nvidia_plugin/tests/functional/shared_tests_instances/behavior/plugin/config.cpp @@ -2,8 +2,8 @@ // SPDX-License-Identifier: Apache-2.0 // -#include #include +#include #include "behavior/plugin/configuration_tests.hpp" diff --git a/modules/nvidia_plugin/tests/functional/shared_tests_instances/single_layer_tests/activation.cpp b/modules/nvidia_plugin/tests/functional/shared_tests_instances/single_layer_tests/activation.cpp index c8454dcb2..5e9c20974 100644 --- a/modules/nvidia_plugin/tests/functional/shared_tests_instances/single_layer_tests/activation.cpp +++ b/modules/nvidia_plugin/tests/functional/shared_tests_instances/single_layer_tests/activation.cpp @@ -27,41 +27,40 @@ const std::vector intPrecisions = { // TODO commented tests don't work for CUDA now. // The reason there are missing correspondent operations or transformation const std::map>> activationTypes = { - {Sigmoid, {}}, - {Tanh, {}}, - {Relu, {}}, - {Exp, {}}, - {Log, {}}, + {Sigmoid, {}}, + {Tanh, {}}, + {Relu, {}}, + {Exp, {}}, + {Log, {}}, // {Sign, {}}, - {Abs, {}}, - {Clamp, {{-2.0f, 2.0f}}}, - {Negative, {}}, + {Abs, {}}, + {Clamp, {{-2.0f, 2.0f}}}, + {Negative, {}}, // {Acos, {}}, // {Asin, {}}, // {Atan, {}}, - {Cos, {}}, - {Cosh, {}}, - {Floor, {}}, - {Sin, {}}, - {Sinh, {}}, - {Sqrt, {}}, + {Cos, {}}, + {Cosh, {}}, + {Floor, {}}, + {Sin, {}}, + {Sinh, {}}, + {Sqrt, {}}, // {Tan, {}}, - {Elu, {{0.1f}}}, + {Elu, {{0.1f}}}, // {Erf, {}}, // {HardSigmoid, {{0.2f, 0.5f}}}, // {Selu, {{1.6732f, 1.0507f}}}, // {Ceiling, {}}, - {Mish, {}}, - {Swish, {{0.5f}}}, - {HSwish, {}}, + {Mish, {}}, + {Swish, {{0.5f}}}, + {HSwish, {}}, // {SoftPlus, {}}, - {HSigmoid, {}}, + {HSigmoid, {}}, // {RoundHalfToEven, {}}, // {RoundHalfAwayFromZero, {}}, - {Gelu, {}}, - {GeluErf, {}}, - {GeluTanh, {}} -}; + {Gelu, {}}, + {GeluErf, {}}, + {GeluTanh, {}}}; class CUDAActivationIntegerLayerTest : public ActivationLayerTest { void SetUp() override { @@ -72,13 +71,13 @@ class CUDAActivationIntegerLayerTest : public ActivationLayerTest { // List of operations that should be tested also with integer precision const std::map>> intActivationTypes = { - {Abs, {}}, - {Negative, {}}, - {Cos, {}}, - {Cosh, {}}, - {Sinh, {}}, - {Sqrt, {}}, - {Log, {}}, + {Abs, {}}, + {Negative, {}}, + {Cos, {}}, + {Cosh, {}}, + {Sinh, {}}, + {Sqrt, {}}, + {Log, {}}, }; const std::map>> preluActivationParamTypes = { diff --git a/modules/nvidia_plugin/tests/functional/shared_tests_instances/single_layer_tests/activation_slt.cpp b/modules/nvidia_plugin/tests/functional/shared_tests_instances/single_layer_tests/activation_slt.cpp index 3db1ca94d..7dc6ca89b 100644 --- a/modules/nvidia_plugin/tests/functional/shared_tests_instances/single_layer_tests/activation_slt.cpp +++ b/modules/nvidia_plugin/tests/functional/shared_tests_instances/single_layer_tests/activation_slt.cpp @@ -2671,8 +2671,14 @@ struct ClampBenchmark : testing::Test { ov::nvidia_gpu::CancellationToken token{}; ov::nvidia_gpu::SimpleExecutionDelegator simpleExecutionDelegator{}; ov::nvidia_gpu::CudaGraphContext cudaGraphContext; - ov::nvidia_gpu::InferenceRequestContext context{ - emptyTensor, emptyMapping, emptyTensor, emptyMapping, threadContext, token, simpleExecutionDelegator, cudaGraphContext}; + ov::nvidia_gpu::InferenceRequestContext context{emptyTensor, + emptyMapping, + emptyTensor, + emptyMapping, + threadContext, + token, + simpleExecutionDelegator, + cudaGraphContext}; std::vector inHost(tesnorSize); std::random_device rDevice; diff --git a/modules/nvidia_plugin/tests/functional/shared_tests_instances/single_layer_tests/convolution_backprop_data.cpp b/modules/nvidia_plugin/tests/functional/shared_tests_instances/single_layer_tests/convolution_backprop_data.cpp index c5da5603d..2b461e6d1 100644 --- a/modules/nvidia_plugin/tests/functional/shared_tests_instances/single_layer_tests/convolution_backprop_data.cpp +++ b/modules/nvidia_plugin/tests/functional/shared_tests_instances/single_layer_tests/convolution_backprop_data.cpp @@ -97,9 +97,9 @@ class ConvolutionBackpropDataExtendedLayerTest filterWeightsShape.insert(filterWeightsShape.end(), filterSize.begin(), filterSize.end()); std::shared_ptr filterWeightsNode; if (filterWeights.empty()) { - ov::Tensor random_tensor(type, filterWeightsShape); - ov::test::utils::fill_tensor_random(random_tensor); - filterWeightsNode = std::make_shared(random_tensor); + ov::Tensor random_tensor(type, filterWeightsShape); + ov::test::utils::fill_tensor_random(random_tensor); + filterWeightsNode = std::make_shared(random_tensor); } else { filterWeightsNode = std::make_shared(type, filterWeightsShape, filterWeights); } @@ -154,9 +154,10 @@ class ConvolutionBackpropDataExtendedLayerTest std::tie(kernel, stride, padBegin, padEnd, dilation, convOutChannels, padType, outputPad) = convBackpropDataParams; auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); - ov::ParameterVector params {std::make_shared(ngPrc, ov::Shape(inputShape))}; + ov::ParameterVector params{std::make_shared(ngPrc, ov::Shape(inputShape))}; - auto outputShapeNode = std::make_shared(ov::element::Type_t::i64, ov::Shape{outputShapeData.size()}, outputShapeData); + auto outputShapeNode = std::make_shared( + ov::element::Type_t::i64, ov::Shape{outputShapeData.size()}, outputShapeData); auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); auto convBackpropData = std::dynamic_pointer_cast( diff --git a/modules/nvidia_plugin/tests/functional/shared_tests_instances/single_layer_tests/convolution_backprop_data_add.cpp b/modules/nvidia_plugin/tests/functional/shared_tests_instances/single_layer_tests/convolution_backprop_data_add.cpp index bbf2e51b5..2295fa386 100644 --- a/modules/nvidia_plugin/tests/functional/shared_tests_instances/single_layer_tests/convolution_backprop_data_add.cpp +++ b/modules/nvidia_plugin/tests/functional/shared_tests_instances/single_layer_tests/convolution_backprop_data_add.cpp @@ -96,9 +96,9 @@ class ConvolutionBackpropDataAddExtendedLayerTest std::shared_ptr filterWeightsNode; if (filterWeights.empty()) { - ov::Tensor random_tensor(type, filterWeightsShape); - ov::test::utils::fill_tensor_random(random_tensor); - filterWeightsNode = std::make_shared(random_tensor); + ov::Tensor random_tensor(type, filterWeightsShape); + ov::test::utils::fill_tensor_random(random_tensor); + filterWeightsNode = std::make_shared(random_tensor); } else { filterWeightsNode = std::make_shared(type, filterWeightsShape, filterWeights); } @@ -152,9 +152,10 @@ class ConvolutionBackpropDataAddExtendedLayerTest std::tie(kernel, stride, padBegin, padEnd, dilation, convOutChannels, padType, outputPad) = convBackpropDataParams; auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); - ov::ParameterVector params {std::make_shared(ngPrc, ov::Shape(inputShape))}; + ov::ParameterVector params{std::make_shared(ngPrc, ov::Shape(inputShape))}; - auto outputShapeNode = std::make_shared(ov::element::Type_t::i64, ov::Shape{outputShapeData.size()}, outputShapeData); + auto outputShapeNode = std::make_shared( + ov::element::Type_t::i64, ov::Shape{outputShapeData.size()}, outputShapeData); auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); auto convBackpropData = std::dynamic_pointer_cast( diff --git a/modules/nvidia_plugin/tests/functional/shared_tests_instances/single_layer_tests/convolution_biasadd_activation.hpp b/modules/nvidia_plugin/tests/functional/shared_tests_instances/single_layer_tests/convolution_biasadd_activation.hpp index eb11da94c..c5bf77205 100644 --- a/modules/nvidia_plugin/tests/functional/shared_tests_instances/single_layer_tests/convolution_biasadd_activation.hpp +++ b/modules/nvidia_plugin/tests/functional/shared_tests_instances/single_layer_tests/convolution_biasadd_activation.hpp @@ -163,7 +163,7 @@ class BasicConvolutionBiasAddActivationLayerTest } auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); - ov::ParameterVector params {std::make_shared(ngPrc, ov::Shape(inputShape))}; + ov::ParameterVector params{std::make_shared(ngPrc, ov::Shape(inputShape))}; auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); diff --git a/modules/nvidia_plugin/tests/functional/shared_tests_instances/single_layer_tests/cuda_eltwise.cpp b/modules/nvidia_plugin/tests/functional/shared_tests_instances/single_layer_tests/cuda_eltwise.cpp index 700a44977..78e62867c 100644 --- a/modules/nvidia_plugin/tests/functional/shared_tests_instances/single_layer_tests/cuda_eltwise.cpp +++ b/modules/nvidia_plugin/tests/functional/shared_tests_instances/single_layer_tests/cuda_eltwise.cpp @@ -208,7 +208,7 @@ void CudaEltwiseLayerTest::SetUp() { init_input_shapes(shapes); - ov::ParameterVector parameters {std::make_shared(netType, inputDynamicShapes.front())}; + ov::ParameterVector parameters{std::make_shared(netType, inputDynamicShapes.front())}; ov::PartialShape shape_input_secondary; switch (opType) { diff --git a/modules/nvidia_plugin/tests/functional/shared_tests_instances/single_layer_tests/detection_output.cpp b/modules/nvidia_plugin/tests/functional/shared_tests_instances/single_layer_tests/detection_output.cpp index 3a30b9d0c..5877fe9b9 100644 --- a/modules/nvidia_plugin/tests/functional/shared_tests_instances/single_layer_tests/detection_output.cpp +++ b/modules/nvidia_plugin/tests/functional/shared_tests_instances/single_layer_tests/detection_output.cpp @@ -99,15 +99,19 @@ class CudaDetectionOutputLayerTest : public DetectionOutputLayerTest { const auto precent = static_cast(not_matched_ngraph_results.size()) / ngraph_results.size(); if (precent > 0.5) { OPENVINO_THROW("Too many elements not found in reference implementation ", - "with relative comparison of values with threshold ", std::to_string(threshold)); + "with relative comparison of values with threshold ", + std::to_string(threshold)); } for (const auto &[i, ref] : not_matched_ngraph_results) { auto res = std::find_if(ie_results.begin(), ie_results.end(), [ref = ref](const auto &res) { return ref.data.conf == res.data.conf; }); if (res == ie_results.end()) { - OPENVINO_THROW("Cannot find object (index=", std::to_string(i), - ") with relative comparison of values with threshold ", std::to_string(threshold), " failed"); + OPENVINO_THROW("Cannot find object (index=", + std::to_string(i), + ") with relative comparison of values with threshold ", + std::to_string(threshold), + " failed"); } ie_results.erase(res); } diff --git a/modules/nvidia_plugin/tests/functional/shared_tests_instances/single_layer_tests/finite_comparer.cpp b/modules/nvidia_plugin/tests/functional/shared_tests_instances/single_layer_tests/finite_comparer.cpp index 653a0ce3b..5c60d93b0 100644 --- a/modules/nvidia_plugin/tests/functional/shared_tests_instances/single_layer_tests/finite_comparer.cpp +++ b/modules/nvidia_plugin/tests/functional/shared_tests_instances/single_layer_tests/finite_comparer.cpp @@ -130,8 +130,8 @@ inline void callCompare(const std::pair(reinterpret_cast(expectedOut.data()), actualBuffer, size, @@ -141,8 +141,8 @@ inline void callCompare(const std::pair(reinterpret_cast(expectedOut.data()), actualBuffer, size, @@ -176,8 +176,7 @@ void FiniteLayerComparer::Compare(const std::pairbyteSize() * k); diff --git a/modules/nvidia_plugin/tests/functional/shared_tests_instances/single_layer_tests/fully_connected.cpp b/modules/nvidia_plugin/tests/functional/shared_tests_instances/single_layer_tests/fully_connected.cpp index 1f7e0882c..fd3e470db 100644 --- a/modules/nvidia_plugin/tests/functional/shared_tests_instances/single_layer_tests/fully_connected.cpp +++ b/modules/nvidia_plugin/tests/functional/shared_tests_instances/single_layer_tests/fully_connected.cpp @@ -82,7 +82,8 @@ class FullyConnectedLayerTest : public testing::WithParamInterface(ngPrc, ov::Shape{shapeRelatedParams.input1.first})}; + ov::ParameterVector params{ + std::make_shared(ngPrc, ov::Shape{shapeRelatedParams.input1.first})}; std::shared_ptr secondaryInput; if (secondaryInputType == ngraph::helpers::InputLayerType::PARAMETER) { @@ -94,7 +95,8 @@ class FullyConnectedLayerTest : public testing::WithParamInterface(ngPrc, shapeRelatedParams.input3); auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); - auto MatMul = std::make_shared(paramOuts[0], secondaryInput, shapeRelatedParams.input1.second, shapeRelatedParams.input2.second); + auto MatMul = std::make_shared( + paramOuts[0], secondaryInput, shapeRelatedParams.input1.second, shapeRelatedParams.input2.second); auto Add = std::make_shared(MatMul, thirdInput); ov::ResultVector results{std::make_shared(Add)}; function = std::make_shared(results, params, "FullyConnected"); @@ -188,26 +190,33 @@ class FullyConnectedLayer2MatMulTest : public testing::WithParamInterface(ngPrc, ov::Shape(shapeRelatedParams.matmul1_input1.first))); - params.push_back(std::make_shared(ngPrc, ov::Shape(shapeRelatedParams.matmul2_input1.first))); + params.push_back( + std::make_shared(ngPrc, ov::Shape(shapeRelatedParams.matmul1_input1.first))); + params.push_back( + std::make_shared(ngPrc, ov::Shape(shapeRelatedParams.matmul2_input1.first))); std::shared_ptr matmul0SecondaryInput; if (secondaryInputType == ngraph::helpers::InputLayerType::PARAMETER) { - matmul0SecondaryInput = std::make_shared(ngPrc, ov::Shape(shapeRelatedParams.matmul1_input2.first)); + matmul0SecondaryInput = + std::make_shared(ngPrc, ov::Shape(shapeRelatedParams.matmul1_input2.first)); params.push_back(std::static_pointer_cast(matmul0SecondaryInput)); } else { - matmul0SecondaryInput = std::make_shared(ngPrc, shapeRelatedParams.matmul1_input2.first); + matmul0SecondaryInput = + std::make_shared(ngPrc, shapeRelatedParams.matmul1_input2.first); } std::shared_ptr matmul1SecondaryInput; if (secondaryInputType == ngraph::helpers::InputLayerType::PARAMETER) { - matmul1SecondaryInput = std::make_shared(ngPrc, ov::Shape(shapeRelatedParams.matmul2_input2.first)); + matmul1SecondaryInput = + std::make_shared(ngPrc, ov::Shape(shapeRelatedParams.matmul2_input2.first)); params.push_back(std::static_pointer_cast(matmul1SecondaryInput)); } else { - matmul1SecondaryInput = std::make_shared(ngPrc, shapeRelatedParams.matmul2_input2.first); + matmul1SecondaryInput = + std::make_shared(ngPrc, shapeRelatedParams.matmul2_input2.first); } - auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); + auto paramOuts = + ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); auto matMul0 = std::make_shared(paramOuts[0], matmul0SecondaryInput, shapeRelatedParams.matmul1_input1.second, diff --git a/modules/nvidia_plugin/tests/functional/shared_tests_instances/single_layer_tests/gather.cpp b/modules/nvidia_plugin/tests/functional/shared_tests_instances/single_layer_tests/gather.cpp index bd3d74bcd..26bc189c6 100644 --- a/modules/nvidia_plugin/tests/functional/shared_tests_instances/single_layer_tests/gather.cpp +++ b/modules/nvidia_plugin/tests/functional/shared_tests_instances/single_layer_tests/gather.cpp @@ -716,8 +716,14 @@ void test_one_shape(const GatherTestParams& params, bool is_v7) { std::vector> emptyTensor; std::map emptyMapping; ov::nvidia_gpu::CudaGraphContext cudaGraphContext; - ov::nvidia_gpu::InferenceRequestContext context{ - emptyTensor, emptyMapping, emptyTensor, emptyMapping, threadContext, token, simpleExecutionDelegator, cudaGraphContext}; + ov::nvidia_gpu::InferenceRequestContext context{emptyTensor, + emptyMapping, + emptyTensor, + emptyMapping, + threadContext, + token, + simpleExecutionDelegator, + cudaGraphContext}; std::vector indices = generate_indices(params); std::vector dict(dict_size); std::random_device r_device; diff --git a/modules/nvidia_plugin/tests/functional/shared_tests_instances/single_layer_tests/group_convolution_autogenerated.cpp b/modules/nvidia_plugin/tests/functional/shared_tests_instances/single_layer_tests/group_convolution_autogenerated.cpp index ff40d9ca0..6c73909ea 100644 --- a/modules/nvidia_plugin/tests/functional/shared_tests_instances/single_layer_tests/group_convolution_autogenerated.cpp +++ b/modules/nvidia_plugin/tests/functional/shared_tests_instances/single_layer_tests/group_convolution_autogenerated.cpp @@ -15,9 +15,7 @@ namespace LayerTestsDefinitions { class GroupConvolutionLayerThresholdTest : public FiniteComparer { protected: - void SetUp() override { - GroupConvolutionLayerTest::SetUp(); - } + void SetUp() override { GroupConvolutionLayerTest::SetUp(); } }; TEST_P(GroupConvolutionLayerThresholdTest, CompareWithRefs) { diff --git a/modules/nvidia_plugin/tests/functional/shared_tests_instances/single_layer_tests/group_convolution_biasadd_activation.cpp b/modules/nvidia_plugin/tests/functional/shared_tests_instances/single_layer_tests/group_convolution_biasadd_activation.cpp index 38add843d..70e1228c1 100644 --- a/modules/nvidia_plugin/tests/functional/shared_tests_instances/single_layer_tests/group_convolution_biasadd_activation.cpp +++ b/modules/nvidia_plugin/tests/functional/shared_tests_instances/single_layer_tests/group_convolution_biasadd_activation.cpp @@ -132,13 +132,13 @@ const auto smoke_2D_ExplicitPaddingSymmetric_Params = ::testing::Combine( ::testing::ValuesIn(netActivations)); const auto smoke_2D_ExplicitPaddingSymmetric_Params2 = ::testing::Combine( - ::testing::Combine(::testing::Combine(::testing::Values(std::vector({3, 3})), // kernels - ::testing::Values(std::vector({2, 2})), // strides - ::testing::Values(std::vector({1, 1})), // pads_begin - ::testing::Values(std::vector({1, 1})), // pads_end - ::testing::Values(std::vector({1, 1})), // dilations - ::testing::Values(96), // out channels - ::testing::Values(2), // groups + ::testing::Combine(::testing::Combine(::testing::Values(std::vector({3, 3})), // kernels + ::testing::Values(std::vector({2, 2})), // strides + ::testing::Values(std::vector({1, 1})), // pads_begin + ::testing::Values(std::vector({1, 1})), // pads_end + ::testing::Values(std::vector({1, 1})), // dilations + ::testing::Values(96), // out channels + ::testing::Values(2), // groups ::testing::Values(ov::op::PadType::EXPLICIT)), ::testing::ValuesIn(netPrecisions), ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), @@ -150,13 +150,13 @@ const auto smoke_2D_ExplicitPaddingSymmetric_Params2 = ::testing::Combine( ::testing::ValuesIn(netActivations)); const auto smoke_2D_ExplicitPaddingSymmetric_Params3 = ::testing::Combine( - ::testing::Combine(::testing::Combine(::testing::Values(std::vector({1, 1})), // kernels - ::testing::Values(std::vector({1, 1})), // strides - ::testing::Values(std::vector({0, 0})), // pads_begin - ::testing::Values(std::vector({0, 0})), // pads_end - ::testing::Values(std::vector({1, 1})), // dilations - ::testing::Values(160), // out channels - ::testing::Values(2), // groups + ::testing::Combine(::testing::Combine(::testing::Values(std::vector({1, 1})), // kernels + ::testing::Values(std::vector({1, 1})), // strides + ::testing::Values(std::vector({0, 0})), // pads_begin + ::testing::Values(std::vector({0, 0})), // pads_end + ::testing::Values(std::vector({1, 1})), // dilations + ::testing::Values(160), // out channels + ::testing::Values(2), // groups ::testing::Values(ov::op::PadType::EXPLICIT)), ::testing::ValuesIn(netPrecisions), ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), @@ -168,13 +168,13 @@ const auto smoke_2D_ExplicitPaddingSymmetric_Params3 = ::testing::Combine( ::testing::ValuesIn(netActivations)); const auto smoke_2D_ExplicitPaddingSymmetric_Params4 = ::testing::Combine( - ::testing::Combine(::testing::Combine(::testing::Values(std::vector({1, 1})), // kernels - ::testing::Values(std::vector({1, 1})), // strides - ::testing::Values(std::vector({0, 0})), // pads_begin - ::testing::Values(std::vector({0, 0})), // pads_end - ::testing::Values(std::vector({1, 1})), // dilations - ::testing::Values(40), // out channels - ::testing::Values(2), // groups + ::testing::Combine(::testing::Combine(::testing::Values(std::vector({1, 1})), // kernels + ::testing::Values(std::vector({1, 1})), // strides + ::testing::Values(std::vector({0, 0})), // pads_begin + ::testing::Values(std::vector({0, 0})), // pads_end + ::testing::Values(std::vector({1, 1})), // dilations + ::testing::Values(40), // out channels + ::testing::Values(2), // groups ::testing::Values(ov::op::PadType::EXPLICIT)), ::testing::ValuesIn(netPrecisions), ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), diff --git a/modules/nvidia_plugin/tests/functional/shared_tests_instances/single_layer_tests/lstm_cell.cpp b/modules/nvidia_plugin/tests/functional/shared_tests_instances/single_layer_tests/lstm_cell.cpp index 41505597f..0cdd46ce1 100644 --- a/modules/nvidia_plugin/tests/functional/shared_tests_instances/single_layer_tests/lstm_cell.cpp +++ b/modules/nvidia_plugin/tests/functional/shared_tests_instances/single_layer_tests/lstm_cell.cpp @@ -282,8 +282,14 @@ void testOneShape(const LSTMCellTestParams& params) { ov::nvidia_gpu::CancellationToken token{}; ov::nvidia_gpu::SimpleExecutionDelegator simpleExecutionDelegator{}; ov::nvidia_gpu::CudaGraphContext cudaGraphContext; - ov::nvidia_gpu::InferenceRequestContext context{ - emptyTensor, emptyMapping, emptyTensor, emptyMapping, threadContext, token, simpleExecutionDelegator, cudaGraphContext}; + ov::nvidia_gpu::InferenceRequestContext context{emptyTensor, + emptyMapping, + emptyTensor, + emptyMapping, + threadContext, + token, + simpleExecutionDelegator, + cudaGraphContext}; std::vector x_host(x_size); std::vector hi_host(hi_size); std::vector ci_host(ci_size); diff --git a/modules/nvidia_plugin/tests/functional/shared_tests_instances/single_layer_tests/lstm_sequence.cpp b/modules/nvidia_plugin/tests/functional/shared_tests_instances/single_layer_tests/lstm_sequence.cpp index 938e5baed..5d27b0c12 100644 --- a/modules/nvidia_plugin/tests/functional/shared_tests_instances/single_layer_tests/lstm_sequence.cpp +++ b/modules/nvidia_plugin/tests/functional/shared_tests_instances/single_layer_tests/lstm_sequence.cpp @@ -35,23 +35,23 @@ class CUDALSTMSequenceTest : public UnsymmetricalComparer { } }; -using LSTMSequenceOptimizedParams = typename std::tuple< - ngraph::helpers::SequenceTestsMode, // pure Sequence or TensorIterator - size_t, // seq_lengths - size_t, // batch - size_t, // hidden size - size_t, // input size - std::vector, // activations - float, // clip - std::string, // major batch - ngraph::helpers::InputLayerType, // WRB input type (Constant or Parameter) - InferenceEngine::Precision, // Network precision - std::string>; // Device name +using LSTMSequenceOptimizedParams = + typename std::tuple