Update for reviewer comments
riverlijunjie committed Sep 11, 2023
1 parent 5d9330d commit 94ecdf3
Showing 9 changed files with 23 additions and 50 deletions.
2 changes: 1 addition & 1 deletion src/plugins/intel_cpu/src/compiled_model.h
@@ -57,7 +57,7 @@ class CompiledModel : public ov::ICompiledModel {

// Generic synchronization primitive on CompiledModel level.
// Usage example: helps to avoid data races during CPU Graph initialization in multi-streams scenario
- mutable std::shared_ptr<std::mutex> m_mutex;
+ std::shared_ptr<std::mutex> m_mutex;
Config m_cfg;
ExtensionManager::Ptr extensionManager;
mutable std::atomic_int m_numRequests = {0};
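An aside on the m_mutex line above: because the mutex is owned through a std::shared_ptr, it can still be locked from const member functions even without the mutable qualifier, since locking *m_mutex never modifies the pointer member itself. A minimal sketch under that assumption (illustrative class, not the plugin's actual CompiledModel):

#include <memory>
#include <mutex>

// Illustrative holder, not the CPU plugin's CompiledModel.
class Holder {
public:
    Holder() : m_mutex(std::make_shared<std::mutex>()) {}

    // Dereferencing a const shared_ptr<std::mutex> yields a non-const
    // std::mutex&, so the lock can be taken inside a const method.
    void read_state() const {
        std::lock_guard<std::mutex> guard(*m_mutex);
        // ... read shared state under the lock ...
    }

private:
    std::shared_ptr<std::mutex> m_mutex;  // no `mutable` required
};

int main() {
    Holder holder;
    holder.read_state();
    return 0;
}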
11 changes: 1 addition & 10 deletions src/plugins/intel_cpu/src/dnnl_extension_utils.cpp
@@ -64,17 +64,8 @@ memory::data_type DnnlExtensionUtils::IEPrecisionToDataType(const InferenceEngin
return memory::data_type::f64;
case InferenceEngine::Precision::UNSPECIFIED:
return memory::data_type::undef;
- // Keep same data_size for unsupported precision
- case InferenceEngine::Precision::U64:
- case InferenceEngine::Precision::I64:
- return memory::data_type::f64;
- case InferenceEngine::Precision::U32:
- return memory::data_type::s32;
- case InferenceEngine::Precision::U16:
- case InferenceEngine::Precision::I16:
- return memory::data_type::f16;
default: {
- IE_THROW() << "The plugin does not support " << prec.name();
+ return memory::data_type::undef;
}
}
}
10 changes: 5 additions & 5 deletions src/plugins/intel_cpu/src/graph.cpp
@@ -112,9 +112,9 @@ void Graph::CreateGraph(const std::vector<NodePtr>& graphNodes,
}

template void Graph::CreateGraph(const std::shared_ptr<const ov::Model>&, const GraphContext::CPtr);
- void Graph::Replicate(const std::shared_ptr<const ov::Model> &subgraph) {
+ void Graph::Replicate(const std::shared_ptr<const ov::Model> &model) {
OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, itt::domains::intel_cpu_LT, "Graph::Replicate", "ov::Model");
- this->_name = subgraph->get_friendly_name();
+ this->_name = model->get_friendly_name();
this->reuse_io_tensors = false;

// Map data object onto producer node
@@ -137,7 +137,7 @@ void Graph::Replicate(const std::shared_ptr<const ov::Model> &subgraph) {
};

const bool is_legacy_api = getConfig().isLegacyApi;
- for (const auto& op : subgraph->get_ordered_ops()) {
+ for (const auto& op : model->get_ordered_ops()) {
const NodePtr node {Node::factory().create(op, context)};

graphNodes.push_back(node);
@@ -202,7 +202,7 @@ void Graph::Replicate(const std::shared_ptr<const ov::Model> &subgraph) {
};

auto find_input_port_prec = [&](const std::string& name) -> ov::element::Type_t {
- for (auto& it : subgraph->inputs()) {
+ for (auto& it : model->inputs()) {
auto port_name = get_port_name(it, is_legacy_api);
if (port_name == name)
return it.get_element_type();
@@ -220,7 +220,7 @@ void Graph::Replicate(const std::shared_ptr<const ov::Model> &subgraph) {
}

auto find_output_port_prec = [&](const std::string& name) -> ov::element::Type_t {
- for (auto& it : subgraph->outputs()) {
+ for (auto& it : model->outputs()) {
auto port_name = get_port_name(it, is_legacy_api);
if (port_name == name)
return it.get_element_type();
19 changes: 10 additions & 9 deletions src/plugins/intel_cpu/src/infer_request.cpp
@@ -148,10 +148,8 @@ void SyncInferRequest::redefine_memory_for_input_nodes() {
}
}

- void SyncInferRequest::update_external_inputs() {
+ void SyncInferRequest::update_external_tensor_ptrs() {
// Update it due to batched_tensors case will update input tensor
- if (m_batched_tensors.size() == 0)
- return;
for (auto input : get_inputs()) {
std::string input_name = get_port_name(input, m_is_legacy_api);
if (input_name.empty()) {
@@ -173,7 +171,10 @@ void SyncInferRequest::infer() {

throw_if_canceled();
convert_batched_tensors();
- update_external_inputs();
+ if (m_batched_tensors.size() > 0) {
+ // batched_tensors will be updated for each infer, external_ptr should be update together
+ update_external_tensor_ptrs();
+ }

if (graph->hasDynamicInput()) {
redefine_memory_for_input_nodes();
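For context on the m_batched_tensors branch added above: the map is only populated when a caller provides several per-sample tensors for one input via set_tensors(); those tensors are re-gathered into a single input on every infer(), so the cached external pointers have to be refreshed each time as well. A hedged user-side sketch (the model path, input shapes, and batch size are assumptions for illustration):

#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    // "model.xml" is a placeholder; the first input is assumed to accept a batch of 2.
    auto compiled = core.compile_model("model.xml", "CPU");
    auto request = compiled.create_infer_request();

    // Two per-sample tensors for a single input port; the request keeps them
    // as batched tensors until infer() gathers them into one contiguous input.
    ov::Tensor sample0(ov::element::f32, {1, 3, 224, 224});
    ov::Tensor sample1(ov::element::f32, {1, 3, 224, 224});
    request.set_tensors(compiled.input(), {sample0, sample1});

    request.infer();
    return 0;
}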
@@ -450,7 +451,7 @@ void SyncInferRequest::set_tensor(const ov::Output<const ov::Node>& in_port, con
<< ", if model input tensor precision is: " << netInPrc;
}

- const auto shape = port.get_partial_shape();
+ const auto& shape = port.get_partial_shape();
const bool isDynamic = shape.is_dynamic();
if (!shape.compatible(ov::PartialShape(tensor->get_shape()))) {
OPENVINO_THROW("The tensor size is not equal to model, can't set input tensor with name: ",
@@ -496,7 +497,7 @@ void SyncInferRequest::set_tensor(const ov::Output<const ov::Node>& in_port, con
<< ", if model output tensor precision is: " << netOutPrc;
}

- const auto shape = port.get_partial_shape();
+ const auto& shape = port.get_partial_shape();
const bool isDynamic = shape.is_dynamic();

if (!shape.compatible(ov::PartialShape(tensor->get_shape()))) {
@@ -549,7 +550,7 @@ void SyncInferRequest::init_tensor(const std::string& name) {
OPENVINO_THROW("Graph is not ready!");

if (name.empty())
OPENVINO_THROW("Can't preapre tensor for empty name! ");
OPENVINO_ASSERT("Can't prepare tensor for empty name! ");

ov::SoPtr<ITensor> tensor;
const auto& inMap = graph->inputNodesMap;
@@ -561,7 +562,7 @@ void SyncInferRequest::init_tensor(const std::string& name) {
tensor = ov::ISyncInferRequest::get_tensor(port);

if (!tensor) {
- const auto shape = port.get_partial_shape();
+ const auto& shape = port.get_partial_shape();
const bool isDynamic = shape.is_dynamic();
ov::Shape tensor_shape;
if (isDynamic) {
@@ -593,7 +594,7 @@ void SyncInferRequest::init_tensor(const std::string& name) {
auto output_port = m_output_ports_map.find(name);
auto port = output_port->second;
const auto port_shape = port.get_partial_shape();
- if (m_output_ports_map.find(name) != m_output_ports_map.end()) {
+ if (output_port != m_output_ports_map.end()) {
const auto& graph_shape = output->second->getInputShapeAtPort(0);

// WA, due to the transformations and constant folding, shape inference of the resulting model may
4 changes: 2 additions & 2 deletions src/plugins/intel_cpu/src/infer_request.h
@@ -106,8 +106,8 @@ class SyncInferRequest : public ov::ISyncInferRequest {
void pull_states();
void redefine_memory_for_input_nodes();

- void update_external_inputs();
- InferenceEngine::TensorDesc create_tensor_desc(const ov::SoPtr<ov::ITensor>& tensor);
+ void update_external_tensor_ptrs();
+ static InferenceEngine::TensorDesc create_tensor_desc(const ov::SoPtr<ov::ITensor>& tensor);
const ov::Output<const ov::Node>& get_internal_port(const ov::Output<const ov::Node>& port) const;
bool m_is_legacy_api = false;

2 changes: 1 addition & 1 deletion src/plugins/intel_cpu/src/plugin.cpp
@@ -530,7 +530,7 @@ Engine::compile_model(const std::shared_ptr<const ov::Model>& model, const ov::A

if ((cloned_model->inputs().size() != model->inputs().size()) ||
(cloned_model->outputs().size() != model->outputs().size())) {
OPENVINO_THROW("Input/output port size mismatched!");
OPENVINO_THROW("Input/output port size mismatched after transformation!");
}
// Make output ports have the same tensor names with original model
for (size_t idx = 0; idx < cloned_model->outputs().size(); idx++) {
2 changes: 1 addition & 1 deletion src/plugins/intel_cpu/src/plugin.h
@@ -24,7 +24,7 @@ class Engine : public ov::IPlugin {
"",
false,
"Not Implemented",
"compile_model with RemoteContext is not supported by this plugin!");
"compile_model with RemoteContext is not supported by CPU plugin!");
};

void set_property(const ov::AnyMap& properties) override;
2 changes: 1 addition & 1 deletion src/plugins/intel_cpu/src/serialize.cpp
@@ -24,7 +24,7 @@ void setInfo(pugi::xml_object_range<pugi::xml_named_node_iterator>&& nodes, T&&
auto shape_attr = nodes_it->attribute("shape");

if (!name_attr || !precision_attr || !shape_attr || info_iter == info.end()) {
- IE_THROW(NetworkNotRead) << "The inputs/outputs information is invalid.";
+ OPENVINO_THROW("NetworkNotRead: the inputs/outputs information is invalid.");
}
info_iter->get_tensor_ptr()->set_element_type(ov::element::Type(precision_attr.value()));
info_iter->get_tensor_ptr()->set_tensor_type(ov::element::Type(precision_attr.value()),
@@ -199,6 +199,7 @@ precisions_map Transformations::get_convert_precisions() {
{ov::element::i4, ov::element::i8},
{ov::element::u4, ov::element::u8}};

+ // @todo should we always convert to f32 regardless of hardware support, as it is done for f16?
if (!dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core))
map.insert({ov::element::bf16, ov::element::f32});

@@ -262,26 +263,6 @@ void Transformations::PreLpt(const std::vector<ov::element::Type>& defaultPrecis
}, ov::pass::MarkDequantizationSubgraph);
}

- auto get_convert_precisions = []() {
- precisions_map map = {
- {ov::element::i64, ov::element::i32},
- {ov::element::u64, ov::element::i32},
- {ov::element::i16, ov::element::i32},
- {ov::element::u16, ov::element::i32},
- {ov::element::u32, ov::element::i32},
- {ov::element::f64, ov::element::f32},
- {ov::element::f16, ov::element::f32},
- {ov::element::boolean, ov::element::u8},
- {ov::element::i4, ov::element::i8},
- {ov::element::u4, ov::element::u8}
- };
- // @todo should we always convert to f32 regardless of hardware support, as it is done for f16?
- if (!dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core))
- map.insert({ov::element::bf16, ov::element::f32});
-
- return map;
- };
-
static const auto precisions = get_convert_precisions();
type_to_fuse_map type_to_fuse = {{ov::opset10::Convert::get_type_info_static(), fuse_type_to_convert}};

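For reference, the precisions_map returned by get_convert_precisions() is consumed together with the type_to_fuse_map shown above by ov::pass::ConvertPrecision. A rough sketch of that wiring (the header path, constructor arguments, and example mappings are assumptions, not the plugin's exact pass registration):

#include <memory>

#include "openvino/core/model.hpp"
#include "openvino/pass/manager.hpp"
#include "transformations/convert_precision.hpp"

// Assumed wiring: hand a precisions_map (and optionally a type_to_fuse_map)
// to ConvertPrecision and run it through a pass manager.
void convert_precisions(const std::shared_ptr<ov::Model>& model) {
    precisions_map map = {
        {ov::element::i64, ov::element::i32},
        {ov::element::f64, ov::element::f32},
    };
    type_to_fuse_map type_to_fuse = {};

    ov::pass::Manager manager;
    manager.register_pass<ov::pass::ConvertPrecision>(map, type_to_fuse);
    manager.run_passes(model);
}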
