fixed build

eshoguli · Nov 14, 2024 · e6d4880 · e6d4880
1 parent 03315bf
commit e6d4880
Show file tree

Hide file tree

Showing 8 changed files with 433 additions and 327 deletions.
diff --git a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_fullyconnected.cpp b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_fullyconnected.cpp
diff --git a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_fullyconnected.hpp b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_fullyconnected.hpp
@@ -11,42 +11,6 @@
 namespace ov {
 namespace intel_cpu {
 
-struct ACLFCAttrs {
-    ov::element::Type inputPrecision;
-    bool isConvertedWeights = false;
-    bool isWeightsRepacked = false;
-    bool weightsNonTransposed;
-};
-
-namespace acl_fc_executor {
-
-class ACLWeightsConverter : public ACLCommonExecutor {
-public:
-    ACLWeightsConverter() = default;
-    void updateTensorsShapes(ACLShapes& aclMemoryShapes) override {}
-    arm_compute::Status validateTensorsInfo(const ACLInfos & aclMemoryInfos) override;
-    ACLFunction configureFunction(const ACLTensors & aclMemoryTensors) override;
-};
-
-class ACLWeightFormatGenerator : public ACLCommonExecutor {
-public:
-    ACLWeightFormatGenerator(const FCAttrs& attrs,
-                             const PostOps& postOps,
-                             const MemoryArgs& memory);
-    void updateTensorsShapes(ACLShapes& aclMemoryShapes) override;
-    arm_compute::Status validateTensorsInfo(const ACLInfos & aclMemoryInfos) override;
-    ACLFunction configureFunction(const ACLTensors & aclMemoryTensors) override;
-    arm_compute::WeightFormat getOptImplWeightFormat() {
-        return expectedWeightFormat;
-    }
-private:
-    arm_compute::FullyConnectedLayerInfo fullyConnectedLayerInfo;
-    ACLFCAttrs aclfcAttrs;
-    arm_compute::WeightFormat expectedWeightFormat;
-};
-
-}  // namespace acl_fc_executor
-
 class ACLFullyConnectedExecutor : public ACLCommonExecutor {
 public:
     ACLFullyConnectedExecutor(const FCAttrs& attrs,

diff --git a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_fullyconnected_utils.cpp b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_fullyconnected_utils.cpp
diff --git a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_fullyconnected_utils.hpp b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_fullyconnected_utils.hpp
@@ -1,13 +1,108 @@
 // Copyright (C) 2024 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
-
 #pragma once
-
 #include "acl_common_executor.hpp"
+#include "ov_optional.hpp"
 #include "nodes/executors/fullyconnected_config.hpp"
 
 namespace ov {
 namespace intel_cpu {
+
+struct ACLFCAttrs {
+    ov::element::Type inputPrecision;
+    bool isConvertedWeights = false;
+    bool isWeightsRepacked = false;
+    bool weightsNonTransposed;
+};
+
+namespace acl_fc_executor {
+
+VectorDims makeDummyInputDims(const Shape& inShape, const Shape& wShape);
+
+VectorDims makeDummyOutputDims(const VectorDims& inShape, const VectorDims& wShape, const size_t out_rank);
+
+DnnlMemoryDescPtr makeTransposedWeightDescriptor(const DnnlMemoryDescPtr srcDesc,
+                                                        const DnnlMemoryDescPtr dstDesc);
+
+ov::optional<MemoryPtr> convertWeightPrecision(MemoryPtr input,
+                                                      MemoryPtr output,
+                                                      ov::element::Type weightPrecision);
+
+ov::optional<MemoryPtr> reorderDataFallback(MemoryPtr input,
+                                                   MemoryPtr output,
+                                                   ExecutorContext::CPtr context);
+
+MemoryPtr reorderData(DnnlMemoryDescPtr srcWeightDesc,
+                             DnnlMemoryDescPtr dstWeightDesc,
+                             MemoryCPtr weightsMem,
+                             ExecutorContext::CPtr context);
+
+MemoryPtr reorderWeights(const MemoryArgs &memory,
+                                const ExecutorContext::CPtr context,
+                                ACLFCAttrs& aclfcAttrs,
+                                DnnlMemoryDescPtr dnnlSrcDesc,
+                                DnnlMemoryDescPtr dnnlDstDesc);
+
+MemoryPtr prepareWeightMemory(const MemoryArgs &memory,
+                                     const ExecutorContext::CPtr context,
+                                     const FCAttrs &attrs,
+                                     ACLFCAttrs& aclfcAttrs,
+                                     const PostOps &postOps,
+                                     arm_compute::WeightFormat& expectedWeightFormat,
+                                     arm_compute::TensorInfo& weiTensorInfo);
+
+arm_compute::TensorShape normalizeDimsTo2D(const arm_compute::TensorShape shape);
+
+void updateFCTensorsShapes(ACLShapes& aclMemoryShapes);
+
+class ACLWeightsConverter : public ACLCommonExecutor {
+public:
+    ACLWeightsConverter() = default;
+    void updateTensorsShapes(ACLShapes& aclMemoryShapes) override {}
+    arm_compute::Status validateTensorsInfo(const ACLInfos & aclMemoryInfos) override;
+    ACLFunction configureFunction(const ACLTensors & aclMemoryTensors) override;
+};
+
+class ACLWeightsTranspose : public ACLCommonExecutor {
+public:
+    ACLWeightsTranspose() = default;
+    void updateTensorsShapes(ACLShapes& aclMemoryShapes) override {}
+    arm_compute::Status validateTensorsInfo(const ACLInfos & aclMemoryInfos) override;
+    ACLFunction configureFunction(const ACLTensors & aclMemoryTensors) override;
+};
+
+class ACLWeightFormatGenerator : public ACLCommonExecutor {
+public:
+    ACLWeightFormatGenerator(const FCAttrs& attrs,
+                             const PostOps& postOps,
+                             const MemoryArgs& memory);
+    void updateTensorsShapes(ACLShapes& aclMemoryShapes) override;
+    arm_compute::Status validateTensorsInfo(const ACLInfos & aclMemoryInfos) override;
+    ACLFunction configureFunction(const ACLTensors & aclMemoryTensors) override;
+    arm_compute::WeightFormat getOptImplWeightFormat() {
+        return expectedWeightFormat;
+    }
+private:
+    arm_compute::FullyConnectedLayerInfo fullyConnectedLayerInfo;
+    arm_compute::WeightsInfo weightsInfo;
+    ACLFCAttrs aclfcAttrs;
+    arm_compute::WeightFormat expectedWeightFormat;
+};
+
+class ACLWeightsReorder : public ACLCommonExecutor {
+public:
+    ACLWeightsReorder(arm_compute::WeightFormat inWeightFormat,
+                      arm_compute::WeightFormat outWeightFormat)
+                      : inWeightFormat(inWeightFormat), outWeightFormat(outWeightFormat) {}
+    void updateTensorsShapes(ACLShapes& aclMemoryShapes) override {}
+    arm_compute::Status validateTensorsInfo(const ACLInfos & aclMemoryInfos) override;
+    ACLFunction configureFunction(const ACLTensors & aclMemoryTensors) override;
+private:
+    arm_compute::WeightFormat inWeightFormat;
+    arm_compute::WeightFormat outWeightFormat;
+};
+
+}  // namespace acl_fc_executor
 }  // namespace intel_cpu
-}  // namespace ov
+}  // namespace ov
diff --git a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_lowp_fullyconnected.cpp b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_lowp_fullyconnected.cpp
@@ -58,7 +58,7 @@ ACLLowpFullyConnectedExecutor::ACLLowpFullyConnectedExecutor(const FCAttrs &attr
                                                              const MemoryArgs &memory,
                                                              const ExecutorContext::CPtr& context) : dequantizationScales(attrs.dequantizationScales) {
     initFCAttrs(attrs, aclTensorAttrs, aclfcAttrs, memory, gemmInfo, postOps);
-    packedWeights = acl_fc_executor::prepareWeightMemory(memory, context, attrs, aclfcAttrs, postOps);
+    packedWeights = acl_fc_executor::prepareWeightMemory(memory, context, attrs, aclfcAttrs, postOps, expectedWeightFormat, weiTensorInfo);
 }
 
 bool ACLLowpFullyConnectedExecutor::supports(const FCConfig &config) {

diff --git a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_lowp_fullyconnected.hpp b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_lowp_fullyconnected.hpp
@@ -37,7 +37,8 @@ class ACLLowpFullyConnectedExecutor : public ACLCommonExecutor {
 
 private:
     arm_compute::GEMMInfo gemmInfo;
-    arm_compute::WeightsInfo weightsInfo;
+    arm_compute::WeightFormat expectedWeightFormat;
+    arm_compute::TensorInfo weiTensorInfo;
 
     MemoryCPtr packedWeights;
     ACLFCAttrs aclfcAttrs;

diff --git a/src/plugins/intel_cpu/src/nodes/executors/fullyconnected_implementations.cpp b/src/plugins/intel_cpu/src/nodes/executors/fullyconnected_implementations.cpp
@@ -90,7 +90,7 @@ static const TypeMapping aclFCTypeMapping {
 
 static const TypeMapping aclLowpFCTypeMapping {
     // {src, wei, bia, dst}                  pt<src, wei, bias, dst>
-    {{_i8, _i8, _any, _f32},                 pt(bypass(), bypass(), just(), bypass())}
+    {{_i8, _i8, _any, _f32},                 pt(bypass(), bypass(), just<f32>(), bypass())}
 };
 
 static const MappingNotation dnnlConvolutionMappingNotation {

diff --git a/src/tests/ov_helpers/ov_lpt_models/include/ov_lpt_models/mat_mul.hpp b/src/tests/ov_helpers/ov_lpt_models/include/ov_lpt_models/mat_mul.hpp
@@ -29,7 +29,7 @@ class MatMulFunction {
         const bool transpose1,
         const bool transpose2,
         const bool signedWeights,
-        const bool perChannelWeightsDequantization ,
+        const bool perChannelWeightsDequantization,
         const bool relu,
         const bool fq);