Commit 563ad0b
[CPU] [ARM] FullyConnected: performance measurement workarounds: FQ decomposition, dequantize
eshoguli committed Aug 9, 2024
1 parent 9c69960 commit 563ad0b
Showing 4 changed files with 16 additions and 8 deletions.
6 changes: 6 additions & 0 deletions src/plugins/intel_cpu/src/graph_optimizer.cpp
@@ -5,6 +5,7 @@
 #include "graph_optimizer.h"
 
 #include "dnnl_extension_utils.h"
+#include "low_precision/rt_info/bias_attribute.hpp"
 #include "nodes/bin_conv.h"
 #include "nodes/common/cpu_convert.h"
 #include "nodes/conv.h"
@@ -278,6 +279,11 @@ void GraphOptimizer::FuseConvMatmulFCDeconvAndDQScales(Graph &graph) {
         auto scales = mul->getParentEdgeAt(1)->getParent();
         if (!scaleDimsCheck(node, scales)) continue;
 
+        // TODO: debug only: how to check if attribute exists for CPU node (ov::marked_as_bias(mul))
+        if ((node->getType() == Type::FullyConnected) || (node->getType() == Type::MatMul)) {
+            continue;
+        }
+
         if (initializeDeQuantizedScales(node, scales)) {
             DEBUG_LOG("GraphOptimizer##FusingDQ: Node ##", mul->getName(), " optimized as DQ scales of Node ##", node->getName());
             node->addOriginalLayer(mul->getOriginalLayers());
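The TODO in this hunk asks how to run the ov::marked_as_bias() check once the model has been lowered to CPU-plugin nodes, which no longer carry ov::Node rt_info. On the ov::Model side the check itself is a one-liner; a minimal sketch, assuming the original ov::Node is still reachable (the helper name is illustrative):

#include "low_precision/rt_info/bias_attribute.hpp"
#include "openvino/core/node.hpp"

// Sketch: LPT tags bias Add nodes by attaching a BiasAttribute to rt_info;
// ov::marked_as_bias() just looks that attribute up. To use this from the
// CPU graph, the flag would have to be captured before the ov::Node is
// dropped during graph construction.
bool isBiasAdd(const std::shared_ptr<const ov::Node>& node) {
    return ov::marked_as_bias(node);
}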
@@ -79,7 +79,10 @@ bool ACLLowpFullyConnectedExecutor::supports(const FCConfig &config) {
     //VERIFY(postOpsNumbers(config) == 0, UNSUPPORTED_NUMBER_OF_POSTOPS);
     VERIFY(one_of(srcRank(config), 2U, 3U, 4U), UNSUPPORTED_SRC_RANK);
     VERIFY(one_of(weiRank(config), 2U, 3U, 4U), UNSUPPORTED_WEI_RANK);
-    VERIFY(static_cast<FCAttrs>(config.attrs).dequantizationScales.size() <= 1, UNSUPPORTED_PER_CHANNEL_QUANTIZATION);
+
+    const auto attrs = static_cast<FCAttrs>(config.attrs);
+    VERIFY(attrs.dequantizationScales.size() <= 1, UNSUPPORTED_PER_CHANNEL_QUANTIZATION);
+
     return true;
 }
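The guard above limits the ACL low-precision path to per-tensor dequantization (a single scale); per-channel quantization would need one scale per output channel. A sketch of the arithmetic being gated, with a hypothetical helper not taken from the patch:

#include <cstdint>
#include <vector>

// Dequantize i32 GEMM accumulators to f32 for a row-major [M x OC] output.
// Per-tensor: scales.size() == 1, one scale for everything (the case kept).
// Per-channel: scales.size() == OC, one scale per output channel (rejected).
std::vector<float> dequantize(const std::vector<int32_t>& acc,
                              const std::vector<float>& scales,
                              size_t oc) {
    std::vector<float> out(acc.size());
    for (size_t i = 0; i < acc.size(); ++i) {
        const float scale = (scales.size() == 1) ? scales[0] : scales[i % oc];
        out[i] = static_cast<float>(acc[i]) * scale;
    }
    return out;
}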

@@ -106,7 +109,7 @@ arm_compute::Status ACLLowpFullyConnectedExecutor::validateTensorsInfo(const ACL
     const auto matMulValid = arm_compute::NEGEMMLowpMatrixMultiplyCore::validate(
         aclMemoryInfos[ACLArgs::ACL_SRC_0].get(),
         aclMemoryInfos[ACLArgs::ACL_WEI].get(),
-        nullptr, //aclMemoryInfos[ACLArgs::ACL_BIAS].get(),
+        aclMemoryInfos[ACLArgs::ACL_BIAS].get(),
         aclMemoryInfos[ACLArgs::ACL_DST].get(),
         gemmInfo);
     return matMulValid;
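This change passes the bias ITensorInfo to ACL's static validate() instead of nullptr, so unsupported bias configurations are rejected up front rather than at configure time. A minimal sketch of that validate-before-configure idiom (shapes, data types, quantization parameters, and the helper name are illustrative, not from the patch):

#include <arm_compute/core/Error.h>
#include <arm_compute/core/TensorInfo.h>
#include <arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h>

using namespace arm_compute;

// Ask ACL whether an int8 GEMM with an s32 bias is supported for the given
// sizes. validate() mirrors configure() but allocates nothing, so executors
// can probe support up front and fall back to another implementation.
bool lowpGemmSupported(size_t M, size_t N, size_t K) {
    TensorInfo src(TensorShape(K, M), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(0.05f, 0));
    TensorInfo wei(TensorShape(N, K), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(0.02f, 0));
    TensorInfo bia(TensorShape(N), 1, DataType::S32);
    TensorInfo dst(TensorShape(N, M), 1, DataType::S32);

    const Status status =
        NEGEMMLowpMatrixMultiplyCore::validate(&src, &wei, &bia, &dst, GEMMInfo());
    return status.error_code() == ErrorCode::OK;
}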
@@ -89,9 +89,10 @@ static const TypeMapping aclFCTypeMapping {
 static const TypeMapping aclLowpFCTypeMapping {
     // {src, wei, bia, dst}       pt<src, wei, bias, dst>
     {{_i8, _i8, _any, _f32},      pt(just<i8>(), just<i8>(), just<i32>(), just<f32>())},
+    {{_i8, _i8, _any, _f32},      pt(just<i8>(), just<i8>(), just<f32>(), just<f32>())},
     //{{_i8, _i8, _any, _i32},    pt(just<i8>(), just<i8>(), just<i32>(), just<i32>())},
     //{{_u8, _u8, _any, _i32},    pt(just<u8>(), just<u8>(), bypass(), just<i32>())},
-    {{_any, _any, _any, _any},    pt(just<f32>(), just<f32>(), just<f32>(), just<f32>())}
+    //{{_any, _any, _any, _any},  pt(just<f32>(), just<f32>(), just<f32>(), just<f32>())}
 };
 
 static const MappingNotation dnnlConvolutionMappingNotation {
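The newly enabled row accepts an f32 bias for the i8/i8/f32 case alongside the existing i32-bias row. For intuition, the two bias precisions correspond to adding the bias before or after dequantization; a hedged illustration with hypothetical helpers, assuming a per-tensor scale:

#include <cstdint>

// Hypothetical helpers, not from the patch: where the bias enters an int8
// GEMM. An i32 bias is added to the integer accumulator; an f32 bias is
// added after the accumulator has been dequantized.
float withI32Bias(int32_t acc, int32_t bias, float scale) {
    return static_cast<float>(acc + bias) * scale;
}
float withF32Bias(int32_t acc, float bias, float scale) {
    return static_cast<float>(acc) * scale + bias;
}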
@@ -385,11 +385,7 @@ void Transformations::PreLpt(const std::vector<ov::element::Type>& defaultPrecis
     const auto precisions = get_convert_precisions();
     if (inferencePrecision == ov::element::f16) {
         precisions_map fp_convert_precision_map = {{ov::element::f32, ov::element::f16}};
-#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64)
-        type_to_fuse_map fuse_map = {{ov::opset1::FakeQuantize::get_type_info_static(), fuse_type_to_fq}};
-#else
         type_to_fuse_map fuse_map = {};
-#endif
         const bool keep_precision_sensitive_in_fp32 = true;
         CPU_REGISTER_PASS_COMMON(manager,
                                  ov::pass::ConvertPrecision,
@@ -755,9 +751,11 @@ void Transformations::Lpt(const std::vector<ov::element::Type>& defaultPrecision
         return LayerTransformation::isAsymmetricQuantization(node, defaultPrecisions) ||
                WeightableLayerTransformation::isAsymmetricOnWeights(node, defaultPrecisions);
     }, ConvolutionBackpropDataTransformation);
+#if !defined(OPENVINO_ARCH_ARM64)
     CPU_SET_CALLBACK_COMMON(lptManager, [](const_node_ptr& node) -> bool {
         return ov::marked_as_bias(node);
     }, AddTransformation);
+#endif
 
     CPU_SET_CALLBACK_X64(lptManager, [&](const_node_ptr& node) -> bool {
         const auto& consumers = node->get_output_target_inputs(0);
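For context, the CPU_SET_CALLBACK_* macros install a predicate through the pass config; a transformation that consults it skips any node for which the predicate returns true, so guarding the registration with #if !defined(OPENVINO_ARCH_ARM64) re-enables AddTransformation on bias Adds for ARM64 builds. A sketch of roughly what the registration amounts to (function name is ours):

#include "low_precision/add.hpp"
#include "low_precision/rt_info/bias_attribute.hpp"
#include "openvino/pass/manager.hpp"

// Sketch: returning true from the callback tells the pass to leave the
// matched node alone. Here, Add nodes tagged as biases are skipped by
// ov::pass::low_precision::AddTransformation.
void disableAddTransformationOnBiases(ov::pass::Manager& manager) {
    manager.get_pass_config()->set_callback<ov::pass::low_precision::AddTransformation>(
        [](const std::shared_ptr<const ov::Node>& node) -> bool {
            return ov::marked_as_bias(node);
        });
}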
@@ -1161,7 +1159,7 @@ void Transformations::PostSnippets(void) {
     ov::pass::Manager postSnippetsManager("CPU:PostSnippets");
     postSnippetsManager.set_per_pass_validation(false);
     CPU_REGISTER_PASS_COMMON(postSnippetsManager, ov::pass::FakeQuantizeDecomposition);
-    CPU_SET_CALLBACK_COMMON(postSnippetsManager,
+    CPU_SET_CALLBACK_X64(postSnippetsManager,
         [](const_node_ptr& node) -> bool {
             std::string errMsg;
             return node::FakeQuantize::isSupportedOperation(node, errMsg);
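Switching this callback from COMMON to X64 means non-x64 builds no longer exempt CPU-supported FakeQuantize nodes, so on ARM every FakeQuantize is lowered by ov::pass::FakeQuantizeDecomposition into elementwise ops. For reference, the scalar semantics the decomposition reproduces, per the OpenVINO FakeQuantize operation spec:

#include <algorithm>
#include <cmath>

// Scalar reference of FakeQuantize (from the OpenVINO operation spec).
// The decomposition pass lowers the op to the equivalent chain of
// subtract/divide/multiply/round/add elementwise operations.
float fake_quantize(float x, float in_lo, float in_hi,
                    float out_lo, float out_hi, int levels) {
    if (x <= std::min(in_lo, in_hi)) return out_lo;
    if (x > std::max(in_lo, in_hi)) return out_hi;
    const float q = std::round((x - in_lo) / (in_hi - in_lo) * (levels - 1));
    return q / (levels - 1) * (out_hi - out_lo) + out_lo;
}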
