Commit 473aec5

[CPU] [ARM] FullyConnected fp16 issue reproducer

eshoguli committed Aug 14, 2024
1 parent 55ffb33 commit 473aec5
Showing 72 changed files with 244 additions and 36 deletions.
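In short, the commit: adds a new CPU low-precision-transformation (LPT) test instance for FullyConnected that expects the ACL gemm_acl_i8 primitive, extends the shared FullyConnectedTransformation test with weights-precision, activation, and expected-primitive-type parameters, and extends the MatMulFunction::getOriginal helper to build signed or per-channel weight quantization and an optional trailing Relu.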
3 changes: 2 additions & 1 deletion src/plugins/intel_cpu/tests/functional/CMakeLists.txt
@@ -74,7 +74,8 @@ endif()
if(NOT X86_64)
list(APPEND EXCLUDED_SOURCE_PATHS
${CMAKE_CURRENT_SOURCE_DIR}/custom/single_layer_tests/instances/x64
- ${CMAKE_CURRENT_SOURCE_DIR}/custom/subgraph_tests/src/x64)
+ ${CMAKE_CURRENT_SOURCE_DIR}/custom/subgraph_tests/src/x64
+ ${CMAKE_CURRENT_SOURCE_DIR}/shared_tests_instances/low_precision_transformations/x64)
endif()

ov_add_test_target(
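This exclusion keeps x64-only low-precision-transformation test instances out of non-x86 builds; the instance added below targets the ARM Compute Library path (gemm_acl_i8), so it appears intended for exactly those non-x86 (e.g. ARM) builds.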
@@ -0,0 +1,98 @@
// Copyright (C) 2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <vector>

#include "low_precision_transformations/fully_connected_transformation.hpp"
#include "common_test_utils/test_constants.hpp"

using namespace LayerTestsDefinitions;

namespace {
const std::vector<ov::element::Type> netPrecisions = {
ov::element::f32
};

const std::vector<MatMulShapes> shapes = {
{
ov::PartialShape{ 2, 8 },
ov::PartialShape{ 3, 8 },
false,
true
},
// {
// ov::PartialShape{ 1, 1, 16 },
// ov::PartialShape{ 1, 16, 8 },
// false,
// false
// },
// // transposeB <= here
// {
// ov::PartialShape{ 1, 16 },
// ov::PartialShape{ 8, 16 },
// false,
// true
// },
// {
// ov::PartialShape{ 16, 1 },
// ov::PartialShape{ 16, 8 },
// true,
// false
// },
// {
// ov::PartialShape{ 1, 16, 1 },
// ov::PartialShape{ 1, 16, 8 },
// true,
// false
// },
//// // MatMul_101
//// {
//// ov::PartialShape{ 1, 128, 768 },
//// ov::PartialShape{ 3072, 768 }, // after transpose: 768 x 3072
//// false,
//// true
//// },
};

const std::vector<ov::pass::low_precision::LayerTransformation::Params> trasformationParamValues = {
LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParams()
};

const std::vector<FullyConnectedParams> activations = {
// TODO: failed if transposeB = true: accuracy check
{
true, // activation
false, // perChannel
"fullyConnected,fullyConnected/DequantizationMultiply,relu"
},
//// // TODO: failed if transposeB = true: fp32 execution, FQ is not decomposed
//// {
//// true, // activation
//// true, // perChannel
//// "fullyConnected,fullyConnected/DequantizationMultiply,relu"
//// },
// {
// false, // activation
// false, // perChannel
// "fullyConnected_original,fullyConnected"
// },
//// // TODO: failed if transposeB = true: fp32 execution, FQ is not decomposed
//// {
//// false, // activation
//// true, // perChannel
//// "fullyConnected_original,fullyConnected"
//// }
};

INSTANTIATE_TEST_SUITE_P(smoke_LPT, FullyConnectedTransformation,
::testing::Combine(
::testing::ValuesIn(netPrecisions),
::testing::ValuesIn(shapes),
::testing::Values(ov::test::utils::DEVICE_CPU),
::testing::ValuesIn(trasformationParamValues),
::testing::ValuesIn({ov::element::i8 /*, ov::element::u8*/}),
::testing::ValuesIn(activations),
::testing::Values("gemm_acl_i8")),
FullyConnectedTransformation::getTestCaseName);
} // namespace
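Only one shape pair ({ 2, 8 } x { 3, 8 } with transposeB = true) and one FullyConnectedParams variant are enabled; the commented-out cases document known failures (TODO: accuracy mismatch when transposeB = true, and fp32 execution with per-channel weights because the FakeQuantize is not decomposed), which is consistent with this commit being an issue reproducer.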
@@ -16,11 +16,21 @@ class MatMulShapes
bool transposeB;
};

class FullyConnectedParams {
public:
bool activation;
bool perChannelWeights;
std::string originalLayersNames;
};

typedef std::tuple<
ov::element::Type,
MatMulShapes,
std::string,
- ov::pass::low_precision::LayerTransformation::Params> FullyConnectedTransformationParams;
+ ov::pass::low_precision::LayerTransformation::Params,
+ ov::element::Type,
+ FullyConnectedParams,
+ std::string> FullyConnectedTransformationParams;

namespace LayerTestsDefinitions {

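For orientation, here is a minimal sketch (not part of the commit) of the single enabled parameter set as the seven-field tuple declared above would carry it. The values come from the CPU instance file earlier; the aggregate initializers assume MatMulShapes and FullyConnectedParams keep only public data members, and the header above is assumed to be included:

    // Sketch only: field order must match FullyConnectedTransformationParams.
    FullyConnectedTransformationParams example{
        ov::element::f32,                                   // network precision
        MatMulShapes{ov::PartialShape{2, 8}, ov::PartialShape{3, 8}, false, true},
        "CPU",                                              // target device
        LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParams(),
        ov::element::i8,                                    // weights precision
        FullyConnectedParams{true, false,
                             "fullyConnected,fullyConnected/DequantizationMultiply,relu"},
        "gemm_acl_i8"};                                     // expected primitive type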
@@ -5,12 +5,13 @@
#include "low_precision_transformations/fully_connected_transformation.hpp"

#include <memory>
+ #include <string>
#include <tuple>
#include <vector>
- #include <string>


#include "common_test_utils/common_utils.hpp"
#include "openvino/util/common_util.hpp"
#include "ov_lpt_models/mat_mul.hpp"

namespace LayerTestsDefinitions {
@@ -20,14 +21,22 @@ std::string FullyConnectedTransformation::getTestCaseName(const testing::TestPar
MatMulShapes shapes;
std::string targetDevice;
ov::pass::low_precision::LayerTransformation::Params params;
- std::tie(precision, shapes, targetDevice, params) = obj.param;
+ ov::element::Type weightsType;
+ FullyConnectedParams activation;
+ std::string expectedPrimitiveType;
+ std::tie(precision, shapes, targetDevice, params, weightsType, activation, expectedPrimitiveType) = obj.param;

std::ostringstream result;
result <<
get_test_case_name_by_params(precision, shapes.inputA, targetDevice, params) <<
shapes.inputB << "_" <<
shapes.transposeA << "_" <<
- shapes.transposeB;
+ shapes.transposeB << "_" <<
+ weightsType << "_" <<
+ "Activation=" << activation.activation << "_" <<
+ "perChannelWeights=" << activation.perChannelWeights << "_" <<
+ activation.originalLayersNames << "_" <<
+ expectedPrimitiveType;

return result.str();
}
@@ -36,7 +45,10 @@ void FullyConnectedTransformation::SetUp() {
ov::element::Type precision;
MatMulShapes shapes;
ov::pass::low_precision::LayerTransformation::Params params;
- std::tie(precision, shapes, targetDevice, params) = this->GetParam();
+ ov::element::Type weightsType;
+ FullyConnectedParams activation;
+ std::string expectedPrimitiveType;
+ std::tie(precision, shapes, targetDevice, params, weightsType, activation, expectedPrimitiveType) = this->GetParam();

init_input_shapes({ shapes.inputA, shapes.inputB });

@@ -45,12 +57,31 @@
shapes.inputA,
shapes.inputB,
shapes.transposeA,
- shapes.transposeB);
+ shapes.transposeB,
+ weightsType == ov::element::i8,
+ activation.perChannelWeights,
+ activation.activation);

ov::pass::Serialize(
"/Users/eshoguli/projects/openvino_matmul/test.original.xml",
"/Users/eshoguli/projects/openvino_matmul/test.original.bin").run_on_model(function);
}

TEST_P(FullyConnectedTransformation, CompareWithRefImpl) {
SKIP_IF_CURRENT_TEST_IS_DISABLED();
run();

// const auto& activation = std::get<5>(GetParam());
// const auto originalLayersNames = get_property_by_type("FullyConnected", "originalLayersNames");
// EXPECT_EQ(ov::util::to_lower(activation.originalLayersNames), originalLayersNames);
//
// const auto& actualPrecision = get_runtime_precision_by_type("FullyConnected");
// const auto expectedPrecision = std::get<4>(GetParam());
// EXPECT_EQ(actualPrecision, expectedPrecision.to_string());
//
// const auto& expectedPrimitiveType = std::get<6>(GetParam());
// const std::string actualPrimitiveType = get_property_by_type("FullyConnected", "primitiveType");
// EXPECT_EQ(expectedPrimitiveType, actualPrimitiveType);
};

} // namespace LayerTestsDefinitions
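As committed, CompareWithRefImpl only builds and runs the model; the originalLayersNames, runtime-precision, and primitiveType assertions remain commented out. Note also that SetUp unconditionally serializes the model to absolute paths under /Users/eshoguli/ — a machine-specific debugging aid that would need to be removed or made configurable before merging.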
@@ -24,17 +24,21 @@ class MatMulFunction

static std::shared_ptr<ov::Model> getOriginal(
const ov::element::Type precision,
- const ov::PartialShape inputShape1,
- const ov::PartialShape inputShape2,
+ const ov::PartialShape& inputShape1,
+ const ov::PartialShape& inputShape2,
const bool transpose1,
- const bool transpose2);
+ const bool transpose2,
+ const bool signedWeights,
+ const bool perChannelWeights,
+ const bool relu);

static std::shared_ptr<ov::Model> getOriginal(
const ov::element::Type precision,
const ov::Shape& inputShape1,
const FakeQuantizeOnData& fqOnData1,
const ov::Shape& inputShape2,
- const FakeQuantizeOnData& fqOnData2);
+ const FakeQuantizeOnData& fqOnData2,
+ const bool requantization = false);

static std::shared_ptr<ov::Model> getOriginal(const ov::element::Type netPrecision,
const ov::PartialShape& inputShape1,
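A hedged usage sketch of the extended builder follows (not part of the commit). Argument values mirror the enabled CPU test case; the ov::builder::subgraph namespace is assumed from the ov_lpt_models helpers, and ov_lpt_models/mat_mul.hpp is assumed to be included:

    // Sketch only: build the reproducer model directly via the new signature.
    std::shared_ptr<ov::Model> model = ov::builder::subgraph::MatMulFunction::getOriginal(
        ov::element::f32,
        ov::PartialShape{2, 8},   // inputShape1: activations
        ov::PartialShape{3, 8},   // inputShape2: weights (transposed by MatMul)
        false,                    // transpose1
        true,                     // transpose2
        true,                     // signedWeights: signed (i8-style) quantization intervals
        false,                    // perChannelWeights: single per-tensor interval
        true);                    // relu: append a Relu after the MatMul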
110 changes: 87 additions & 23 deletions src/tests/ov_helpers/ov_lpt_models/src/mat_mul.cpp
@@ -49,36 +49,81 @@ std::shared_ptr<ov::Model> MatMulFunction::getOriginal(
return function;
}

namespace {
template <typename T>
std::vector<T> generate_values(const ov::Shape& shape) {
std::vector<T> values(ov::shape_size(shape));
for (size_t i = 0; i < values.size(); ++i) {
values[i] = static_cast<T>(static_cast<T>(i) / 10.0);
}
return values;
}

std::vector<float> generate_dequantization_values(
const ov::Shape& shape,
const size_t levels,
const bool low) {
const auto shape_size = ov::shape_size(shape);
std::vector<float> values(shape_size);
for (size_t i = 0; i < shape_size; ++i) {
values[i] = low ? -128.f / (static_cast<float>(i) + 1.f) : 127.f / (static_cast<float>(i) + 1.f);
}
return values;
}
} // namespace

std::shared_ptr<ov::Model> MatMulFunction::getOriginal(
const ov::element::Type precision,
- const ov::PartialShape inputShape1,
- const ov::PartialShape inputShape2,
+ const ov::PartialShape& inputShape1,
+ const ov::PartialShape& inputShape2,
const bool transpose1,
- const bool transpose2) {
+ const bool transpose2,
+ const bool signedOnWeights,
+ const bool perChannelWeights,
+ const bool relu) {
const auto paramNode = std::make_shared<ov::opset1::Parameter>(precision, inputShape1);
const std::vector<size_t> constShapes(inputShape1.rank().get_length(), 1ul);
- const auto fakeQuantizeOnAcitvations = ov::test::utils::make_fake_quantize(
- paramNode, precision, 256ul, constShapes,
- { 0.f }, { 255.f / 4.f }, { 0.f }, { 255.f / 4.f });
+ const auto fakeQuantizeOnAcitvations = signedOnWeights ?
+ ov::test::utils::make_fake_quantize(
+ paramNode, precision, 256ul, constShapes,
+ { -128.f / 4.f }, { 127.f / 4.f }, { -128.f / 4.f }, { 127.f / 4.f }) :
+ ov::test::utils::make_fake_quantize(
+ paramNode, precision, 256ul, constShapes,
+ { 0.f }, { 255.f / 4.f }, { 0.f }, { 255.f / 4.f });
fakeQuantizeOnAcitvations->set_friendly_name("fakeQuantizeOnAcitvations");

const size_t channel = inputShape2[inputShape2.size() - 2].get_length();
auto weightsConst = std::make_shared<ov::op::v0::Constant>(
precision,
inputShape2.to_shape(),
- std::vector<float>({ 1.f }));
- const auto fakeQuantizeOnWeights = ov::test::utils::make_fake_quantize(
- weightsConst, precision, 256ul, { 1ul, 1ul },
- { -128.f / 8.f }, { 127.f / 8.f }, { -128.f / 8.f }, { 127.f / 8.f });
+ generate_values<float>(inputShape2.to_shape()));
+
+ const auto fakeQuantizeOnWeights = perChannelWeights ?
+ ov::test::utils::make_fake_quantize(
+ weightsConst, precision, 256ul,
+ Shape{ channel, 1 },
+ generate_dequantization_values(Shape{ channel, 1 }, 256ul, true),
+ generate_dequantization_values(Shape{ channel, 1 }, 256ul, false),
+ generate_dequantization_values(Shape{ channel, 1 }, 256ul, true),
+ generate_dequantization_values(Shape{ channel, 1 }, 256ul, false)) :
+ ov::test::utils::make_fake_quantize(
+ weightsConst, precision, 256ul, { 1ul, 1ul },
+ { -128.f / 8.f }, { 127.f / 8.f }, { -128.f / 8.f }, { 127.f / 8.f });
fakeQuantizeOnWeights->set_friendly_name("fakeQuantizeOnWeights");

- const std::shared_ptr<ov::opset1::MatMul> fullyConnected = std::make_shared<ov::opset1::MatMul>(
+ std::shared_ptr<Node> parent = std::make_shared<ov::opset1::MatMul>(
fakeQuantizeOnAcitvations->output(0),
fakeQuantizeOnWeights->output(0),
transpose1,
transpose2);
- fullyConnected->set_friendly_name("fullyConnected");
+ parent->set_friendly_name("fullyConnected");

if (relu) {
parent = std::make_shared<ov::opset1::Relu>(parent);
parent->set_friendly_name("relu");
}

- ov::ResultVector results{ std::make_shared<ov::opset1::Result>(fullyConnected) };
+ ov::ResultVector results{ std::make_shared<ov::opset1::Result>(parent) };
std::shared_ptr<ov::Model> function = std::make_shared<ov::Model>(
results,
ov::ParameterVector{ paramNode },
@@ -93,21 +138,40 @@ std::shared_ptr<ov::Model> MatMulFunction::getOriginal(
const ov::Shape& inputShape1,
const FakeQuantizeOnData& fqOnData1,
const ov::Shape& inputShape2,
- const FakeQuantizeOnData& fqOnData2) {
+ const FakeQuantizeOnData& fqOnData2,
+ const bool requantization) {
const std::shared_ptr<ov::opset1::Parameter> input1 = std::make_shared<ov::opset1::Parameter>(precision, inputShape1);
input1->set_friendly_name("input1");

const std::shared_ptr<ov::opset1::Parameter> input2 = std::make_shared<ov::opset1::Parameter>(precision, inputShape2);
input2->set_friendly_name("input2");

- const std::shared_ptr<ov::opset1::MatMul> matMul = std::make_shared<ov::opset1::MatMul>(
- makeFakeQuantize(input1, precision, fqOnData1),
- makeFakeQuantize(input2, precision, fqOnData2),
+ std::shared_ptr<ov::Node> parent1 = input1;
+ if (!fqOnData1.empty()) {
+ parent1 = makeFakeQuantize(parent1, precision, fqOnData1);
+ }
+
+ std::shared_ptr<ov::Node> parent2 = input2;
+ if (!fqOnData2.empty()) {
+ parent2 = makeFakeQuantize(parent2, precision, fqOnData2);
+ }
+
+ std::shared_ptr<Node> parent = std::make_shared<ov::opset1::MatMul>(
+ parent1,
+ parent2,
false,
false);
- matMul->set_friendly_name("matMul");
+ parent->set_friendly_name("matMul");

if (requantization) {
parent = makeFakeQuantize(parent, precision, fqOnData1);
parent = std::make_shared<ov::opset1::PRelu>(
parent,
std::make_shared<ov::opset1::Constant>(ov::element::f32, Shape{1}, std::vector<float>{0.f}));
parent->set_friendly_name("prelu");
}

- std::shared_ptr<ov::opset1::Result> result = std::make_shared<ov::opset1::Result>(matMul);
+ std::shared_ptr<ov::opset1::Result> result = std::make_shared<ov::opset1::Result>(parent);

std::shared_ptr<ov::Model> function = std::make_shared<ov::Model>(
ov::ResultVector{ result },
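To make the per-channel branch concrete: for the enabled { 3, 8 } weight shape, channel = 3, and generate_dequantization_values produces per-output-channel intervals [-128/(i+1), 127/(i+1)]. A standalone sketch (not part of the commit) that reproduces the arithmetic:

    // Standalone illustration of generate_dequantization_values() for channel = 3.
    #include <cstdio>

    int main() {
        const int channel = 3;  // inputShape2 = {3, 8}: dimension at index size-2
        for (int i = 0; i < channel; ++i) {
            const float low = -128.f / (static_cast<float>(i) + 1.f);
            const float high = 127.f / (static_cast<float>(i) + 1.f);
            std::printf("row %d: [%g, %g]\n", i, low, high);
        }
        // Prints: row 0: [-128, 127]; row 1: [-64, 63.5]; row 2: [-42.6667, 42.3333]
        return 0;
    }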
