From 6b43ec738b59e99bbfc8d9dfd0af20620bb3d4e6 Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Fri, 16 Jul 2021 17:05:52 +0300 Subject: [PATCH] [GPU] Fixed matmul handling for some shapes (#6642) --- .../src/cldnn_engine/cldnn_engine.cpp | 6 +++ .../src/cldnn_engine/ops/matmul.cpp | 39 ++++++++++++++++--- .../single_layer_tests/mat_mul.cpp | 22 ++++++++++- 3 files changed, 60 insertions(+), 7 deletions(-) diff --git a/inference-engine/src/cldnn_engine/cldnn_engine.cpp b/inference-engine/src/cldnn_engine/cldnn_engine.cpp index 72a34dd855af48..078a68c67843b6 100644 --- a/inference-engine/src/cldnn_engine/cldnn_engine.cpp +++ b/inference-engine/src/cldnn_engine/cldnn_engine.cpp @@ -83,6 +83,7 @@ #include "gpu/gpu_config.hpp" #include "cldnn/runtime/device_query.hpp" +#include "cldnn/runtime/debug_configuration.hpp" #ifdef __linux__ # include @@ -436,6 +437,11 @@ InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const Inferenc manager.run_passes(nGraphFunc); } } + + GPU_DEBUG_GET_INSTANCE(debug_config); + GPU_DEBUG_IF(!debug_config->dump_graphs.empty()) { + clonedNetwork.serialize(debug_config->dump_graphs + "/transformed_func.xml"); + } return clonedNetwork; } diff --git a/inference-engine/src/cldnn_engine/ops/matmul.cpp b/inference-engine/src/cldnn_engine/ops/matmul.cpp index a8818c9e6f67ee..3d09fc7fd4e5e6 100644 --- a/inference-engine/src/cldnn_engine/ops/matmul.cpp +++ b/inference-engine/src/cldnn_engine/ops/matmul.cpp @@ -62,6 +62,8 @@ void CreateMatMulOp(Program& p, const std::shared_ptr& o bool is_fc = IsNodeOnConstPath(op->get_input_node_shared_ptr(1)); is_fc &= std::count_if(shape_b.begin(), shape_b.end(), [](size_t x) { return x != 1; }) <= 2; + // TODO: These conditions can be relaxed with proper handling in the FC path + is_fc &= shape_b.size() > 1 && shape_a.size() > 1; if (is_fc) { ngraph::Shape shape_a_aligned, shape_b_aligned; @@ -73,10 +75,10 @@ void CreateMatMulOp(Program& p, const std::shared_ptr& o auto inputName =
inputPrimitives[0]; auto weightsName = inputPrimitives[1]; + // Weights normalization if (!op->get_transpose_b()) { - ngraph::Shape output_shape = shape_b; - std::vector transpose_order(output_shape.size()); + std::vector transpose_order(shape_b.size()); std::iota(transpose_order.begin(), transpose_order.end(), 0); std::swap(*(transpose_order.end() - 1), *(transpose_order.end() - 2)); @@ -95,8 +97,7 @@ void CreateMatMulOp(Program& p, const std::shared_ptr& o // Input normalization if (op->get_transpose_a()) { - ngraph::Shape output_shape = shape_a; - std::vector transpose_order(output_shape.size()); + std::vector transpose_order(shape_a.size()); std::iota(transpose_order.begin(), transpose_order.end(), 0); std::swap(*(transpose_order.end() - 1), *(transpose_order.end() - 2)); @@ -131,16 +132,20 @@ void CreateMatMulOp(Program& p, const std::shared_ptr& o if (reshape_fc) { inputName = reshape_to_2d(shape_a, inputName, shape_a.back(), "_cldnn_reshape_in"); + } + + if (shape_b.size() != 2) { weightsName = reshape_to_2d(shape_b, weightsName, K, "_cldnn_reshape_weights"); } + auto input_rank = reshape_fc ? 2 : shape_a.size(); auto fcPrim = cldnn::fully_connected(layerName, inputName, weightsName, "", DataTypeFromPrecision(op->get_output_element_type(0)), cldnn::padding(), - op->get_output_shape(0).size()); + input_rank); p.AddPrimitive(fcPrim); @@ -196,7 +201,29 @@ void CreateMatMulOp(Program& p, const std::shared_ptr& o auto reshapeName = layerName + "_cldnn_in" + std::to_string(i) + "_reshape"; // Extend input dimensions by prepending ones - inputDims.insert(inputDims.begin(), outDimsN - inputDimsN, 1ul); + if (inputDimsN == 1) { + // One-dimensional tensors unsqueezing is applied for each input independently. + // The axes inserted in this step are not included in the output shape. 
+ // * If rank of the **first** input is equal to 1, it is always unsqueezed to 2D tensor **row vector** (regardless of `transpose_a`) + // by adding axes with size 1 at ROW_INDEX_DIM, to the **left** of the shape. For example `[S]` will be reshaped to `[1, S]`. + // * If rank of the **second** input is equal to 1, it is always unsqueezed to 2D tensor **column vector** (regardless of `transpose_b`) + // by adding axes with size 1 at COL_INDEX_DIM, to the **right** of the shape. For example `[S]` will be reshaped to `[S, 1]`. + bool transpose = false; + if (i == 0) { + transpose = op->get_transpose_a(); + inputDims.insert(inputDims.begin(), 1); + } else { + transpose = op->get_transpose_b(); + inputDims.insert(inputDims.end(), 1); + } + // The spec says that shapes must be unsqueezed regardless of the transpose flag, but the primitive implementation always respects transposes + // so we have to swap dimensions correspondingly to have consistent shapes. + if (transpose) { + std::swap(inputDims[0], inputDims[1]); + } + } + if (inputDimsN < outDimsN) + inputDims.insert(inputDims.begin(), outDimsN - inputDimsN, 1ul); auto targetShape = gemmSpecificTensor(inputDims); diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/mat_mul.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/mat_mul.cpp index 757bd2955b264e..6305f28e6145d9 100644 --- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/mat_mul.cpp +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/mat_mul.cpp @@ -16,7 +16,27 @@ const std::vector inputPrecisions = { }; const std::vector shapeRelatedParams = { - { { {1, 4, 5, 6}, false }, { {1, 4, 6, 4}, false } } + { { {1, 4, 5, 6}, false }, { {1, 4, 6, 4}, false } }, + { { {1, 16, 128}, false }, { {1, 64, 128}, true } }, + { { {4, 5, 6}, false }, { {6, 3}, false } }, + { { {9, 9, 9}, false }, { {9, 9}, false } }, + {
{ {1, 2, 3}, false }, { {1, 1, 3, 2}, false } }, + { { {1, 3, 2, 4}, false }, { {2, 1, 4, 2}, false } }, + { { {2, 1, 2, 4}, false }, { {1, 3, 4, 2}, false } }, + { { {3, 2, 4}, false }, { {2, 1, 4, 2}, false } }, + { { {2, 1, 4, 2}, false }, { {3, 2, 4}, false } }, + { { {2, 1, 2, 3}, true }, { {3, 2, 4}, false } }, + { { {2, 1, 3, 2}, false }, { {3, 4, 2}, true } }, + { { {2, 1, 2, 3}, true }, { {3, 4, 2}, true } }, + { { {3}, false }, { {2, 2, 3, 1}, false } }, + { { {2, 2, 1, 3}, false }, { {3}, false } }, + { { {1, 5}, false }, { {5, 1}, false } }, + { { {5, 1}, true }, { {5, 1}, false } }, + { { {1, 5}, false }, { {1, 5}, true } }, + { { {1, 5}, false }, { {5}, false } }, + { { {5}, false }, { {5, 1}, false } }, + { { {5}, false }, { {5}, false } }, + { { {5}, true }, { {5}, true } } }; std::vector secondaryInputTypes = {