Skip to content

Commit

Permalink
[IE CLDNN] Implement ExtractImagePatches operation (openvinotoolkit#1127)
Browse files Browse the repository at this point in the history

The ExtractImagePatches operation collects patches from the input
tensor, as if applying a convolution. All extracted patches are stacked
in the depth dimension of the output.

JIRA: 30055
  • Loading branch information
echuraev authored Jun 29, 2020
1 parent d0be6b1 commit 08cd0f7
Show file tree
Hide file tree
Showing 20 changed files with 1,303 additions and 7 deletions.
30 changes: 30 additions & 0 deletions inference-engine/src/cldnn_engine/cldnn_program.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@
#include <api/ctc_greedy_decoder.hpp>
#include <api/cum_sum.hpp>
#include <api/embedding_bag.hpp>
#include <api/extract_image_patches.hpp>

#include <chrono>
#include <cmath>
Expand Down Expand Up @@ -605,6 +606,7 @@ Program::LayerType Program::LayerTypeFromStr(const std::string &str) {
{ "EmbeddingBagPackedSum", EmbeddingBagPackedSum },
{ "EmbeddingBagOffsetsSum", EmbeddingBagOffsetsSum },
{ "EmbeddingSegmentsSum", EmbeddingSegmentsSum },
{ "ExtractImagePatches" , ExtractImagePatches },
};
auto it = LayerNameToType.find(str);
if (it != LayerNameToType.end())
Expand Down Expand Up @@ -1297,6 +1299,8 @@ void Program::CreateSingleLayerPrimitive(cldnn::topology& topology, InferenceEng
break;
case EmbeddingSegmentsSum: CreateEmbeddingSegmentsSumPrimitive(topology, layer);
break;
case ExtractImagePatches: CreateExtractImagePatchesPrimitive(topology, layer);
break;
default: THROW_CLDNN_EXCEPTION("Unknown Layer Type: " << layer->type);
}
}
Expand Down Expand Up @@ -4889,6 +4893,32 @@ void Program::CreateEmbeddingSegmentsSumPrimitive(cldnn::topology& topology, Inf
AddPrimitiveToProfiler(layerName, layer);
}

void Program::CreateExtractImagePatchesPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr& layer) {
    // ExtractImagePatches takes exactly one input tensor.
    ValidateLayer(layer, 1);

    auto inputs = GetPrevLayersPrimitives(layer);
    auto genericLayer = as<InferenceEngine::GenericLayer*>(layer);

    // Operation attributes as parsed from the IR layer parameters.
    const std::vector<unsigned int> sizes = genericLayer->GetParamAsUInts("sizes");
    const std::vector<unsigned int> strides = genericLayer->GetParamAsUInts("strides");
    const std::vector<unsigned int> rates = genericLayer->GetParamAsUInts("rates");
    const std::string auto_pad = genericLayer->GetParamAsString("auto_pad");

    const std::string primitiveName = layer_type_name_ID(layer);

    // The output shape is taken from the layer's first output tensor descriptor.
    const auto outputShape = CldnnTensorFromIEDims(genericLayer->outData[0]->getTensorDesc().getDims());

    topology.add(cldnn::extract_image_patches(primitiveName,
                                              inputs[0],
                                              sizes,
                                              strides,
                                              rates,
                                              auto_pad,
                                              outputShape));
    AddPrimitiveToProfiler(primitiveName, layer);
}

bool Program::IsValidSplitConvMerge(const InferenceEngine::SplitLayer *splitLayer) const {
if (splitLayer->outData.size() != 2) return false; // split into 2

Expand Down
2 changes: 2 additions & 0 deletions inference-engine/src/cldnn_engine/cldnn_program.h
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,7 @@ class Program {
EmbeddingBagPackedSum,
EmbeddingBagOffsetsSum,
EmbeddingSegmentsSum,
ExtractImagePatches,
NO_TYPE
};
using GenericBlobMap = std::map<cldnn::primitive_id, cldnn::primitive_id>;
Expand Down Expand Up @@ -382,6 +383,7 @@ class Program {
void CreateEmbeddingBagPackedSumPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr& layer);
void CreateEmbeddingBagOffsetsSumPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr& layer);
void CreateEmbeddingSegmentsSumPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr& layer);
void CreateExtractImagePatchesPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
};

} // namespace CLDNNPlugin
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <vector>

#include "single_layer_tests/extract_image_patches.hpp"

using namespace LayerTestsDefinitions;
using ngraph::op::PadType;

namespace {

// Input tensor shapes (N, C, H, W).
const std::vector<std::vector<size_t>> inDataShape = {
    {1, 1, 10, 10},
    {1, 3, 10, 10}
};
// Patch sizes: {rows, cols}.
const std::vector<std::vector<size_t>> kernels = {
    {2, 2},
    {3, 3},
    {4, 4},
    {1, 3},
    {4, 2}
};
// Strides between patch centers: {rows, cols}.
const std::vector<std::vector<size_t>> strides = {
    {3, 3},
    {5, 5},
    {9, 9},
    {1, 3},
    {6, 2}
};
// Element sampling rates inside a patch (dilation): {rows, cols}.
const std::vector<std::vector<size_t>> rates = {
    {1, 1},
    {1, 2},
    {2, 1},
    {2, 2}
};
const std::vector<PadType> autoPads = {
    PadType::VALID,
    PadType::SAME_UPPER,
    PadType::SAME_LOWER
};
const std::vector<InferenceEngine::Precision> netPrecisions = {
    //InferenceEngine::Precision::I8,
    InferenceEngine::Precision::U8,
    InferenceEngine::Precision::I16,
    InferenceEngine::Precision::I32,
    InferenceEngine::Precision::FP32
};

// NOTE: the previously defined `extractImagePatchesParamsSet` duplicated the
// Combine below without precisions/device and was never used; it has been removed.
INSTANTIATE_TEST_CASE_P(layers_GPU, ExtractImagePatchesTest,
                        ::testing::Combine(
                            ::testing::ValuesIn(inDataShape),
                            ::testing::ValuesIn(kernels),
                            ::testing::ValuesIn(strides),
                            ::testing::ValuesIn(rates),
                            ::testing::ValuesIn(autoPads),
                            ::testing::ValuesIn(netPrecisions),
                            ::testing::Values(CommonTestUtils::DEVICE_GPU)),
                        ExtractImagePatchesTest::getTestCaseName);

}  // namespace
79 changes: 79 additions & 0 deletions inference-engine/thirdparty/clDNN/api/extract_image_patches.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
/*
// Copyright (c) 2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/

///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "primitive.hpp"

namespace cldnn {
/// @addtogroup cpp_api C++ API
/// @{
/// @addtogroup cpp_topology Network Topology
/// @{
/// @addtogroup cpp_primitives Primitives
/// @{

/// @brief The ExtractImagePatches operation collects patches from the input tensor, as if applying a convolution. All extracted patches are stacked in the depth dimension of the output.
/// @details The ExtractImagePatches operation is similar to the TensorFlow*
/// operation ExtractImagePatches.
/// This op extracts patches of shape `sizes` which are `strides` apart in the
/// input image. The output elements are taken from the input at intervals
/// given by the `rate` argument, as in dilated convolutions.
/// The result is a 4D tensor containing image patches with size
/// `size[0] * size[1] * depth` vectorized in the "depth" dimension.
/// The "auto_pad" attribute has no effect on the size of each patch, it
/// determines how many patches are extracted.
struct extract_image_patches : public primitive_base<extract_image_patches> {
CLDNN_DECLARE_PRIMITIVE(extract_image_patches)

/// @brief Constructs extract_image_patches primitive.
/// @param id This primitive id.
/// @param input Input primitive id containing input 4-D tensor.
/// @param sizes Patch sizes, {size_rows, size_cols}.
/// @param strides Distances between centers of neighboring patches, {stride_rows, stride_cols}.
/// @param rates Sampling rates of elements inside a patch (dilation), {rate_rows, rate_cols}.
/// @param auto_pad How the padding is calculated ("same_upper" and "same_lower"
///        select padded modes; any other value means no padding is applied).
/// @param output_shape Tensor with shape of output layout.
extract_image_patches(const primitive_id& id,
const primitive_id& input,
const std::vector<unsigned int>& sizes,
const std::vector<unsigned int>& strides,
const std::vector<unsigned int>& rates,
const std::string& auto_pad,
const tensor& output_shape,
const padding& output_padding = padding())
: primitive_base(id, {input}, output_padding),
sizes(sizes),
strides(strides),
rates(rates),
auto_pad(auto_pad),
output_shape(output_shape) {}

/// @brief Patch sizes, {size_rows, size_cols}
std::vector<unsigned int> sizes;
/// @brief Strides between patches, {stride_rows, stride_cols}
std::vector<unsigned int> strides;
/// @brief Sampling rates inside a patch, {rate_rows, rate_cols}
std::vector<unsigned int> rates;
/// @brief Mode how the padding is calculated
std::string auto_pad;
/// @brief Shape of output layout
tensor output_shape;
};
/// @}
/// @}
/// @}
} // namespace cldnn
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,8 @@ enum class KernelType {
GRN,
CTC_GREEDY_DECODER,
CUM_SUM,
EMBEDDING_BAG
EMBEDDING_BAG,
EXTRACT_IMAGE_PATCHES
};

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
/*
// Copyright (c) 2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/

#include "extract_image_patches_kernel_base.h"
#include <vector>
#include "kernel_selector_utils.h"

namespace kernel_selector {
ParamsKey ExtractImagePatchesKernelBase::GetSupportedKey() const {
    ParamsKey key;

    // Any input/output data type is accepted; only the plain bfyx layout is handled.
    key.EnableAllInputDataType();
    key.EnableAllOutputDataType();
    key.EnableInputLayout(DataLayout::bfyx);
    key.EnableOutputLayout(DataLayout::bfyx);
    key.EnableTensorOffset();
    key.EnableTensorPitches();
    key.EnableBatching();

    return key;
}

JitConstants ExtractImagePatchesKernelBase::GetJitConstants(const extract_image_patches_params& params) const {
    JitConstants jit = MakeBaseParamsJitConstants(params);

    // Patch geometry: index [0] is the row (Y) component, [1] the column (X) component.
    jit.AddConstant(MakeJitConstant("SIZE_ROWS", params.sizes[0]));
    jit.AddConstant(MakeJitConstant("SIZE_COLS", params.sizes[1]));
    jit.AddConstant(MakeJitConstant("STRIDE_ROWS", params.strides[0]));
    jit.AddConstant(MakeJitConstant("STRIDE_COLS", params.strides[1]));
    jit.AddConstant(MakeJitConstant("RATES_ROWS", params.rates[0]));
    jit.AddConstant(MakeJitConstant("RATES_COLS", params.rates[1]));

    // AUTO_PAD: 1 = same_upper, 2 = same_lower; left undefined for any other mode.
    if (params.auto_pad == "same_upper") {
        jit.AddConstant(MakeJitConstant("AUTO_PAD", 1));
    } else if (params.auto_pad == "same_lower") {
        jit.AddConstant(MakeJitConstant("AUTO_PAD", 2));
    }

    return jit;
}

ExtractImagePatchesKernelBase::DispatchData ExtractImagePatchesKernelBase::SetDefault(const extract_image_patches_params& params) const {
    DispatchData dispatch_data;

    // Global work size covers every output element: (batch, feature, y * x).
    const std::vector<size_t> gws = { params.output.Batch().v,
                                      params.output.Feature().v,
                                      params.output.Y().v * params.output.X().v };
    const auto lws = GetOptimalLocalWorkGroupSizes(gws, params.engineInfo);

    dispatch_data.gws0 = gws[0];
    dispatch_data.gws1 = gws[1];
    dispatch_data.gws2 = gws[2];

    dispatch_data.lws0 = lws[0];
    dispatch_data.lws1 = lws[1];
    dispatch_data.lws2 = lws[2];

    return dispatch_data;
}

KernelsData ExtractImagePatchesKernelBase::GetCommonKernelsData(const Params& params,
                                                                const optional_params& options,
                                                                float estimated_time) const {
    // Bail out early for parameter sets this kernel type cannot handle.
    if (!Validate(params, options)) {
        return KernelsData();
    }

    const auto& new_params = static_cast<const extract_image_patches_params&>(params);

    KernelData kernel_data = KernelData::Default<extract_image_patches_params>(params);

    const auto dispatch_data = SetDefault(new_params);
    const auto entry_point = GetEntryPoint(kernelName, new_params.layerID, options);
    const auto jit_constants = GetJitConstants(new_params);
    const auto jit = CreateJit(kernelName, jit_constants, entry_point);

    auto& kernel = kernel_data.kernels[0];
    FillCLKernelData(kernel, dispatch_data, params.engineInfo, kernelName, jit, entry_point);

    kernel_data.estimatedTime = estimated_time;

    return {kernel_data};
}

bool ExtractImagePatchesKernelBase::Validate(const Params& p, const optional_params&) const {
const extract_image_patches_params& params = static_cast<const extract_image_patches_params&>(p);

if (params.GetType() != KernelType::EXTRACT_IMAGE_PATCHES) {
return false;
}

return true;
}
} // namespace kernel_selector
Loading

0 comments on commit 08cd0f7

Please sign in to comment.