From 08cd0f77790dff74df02b04eebb3feca3c94edf3 Mon Sep 17 00:00:00 2001 From: Egor Churaev Date: Mon, 29 Jun 2020 00:36:30 -0700 Subject: [PATCH] [IE CLDNN] Implement ExtractImagePatches operation (#1127) The ExtractImagePatches operation collects patches from the input tensor, as if applying a convolution. All extracted patches are stacked in the depth dimension of the output. JIRA: 30055 --- .../src/cldnn_engine/cldnn_program.cpp | 30 + .../src/cldnn_engine/cldnn_program.h | 2 + .../extract_image_patches.cpp | 70 +++ .../clDNN/api/extract_image_patches.hpp | 79 +++ .../kernel_selector/common/common_types.h | 3 +- .../extract_image_patches_kernel_base.cpp | 108 ++++ .../extract_image_patches_kernel_base.h | 58 ++ .../extract_image_patches_kernel_ref.cpp | 26 + .../extract_image_patches_kernel_ref.h | 29 + .../extract_image_patches_kernel_selector.cpp | 26 + .../extract_image_patches_kernel_selector.h | 32 + .../core/cl_kernels/cum_sum_ref.cl | 6 +- .../cl_kernels/extract_image_patches_ref.cl | 64 ++ .../clDNN/src/extract_image_patches.cpp | 69 +++ .../src/gpu/extract_image_patches_gpu.cpp | 72 +++ .../thirdparty/clDNN/src/gpu/register_gpu.cpp | 1 + .../thirdparty/clDNN/src/gpu/register_gpu.hpp | 1 + .../prepare_primitive_fusing.cpp | 8 +- .../src/include/extract_image_patches_inst.h | 49 ++ .../extract_image_patches_gpu_test.cpp | 577 ++++++++++++++++++ 20 files changed, 1303 insertions(+), 7 deletions(-) create mode 100644 inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/extract_image_patches.cpp create mode 100644 inference-engine/thirdparty/clDNN/api/extract_image_patches.hpp create mode 100644 inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/extract_image_patches/extract_image_patches_kernel_base.cpp create mode 100644 inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/extract_image_patches/extract_image_patches_kernel_base.h create mode 100644 inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/extract_image_patches/extract_image_patches_kernel_ref.cpp create mode 100644 inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/extract_image_patches/extract_image_patches_kernel_ref.h create mode 100644 inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/extract_image_patches/extract_image_patches_kernel_selector.cpp create mode 100644 inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/extract_image_patches/extract_image_patches_kernel_selector.h create mode 100644 inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/extract_image_patches_ref.cl create mode 100644 inference-engine/thirdparty/clDNN/src/extract_image_patches.cpp create mode 100644 inference-engine/thirdparty/clDNN/src/gpu/extract_image_patches_gpu.cpp create mode 100644 inference-engine/thirdparty/clDNN/src/include/extract_image_patches_inst.h create mode 100644 inference-engine/thirdparty/clDNN/tests/test_cases/extract_image_patches_gpu_test.cpp diff --git a/inference-engine/src/cldnn_engine/cldnn_program.cpp b/inference-engine/src/cldnn_engine/cldnn_program.cpp index 5745564cf0ee01..37c30015c29fcc 100644 --- a/inference-engine/src/cldnn_engine/cldnn_program.cpp +++ b/inference-engine/src/cldnn_engine/cldnn_program.cpp @@ -66,6 +66,7 @@ #include #include #include +#include #include #include @@ -605,6 +606,7 @@ Program::LayerType Program::LayerTypeFromStr(const std::string &str) { { "EmbeddingBagPackedSum", EmbeddingBagPackedSum }, { "EmbeddingBagOffsetsSum", EmbeddingBagOffsetsSum }, { "EmbeddingSegmentsSum", EmbeddingSegmentsSum }, + { "ExtractImagePatches" , ExtractImagePatches }, }; auto it = LayerNameToType.find(str); if (it != LayerNameToType.end()) @@ -1297,6 +1299,8 @@ void Program::CreateSingleLayerPrimitive(cldnn::topology& topology, InferenceEng break; case EmbeddingSegmentsSum: CreateEmbeddingSegmentsSumPrimitive(topology, layer); break; + case ExtractImagePatches: CreateExtractImagePatchesPrimitive(topology, layer); + break; default: THROW_CLDNN_EXCEPTION("Unknown Layer Type: " << layer->type); } } @@ -4889,6 +4893,32 @@ void Program::CreateEmbeddingSegmentsSumPrimitive(cldnn::topology& topology, Inf AddPrimitiveToProfiler(layerName, layer); } +void Program::CreateExtractImagePatchesPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr& layer) { + ValidateLayer(layer, 1); + + auto inputPrimitives = GetPrevLayersPrimitives(layer); + auto eipLayer = as(layer); + + std::vector sizes = eipLayer->GetParamAsUInts("sizes"); + std::vector strides = eipLayer->GetParamAsUInts("strides"); + std::vector rates = eipLayer->GetParamAsUInts("rates"); + std::string auto_pad = eipLayer->GetParamAsString("auto_pad"); + + std::string eipLayerName = layer_type_name_ID(layer); + + auto extractImagePatchesPrim = cldnn::extract_image_patches( + eipLayerName, + inputPrimitives[0], + sizes, + strides, + rates, + auto_pad, + CldnnTensorFromIEDims(eipLayer->outData[0]->getTensorDesc().getDims())); + + topology.add(extractImagePatchesPrim); + AddPrimitiveToProfiler(eipLayerName, layer); +} + bool Program::IsValidSplitConvMerge(const InferenceEngine::SplitLayer *splitLayer) const { if (splitLayer->outData.size() != 2) return false; // split into 2 diff --git a/inference-engine/src/cldnn_engine/cldnn_program.h b/inference-engine/src/cldnn_engine/cldnn_program.h index 3fb33869bf81e6..598e2584d0e2be 100644 --- a/inference-engine/src/cldnn_engine/cldnn_program.h +++ b/inference-engine/src/cldnn_engine/cldnn_program.h @@ -221,6 +221,7 @@ class Program { EmbeddingBagPackedSum, EmbeddingBagOffsetsSum, EmbeddingSegmentsSum, + ExtractImagePatches, NO_TYPE }; using GenericBlobMap = std::map; @@ -382,6 +383,7 @@ class Program { void CreateEmbeddingBagPackedSumPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr& layer); void CreateEmbeddingBagOffsetsSumPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr& layer); void CreateEmbeddingSegmentsSumPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr& layer); + void CreateExtractImagePatchesPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer); }; } // namespace CLDNNPlugin diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/extract_image_patches.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/extract_image_patches.cpp new file mode 100644 index 00000000000000..21f977dc04b026 --- /dev/null +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/extract_image_patches.cpp @@ -0,0 +1,70 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "single_layer_tests/extract_image_patches.hpp" + +using namespace LayerTestsDefinitions; +using ngraph::op::PadType; + +namespace { + +const std::vector> inDataShape = { + {1, 1, 10, 10}, + {1, 3, 10, 10} +}; +const std::vector> kernels = { + {2, 2}, + {3, 3}, + {4, 4}, + {1, 3}, + {4, 2} +}; +const std::vector> strides = { + {3, 3}, + {5, 5}, + {9, 9}, + {1, 3}, + {6, 2} +}; +const std::vector> rates = { + {1, 1}, + {1, 2}, + {2, 1}, + {2, 2} +}; +const std::vector autoPads = { + PadType::VALID, + PadType::SAME_UPPER, + PadType::SAME_LOWER +}; +const std::vector netPrecisions = { + //InferenceEngine::Precision::I8, + InferenceEngine::Precision::U8, + InferenceEngine::Precision::I16, + InferenceEngine::Precision::I32, + InferenceEngine::Precision::FP32 +}; + +const auto extractImagePatchesParamsSet = ::testing::Combine( + ::testing::ValuesIn(inDataShape), + ::testing::ValuesIn(kernels), + ::testing::ValuesIn(strides), + ::testing::ValuesIn(rates), + ::testing::ValuesIn(autoPads) +); + +INSTANTIATE_TEST_CASE_P(layers_GPU, ExtractImagePatchesTest, + ::testing::Combine( + ::testing::ValuesIn(inDataShape), + ::testing::ValuesIn(kernels), + ::testing::ValuesIn(strides), + ::testing::ValuesIn(rates), + ::testing::ValuesIn(autoPads), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(CommonTestUtils::DEVICE_GPU)), + ExtractImagePatchesTest::getTestCaseName); + +} // namespace diff --git a/inference-engine/thirdparty/clDNN/api/extract_image_patches.hpp b/inference-engine/thirdparty/clDNN/api/extract_image_patches.hpp new file mode 100644 index 00000000000000..0402036290ea78 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/api/extract_image_patches.hpp @@ -0,0 +1,79 @@ +/* +// Copyright (c) 2020 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +/////////////////////////////////////////////////////////////////////////////////////////////////// +#pragma once +#include "primitive.hpp" + +namespace cldnn { +/// @addtogroup cpp_api C++ API +/// @{ +/// @addtogroup cpp_topology Network Topology +/// @{ +/// @addtogroup cpp_primitives Primitives +/// @{ + +/// @brief The ExtractImagePatches operation collects patches from the input tensor, as if applying a convolution. All extracted patches are stacked in the depth dimension of the output. +/// @details The ExtractImagePatches operation is similar to the TensorFlow* +/// operation ExtractImagePatches. +/// This op extracts patches of shape `sizes` which are `strides` apart in the +/// input image. The output elements are taken from the input at intervals +/// given by the `rate` argument, as in dilated convolutions. +/// The result is a 4D tensor containing image patches with size +/// `size[0] * size[1] * depth` vectorized in the "depth" dimension. +/// The "auto_pad" attribute has no effect on the size of each patch, it +/// determines how many patches are extracted. +struct extract_image_patches : public primitive_base { + CLDNN_DECLARE_PRIMITIVE(extract_image_patches) + + /// @brief Constructs select primitive. + /// @param id This primitive id. + /// @param input Input primitive id containing input 4-D tensor. + /// @param sizes Vector with sizes. + /// @param strides Vector with strides. + /// @param rates Vector with rates. + /// @param auto_pad How the padding is calculated. + /// @param output_shape Tensor with shape of output layout + extract_image_patches(const primitive_id& id, + const primitive_id& input, + const std::vector& sizes, + const std::vector& strides, + const std::vector& rates, + const std::string& auto_pad, + const tensor& output_shape, + const padding& output_padding = padding()) + : primitive_base(id, {input}, output_padding), + sizes(sizes), + strides(strides), + rates(rates), + auto_pad(auto_pad), + output_shape(output_shape) {} + + /// @brief Vector with sizes + std::vector sizes; + /// @brief Vector with strides + std::vector strides; + /// @brief Vector with rates + std::vector rates; + /// @brief Mode how the padding is calculated + std::string auto_pad; + /// @brief Shape of output layout + tensor output_shape; +}; +/// @} +/// @} +/// @} +} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/common/common_types.h b/inference-engine/thirdparty/clDNN/kernel_selector/common/common_types.h index fd1caf890fdd37..58772662a8916c 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/common/common_types.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/common/common_types.h @@ -85,7 +85,8 @@ enum class KernelType { GRN, CTC_GREEDY_DECODER, CUM_SUM, - EMBEDDING_BAG + EMBEDDING_BAG, + EXTRACT_IMAGE_PATCHES }; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/extract_image_patches/extract_image_patches_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/extract_image_patches/extract_image_patches_kernel_base.cpp new file mode 100644 index 00000000000000..f3c3e7c6187a16 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/extract_image_patches/extract_image_patches_kernel_base.cpp @@ -0,0 +1,108 @@ +/* +// Copyright (c) 2020 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +#include "extract_image_patches_kernel_base.h" +#include +#include "kernel_selector_utils.h" + +namespace kernel_selector { +ParamsKey ExtractImagePatchesKernelBase::GetSupportedKey() const { + ParamsKey k; + + k.EnableAllInputDataType(); + k.EnableAllOutputDataType(); + k.EnableInputLayout(DataLayout::bfyx); + k.EnableOutputLayout(DataLayout::bfyx); + + k.EnableTensorOffset(); + k.EnableTensorPitches(); + k.EnableBatching(); + return k; +} + +JitConstants ExtractImagePatchesKernelBase::GetJitConstants(const extract_image_patches_params& params) const { + JitConstants jit = MakeBaseParamsJitConstants(params); + + jit.AddConstants({ + MakeJitConstant("SIZE_ROWS", params.sizes[0]), + MakeJitConstant("SIZE_COLS", params.sizes[1]), + MakeJitConstant("STRIDE_ROWS", params.strides[0]), + MakeJitConstant("STRIDE_COLS", params.strides[1]), + MakeJitConstant("RATES_ROWS", params.rates[0]), + MakeJitConstant("RATES_COLS", params.rates[1]), + }); + if (params.auto_pad == "same_upper") + jit.AddConstant(MakeJitConstant("AUTO_PAD", 1)); + else if (params.auto_pad == "same_lower") + jit.AddConstant(MakeJitConstant("AUTO_PAD", 2)); + + return jit; +} + +ExtractImagePatchesKernelBase::DispatchData ExtractImagePatchesKernelBase::SetDefault(const extract_image_patches_params& params) const { + DispatchData kd; + + std::vector global = { params.output.Batch().v, + params.output.Feature().v, + params.output.Y().v * params.output.X().v }; + + const auto& local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo); + + kd.gws0 = global[0]; + kd.gws1 = global[1]; + kd.gws2 = global[2]; + + kd.lws0 = local[0]; + kd.lws1 = local[1]; + kd.lws2 = local[2]; + + return kd; +} + +KernelsData ExtractImagePatchesKernelBase::GetCommonKernelsData(const Params& params, + const optional_params& options, + float estimated_time) const { + if (!Validate(params, options)) { + return KernelsData(); + } + + const auto& prim_params = static_cast(params); + + auto run_info = SetDefault(prim_params); + KernelData kd = KernelData::Default(params); + + auto cldnn_jit = GetJitConstants(prim_params); + auto entry_point = GetEntryPoint(kernelName, prim_params.layerID, options); + auto jit = CreateJit(kernelName, cldnn_jit, entry_point); + + auto& kernel = kd.kernels[0]; + FillCLKernelData(kernel, run_info, params.engineInfo, kernelName, jit, entry_point); + + kd.estimatedTime = estimated_time; + + return {kd}; +} + +bool ExtractImagePatchesKernelBase::Validate(const Params& p, const optional_params&) const { + const extract_image_patches_params& params = static_cast(p); + + if (params.GetType() != KernelType::EXTRACT_IMAGE_PATCHES) { + return false; + } + + return true; +} +} // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/extract_image_patches/extract_image_patches_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/extract_image_patches/extract_image_patches_kernel_base.h new file mode 100644 index 00000000000000..b06ff357bd1e4c --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/extract_image_patches/extract_image_patches_kernel_base.h @@ -0,0 +1,58 @@ +// Copyright (c) 2020 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "common_kernel_base.h" +#include "kernel_selector_params.h" + +#include + +namespace kernel_selector { +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// extract_image_patches_params +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct extract_image_patches_params : public base_params { + extract_image_patches_params() : base_params(KernelType::EXTRACT_IMAGE_PATCHES) {} + + std::vector sizes; + std::vector strides; + std::vector rates; + std::string auto_pad; + + virtual ParamsKey GetParamsKey() const { return base_params::GetParamsKey(); } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// extract_image_patches_optional_params +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct extract_image_patches_optional_params : optional_params { + extract_image_patches_optional_params() : optional_params(KernelType::EXTRACT_IMAGE_PATCHES) {} +}; + +class ExtractImagePatchesKernelBase : public common_kernel_base { +public: + using common_kernel_base::common_kernel_base; + using DispatchData = CommonDispatchData; + virtual ~ExtractImagePatchesKernelBase() {} + +protected: + virtual ParamsKey GetSupportedKey() const override; + virtual JitConstants GetJitConstants(const extract_image_patches_params& params) const; + DispatchData SetDefault(const extract_image_patches_params& params) const; + KernelsData GetCommonKernelsData(const Params& params, const optional_params&, float estimated_time) const; + + bool Validate(const Params& p, const optional_params&) const override; +}; +} // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/extract_image_patches/extract_image_patches_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/extract_image_patches/extract_image_patches_kernel_ref.cpp new file mode 100644 index 00000000000000..a9114e49b1089d --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/extract_image_patches/extract_image_patches_kernel_ref.cpp @@ -0,0 +1,26 @@ +/* +// Copyright (c) 2020 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +#include "extract_image_patches_kernel_ref.h" +#include "kernel_selector_utils.h" +#include +#include + +namespace kernel_selector { +KernelsData ExtractImagePatchesKernelRef::GetKernelsData(const Params& params, const optional_params& options) const { + return GetCommonKernelsData(params, options, DONT_USE_IF_HAVE_SOMETHING_ELSE); +} +} // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/extract_image_patches/extract_image_patches_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/extract_image_patches/extract_image_patches_kernel_ref.h new file mode 100644 index 00000000000000..b406ed6d2d5140 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/extract_image_patches/extract_image_patches_kernel_ref.h @@ -0,0 +1,29 @@ +/* +// Copyright (c) 2020 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +#pragma once + +#include "extract_image_patches_kernel_base.h" + +namespace kernel_selector { +class ExtractImagePatchesKernelRef : public ExtractImagePatchesKernelBase { +public: + ExtractImagePatchesKernelRef() : ExtractImagePatchesKernelBase("extract_image_patches_ref") {} + virtual ~ExtractImagePatchesKernelRef() = default; +protected: + KernelsData GetKernelsData(const Params& params, const optional_params& options) const override; +}; +} // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/extract_image_patches/extract_image_patches_kernel_selector.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/extract_image_patches/extract_image_patches_kernel_selector.cpp new file mode 100644 index 00000000000000..eb3858e05c114a --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/extract_image_patches/extract_image_patches_kernel_selector.cpp @@ -0,0 +1,26 @@ +// Copyright (c) 2020 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "extract_image_patches_kernel_selector.h" +#include "extract_image_patches_kernel_ref.h" + +namespace kernel_selector { +extract_image_patches_kernel_selector::extract_image_patches_kernel_selector() { + Attach(); +} + +KernelsData extract_image_patches_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const { + return GetNaiveBestKernel(params, options, KernelType::EXTRACT_IMAGE_PATCHES); +} +} // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/extract_image_patches/extract_image_patches_kernel_selector.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/extract_image_patches/extract_image_patches_kernel_selector.h new file mode 100644 index 00000000000000..d69ad0d70ae29c --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/extract_image_patches/extract_image_patches_kernel_selector.h @@ -0,0 +1,32 @@ +// Copyright (c) 2020 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "kernel_selector.h" + +namespace kernel_selector { +class extract_image_patches_kernel_selector : public kernel_selector_base { +public: + static extract_image_patches_kernel_selector& Instance() { + static extract_image_patches_kernel_selector instance_; + return instance_; + } + + extract_image_patches_kernel_selector(); + virtual ~extract_image_patches_kernel_selector() = default; + + KernelsData GetBestKernels(const Params& params, const optional_params& options) const override; +}; +} // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/cum_sum_ref.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/cum_sum_ref.cl index daa37228b95d98..7595d3bd2c87b4 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/cum_sum_ref.cl +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/cum_sum_ref.cl @@ -44,9 +44,9 @@ inline uint FUNC(get_output_index)(uint b, uint f, uint w, uint z, uint y, uint KERNEL(cum_sum_ref)( const __global INPUT0_TYPE* input, __global OUTPUT_TYPE* output) { - const uint batch = get_global_id(0); - const uint features = get_global_id(1) / OUTPUT_SIZE_W; - const uint w = get_global_id(1) % OUTPUT_SIZE_W; + const uint batch = (uint)get_global_id(0); + const uint features = (uint)get_global_id(1) / OUTPUT_SIZE_W; + const uint w = (uint)get_global_id(1) % OUTPUT_SIZE_W; const uint yx = (uint)get_global_id(2) % (OUTPUT_SIZE_X * OUTPUT_SIZE_Y); const uint z = (uint)get_global_id(2) / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y); const uint y = yx / OUTPUT_SIZE_X; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/extract_image_patches_ref.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/extract_image_patches_ref.cl new file mode 100644 index 00000000000000..f142f5034d4ab4 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/extract_image_patches_ref.cl @@ -0,0 +1,64 @@ +// Copyright (c) 2020 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "include/include_all.cl" + +KERNEL(extract_image_patches_ref)(const __global INPUT0_TYPE* input, + __global OUTPUT_TYPE* output) +{ + const uint batch = (uint)get_global_id(0); + const uint out_depth = (uint)get_global_id(1); + const uint out_row = (uint)get_global_id(2) / OUTPUT_SIZE_X; + const uint out_col = (uint)get_global_id(2) % OUTPUT_SIZE_X; + + int row_padding = 0; + int col_padding = 0; +#ifdef AUTO_PAD + uint num_out_rows = OUTPUT_SIZE_Y * STRIDE_ROWS + (SIZE_ROWS * RATES_ROWS - STRIDE_ROWS); +#if RATES_ROWS > 1 + --num_out_rows; +#endif // RATES_ROWS > 1 + const int row_padding_size = max((int)(num_out_rows - INPUT0_SIZE_Y), 0); + uint num_out_cols = OUTPUT_SIZE_X * STRIDE_COLS + (SIZE_COLS * RATES_COLS - STRIDE_COLS); +#if RATES_COLS > 1 + --num_out_cols; +#endif // RATES_COLS > 1 + const int col_padding_size = max((int)(num_out_cols - INPUT0_SIZE_X), 0); + row_padding = row_padding_size / 2; + col_padding = col_padding_size / 2; +#if AUTO_PAD == 2 // same_lower + row_padding = (row_padding_size % 2) + row_padding; + col_padding = (col_padding_size % 2) + col_padding; +#endif // AUTO_PAD == 2 +#endif // AUTO_PAD + + const uint cur_row_ind = out_depth / (INPUT0_FEATURE_NUM * SIZE_COLS); + const uint row = cur_row_ind + + STRIDE_ROWS * out_row - row_padding + + (RATES_ROWS - 1) * cur_row_ind; + const uint cur_col_ind = (out_depth % (INPUT0_FEATURE_NUM * SIZE_COLS)) / INPUT0_FEATURE_NUM; + const uint col = cur_col_ind + + STRIDE_COLS * out_col - col_padding + + (RATES_COLS - 1) * cur_col_ind; + + const uint depth = out_depth % INPUT0_FEATURE_NUM; + const uint in_ind = INPUT0_GET_INDEX_SAFE(batch, depth, row, col); + const uint out_ind = OUTPUT_GET_INDEX(batch, out_depth, out_row, out_col); + OUTPUT_TYPE res = TO_OUTPUT_TYPE(input[in_ind]); +#ifdef AUTO_PAD + if (row < 0 || col < 0 || row >= INPUT0_SIZE_Y || col >= INPUT0_SIZE_X) + res = OUTPUT_VAL_ZERO; +#endif + output[out_ind] = ACTIVATION(res, ACTIVATION_PARAMS); +} diff --git a/inference-engine/thirdparty/clDNN/src/extract_image_patches.cpp b/inference-engine/thirdparty/clDNN/src/extract_image_patches.cpp new file mode 100644 index 00000000000000..d265e9cc9b635f --- /dev/null +++ b/inference-engine/thirdparty/clDNN/src/extract_image_patches.cpp @@ -0,0 +1,69 @@ +/* +// Copyright (c) 2020 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +#include "extract_image_patches_inst.h" + +#include "primitive_type_base.h" +#include "error_handler.h" +#include "json_object.h" +#include + +namespace cldnn { +primitive_type_id extract_image_patches::type_id() { + static primitive_type_base instance; + return &instance; +} + +layout extract_image_patches_inst::calc_output_layout(extract_image_patches_node const& node) { + auto desc = node.get_primitive(); + + auto input_layout = node.input(0).get_output_layout(); + auto input_format = input_layout.format; + + auto output_shape = desc->output_shape; + return layout(input_layout.data_type, input_format, output_shape); +} + +std::string extract_image_patches_inst::to_string(extract_image_patches_node const& node) { + auto desc = node.get_primitive(); + auto node_info = node.desc_to_json(); + auto& input = node.input(); + + std::stringstream primitive_description; + + std::stringstream sizes, strides, rates; + sizes << desc->sizes[0] << "," << desc->sizes[1]; + strides << desc->strides[0] << "," << desc->strides[1]; + rates << desc->rates[0] << "," << desc->rates[1]; + + json_composite extract_image_patches_info; + extract_image_patches_info.add("input id", input.id()); + extract_image_patches_info.add("input shape", input.get_output_layout().size.to_string()); + extract_image_patches_info.add("sizes", sizes.str()); + extract_image_patches_info.add("strides", strides.str()); + extract_image_patches_info.add("rates", rates.str()); + extract_image_patches_info.add("auto_pad", desc->auto_pad); + extract_image_patches_info.add("output shape", input.calc_output_layout().size.to_string()); + + node_info->add("extract_image_patches info", extract_image_patches_info); + node_info->dump(primitive_description); + + return primitive_description.str(); +} + +extract_image_patches_inst::typed_primitive_inst(network_impl& network, extract_image_patches_node const& node) : parent(network, node) {} + +} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/gpu/extract_image_patches_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/extract_image_patches_gpu.cpp new file mode 100644 index 00000000000000..735eb2e35199db --- /dev/null +++ b/inference-engine/thirdparty/clDNN/src/gpu/extract_image_patches_gpu.cpp @@ -0,0 +1,72 @@ +/* +// Copyright (c) 2020 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +#include "extract_image_patches_inst.h" +#include "primitive_gpu_base.h" +#include "implementation_map.h" +#include "error_handler.h" +#include "kernel_selector_helper.h" + +#include "extract_image_patches/extract_image_patches_kernel_selector.h" +#include "extract_image_patches/extract_image_patches_kernel_ref.h" + +namespace cldnn { +namespace gpu { + +struct extract_image_patches_gpu : typed_primitive_gpu_impl { + using parent = typed_primitive_gpu_impl; + using parent::parent; + +public: + static primitive_impl* create(const extract_image_patches_node& arg) { + auto params = get_default_params(arg); + auto optional_params = + get_default_optional_params(arg.get_program()); + + params.sizes = arg.get_primitive()->sizes; + params.strides = arg.get_primitive()->strides; + params.rates = arg.get_primitive()->rates; + params.auto_pad = arg.get_primitive()->auto_pad; + + auto& kernel_selector = kernel_selector::extract_image_patches_kernel_selector::Instance(); + auto best_kernels = kernel_selector.GetBestKernels(params, optional_params); + + CLDNN_ERROR_BOOL(arg.id(), + "Best_kernel.empty()", + best_kernels.empty(), + "Cannot find a proper kernel with this arguments"); + + auto extract_image_patches = new extract_image_patches_gpu(arg, best_kernels[0]); + + return extract_image_patches; + } +}; + +namespace detail { + +attach_extract_image_patches_gpu::attach_extract_image_patches_gpu() { + implementation_map::add( + {{std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), extract_image_patches_gpu::create}, + {std::make_tuple(engine_types::ocl, data_types::i64, format::bfyx), extract_image_patches_gpu::create}, + {std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), extract_image_patches_gpu::create}, + {std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), extract_image_patches_gpu::create}, + {std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), extract_image_patches_gpu::create}, + {std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), extract_image_patches_gpu::create}}); +} + +} // namespace detail +} // namespace gpu +} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/gpu/register_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/register_gpu.cpp index ae133b220b21c9..bcf0872a53b5e6 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/register_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/register_gpu.cpp @@ -101,6 +101,7 @@ void register_implementations_gpu() { REGISTER_GPU(ctc_greedy_decoder); REGISTER_GPU(cum_sum); REGISTER_GPU(embedding_bag); + REGISTER_GPU(extract_image_patches); } } // namespace gpu diff --git a/inference-engine/thirdparty/clDNN/src/gpu/register_gpu.hpp b/inference-engine/thirdparty/clDNN/src/gpu/register_gpu.hpp index 80ba080c0f7200..23daa9ece89ddf 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/register_gpu.hpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/register_gpu.hpp @@ -180,6 +180,7 @@ REGISTER_GPU(grn); REGISTER_GPU(ctc_greedy_decoder); REGISTER_GPU(cum_sum); REGISTER_GPU(embedding_bag); +REGISTER_GPU(extract_image_patches); #undef REGISTER_GPU diff --git a/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_primitive_fusing.cpp b/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_primitive_fusing.cpp index 90348d554d40c0..c1ca243198f210 100644 --- a/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_primitive_fusing.cpp +++ b/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_primitive_fusing.cpp @@ -53,6 +53,7 @@ #include "strided_slice_inst.h" #include "cum_sum_inst.h" #include "embedding_bag_inst.h" +#include "extract_image_patches_inst.h" #include #include #include @@ -201,9 +202,10 @@ void prepare_primitive_fusing::fuse_activations(program_impl &p) { !input.is_type() && !input.is_type() && !input.is_type() && !input.is_type() && !input.is_type() && !input.is_type() && !input.is_type() && !input.is_type() && !input.is_type() && - !input.is_type() && !input.is_type() && !input.is_type() && - !input.is_type() && !input.is_type() && !input.is_type() && - !input.is_type() && !input.is_type() && !input.is_type() && + !input.is_type() && !input.is_type() && + !input.is_type() && !input.is_type() && !input.is_type() && + !input.is_type() && !input.is_type() && !input.is_type() && + !input.is_type() && !input.is_type() && !input.is_type() && !input.is_type())) return; diff --git a/inference-engine/thirdparty/clDNN/src/include/extract_image_patches_inst.h b/inference-engine/thirdparty/clDNN/src/include/extract_image_patches_inst.h new file mode 100644 index 00000000000000..fad58adfcb3583 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/src/include/extract_image_patches_inst.h @@ -0,0 +1,49 @@ +/* +// Copyright (c) 2020 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +/////////////////////////////////////////////////////////////////////////////////////////////////// +#pragma once + +#include "api/extract_image_patches.hpp" +#include "primitive_inst.h" + +namespace cldnn { +template <> +struct typed_program_node : public typed_program_node_base { + using parent = typed_program_node_base; + +public: + using parent::parent; + + program_node& input(size_t index = 0) const { return get_dependency(index); } +}; + +using extract_image_patches_node = typed_program_node; + +template <> +class typed_primitive_inst : public typed_primitive_inst_base { + using parent = typed_primitive_inst_base; + +public: + static layout calc_output_layout(extract_image_patches_node const& node); + static std::string to_string(extract_image_patches_node const& node); + +public: + typed_primitive_inst(network_impl& network, extract_image_patches_node const& desc); +}; + +using extract_image_patches_inst = typed_primitive_inst; +} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/extract_image_patches_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/extract_image_patches_gpu_test.cpp new file mode 100644 index 00000000000000..a1ad3a015bbb3f --- /dev/null +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/extract_image_patches_gpu_test.cpp @@ -0,0 +1,577 @@ +/* +// Copyright (c) 2020 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +/////////////////////////////////////////////////////////////////////////////////////////////////// +#include + +#include +#include +#include +#include +#include +#include + +#include + +using namespace cldnn; +using namespace tests; + +TEST(extract_image_patches_gpu, basic) { + // Input : 1x1x10x10 + // Output : 1x9x2x2 + + tensor output_shape = {1, 9, 2, 2}; + const auto& engine = get_test_engine(); + auto batch = 1; + auto depth = 1; + auto in_rows = 10; + auto in_cols = 10; + std::vector sizes = {3, 3}; + std::vector strides = {5, 5}; + std::vector rates = {1, 1}; + std::string auto_pad = "valid"; + + auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { batch, depth, in_cols, in_rows } }); + + std::vector inputVals(batch * depth * in_rows * in_cols); + std::generate(inputVals.begin(), inputVals.end(), []() { + static float n = 1; + return n++; + }); + + set_values(input, inputVals); + + topology topology; + topology.add(input_layout("Input0", input.get_layout())); + topology.add(extract_image_patches("extract_image_patches", "Input0", sizes, strides, rates, auto_pad, output_shape)); + + network network(engine, topology); + network.set_input_data("Input0", input); + auto outputs = network.execute(); + + EXPECT_EQ(outputs.size(), size_t(1)); + EXPECT_EQ(outputs.begin()->first, "extract_image_patches"); + + auto output = outputs.at("extract_image_patches").get_memory(); + auto output_ptr = output.pointer(); + + std::vector answers = { + 1, 6, + 51, 56, + + 2, 7, + 52, 57, + + 3, 8, + 53, 58, + + 11, 16, + 61, 66, + + 12, 17, + 62, 67, + + 13, 18, + 63, 68, + + 21, 26, + 71, 76, + + 22, 27, + 72, 77, + + 23, 28, + 73, 78 + }; + + ASSERT_EQ(answers.size(), output_ptr.size()); + for (size_t i = 0; i < answers.size(); ++i) { + EXPECT_TRUE(are_equal(answers[i], output_ptr[i])) << i; + } +} + +TEST(extract_image_patches_gpu, basic2) { + // Input : 1x1x10x10 + // Output : 1x16x1x1 + + const auto& engine = get_test_engine(); + auto batch = 1; + auto depth = 1; + auto in_rows = 10; + auto in_cols = 10; + std::vector sizes = {4, 4}; + std::vector strides = {8, 8}; + std::vector rates = {1, 1}; + std::string auto_pad = "valid"; + tensor output_shape = {1, 16, 1, 1}; + + auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { batch, depth, in_cols, in_rows } }); + + std::vector inputVals(batch * depth * in_rows * in_cols); + std::generate(inputVals.begin(), inputVals.end(), []() { + static float n = 1; + return n++; + }); + + set_values(input, inputVals); + + topology topology; + topology.add(input_layout("Input0", input.get_layout())); + topology.add(extract_image_patches("extract_image_patches", "Input0", sizes, strides, rates, auto_pad, output_shape)); + + network network(engine, topology); + network.set_input_data("Input0", input); + auto outputs = network.execute(); + + EXPECT_EQ(outputs.size(), size_t(1)); + EXPECT_EQ(outputs.begin()->first, "extract_image_patches"); + + auto output = outputs.at("extract_image_patches").get_memory(); + auto output_ptr = output.pointer(); + + std::vector answers = { + 1, + 2, + 3, + 4, + 11, + 12, + 13, + 14, + 21, + 22, + 23, + 24, + 31, + 32, + 33, + 34 + }; + + ASSERT_EQ(answers.size(), output_ptr.size()); + for (size_t i = 0; i < answers.size(); ++i) { + EXPECT_TRUE(are_equal(answers[i], output_ptr[i])) << i; + } +} + +TEST(extract_image_patches_gpu, basic3) { + // Input : 1x1x10x10 + // Output : 1x16x2x2 + + const auto& engine = get_test_engine(); + auto batch = 1; + auto depth = 1; + auto in_rows = 10; + auto in_cols = 10; + std::vector sizes = {4, 4}; + std::vector strides = {9, 9}; + std::vector rates = {1, 1}; + std::string auto_pad = "same_upper"; + tensor output_shape = {1, 16, 2, 2}; + + auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { batch, depth, in_cols, in_rows } }); + + std::vector inputVals(batch * depth * in_rows * in_cols); + std::generate(inputVals.begin(), inputVals.end(), []() { + static float n = 1; + return n++; + }); + + set_values(input, inputVals); + + topology topology; + topology.add(input_layout("Input0", input.get_layout())); + topology.add(extract_image_patches("extract_image_patches", "Input0", sizes, strides, rates, auto_pad, output_shape)); + + network network(engine, topology); + network.set_input_data("Input0", input); + auto outputs = network.execute(); + + EXPECT_EQ(outputs.size(), size_t(1)); + EXPECT_EQ(outputs.begin()->first, "extract_image_patches"); + + auto output = outputs.at("extract_image_patches").get_memory(); + auto output_ptr = output.pointer(); + + std::vector answers = { + 0, 0, + 0, 89, + + 0, 0, + 81, 90, + + 0, 0, + 82, 0, + + 0, 0, + 83, 0, + + 0, 9, + 0, 99, + + 1, 10, + 91, 100, + + 2, 0, + 92, 0, + + 3, 0, + 93, 0, + + 0, 19, + 0, 0, + + 11, 20, + 0, 0, + + 12, 0, + 0, 0, + + 13, 0, + 0, 0, + + 0, 29, + 0, 0, + + 21, 30, + 0, 0, + + 22, 0, + 0, 0, + + 23, 0, + 0, 0, + }; + + ASSERT_EQ(answers.size(), output_ptr.size()); + for (size_t i = 0; i < answers.size(); ++i) { + EXPECT_TRUE(are_equal(answers[i], output_ptr[i])) << i; + } +} + +TEST(extract_image_patches_gpu, basic3_same_lower) { + // Input : 1x1x10x10 + // Output : 1x16x2x2 + + const auto& engine = get_test_engine(); + auto batch = 1; + auto depth = 1; + auto in_rows = 10; + auto in_cols = 10; + std::vector sizes = {4, 4}; + std::vector strides = {9, 9}; + std::vector rates = {1, 1}; + std::string auto_pad = "same_lower"; + tensor output_shape = {1, 16, 2, 2}; + + auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { batch, depth, in_cols, in_rows } }); + + std::vector inputVals(batch * depth * in_rows * in_cols); + std::generate(inputVals.begin(), inputVals.end(), []() { + static float n = 1; + return n++; + }); + + set_values(input, inputVals); + + topology topology; + topology.add(input_layout("Input0", input.get_layout())); + topology.add(extract_image_patches("extract_image_patches", "Input0", sizes, strides, rates, auto_pad, output_shape)); + + network network(engine, topology); + network.set_input_data("Input0", input); + auto outputs = network.execute(); + + EXPECT_EQ(outputs.size(), size_t(1)); + EXPECT_EQ(outputs.begin()->first, "extract_image_patches"); + + auto output = outputs.at("extract_image_patches").get_memory(); + auto output_ptr = output.pointer(); + + std::vector answers = { + 0, 0, + 0, 78, + + 0, 0, + 0, 79, + + 0, 0, + 71, 80, + + 0, 0, + 72, 0, + + 0, 0, + 0, 88, + + 0, 0, + 0, 89, + + 0, 0, + 81, 90, + + 0, 0, + 82, 0, + + 0, 8, + 0, 98, + + 0, 9, + 0, 99, + + 1, 10, + 91, 100, + + 2, 0, + 92, 0, + + 0, 18, + 0, 0, + + 0, 19, + 0, 0, + + 11, 20, + 0, 0, + + 12, 0, + 0, 0, + }; + + ASSERT_EQ(answers.size(), output_ptr.size()); + for (size_t i = 0; i < answers.size(); ++i) { + EXPECT_TRUE(are_equal(answers[i], output_ptr[i])) << i; + } +} + +TEST(extract_image_patches_gpu, basic3_enough_space) { + // Input : 1x1x10x10 + // Output : 1x9x2x2 + + const auto& engine = get_test_engine(); + auto batch = 1; + auto depth = 1; + auto in_rows = 10; + auto in_cols = 10; + std::vector sizes = {3, 3}; + std::vector strides = {7, 7}; + std::vector rates = {1, 1}; + std::string auto_pad = "same_upper"; + tensor output_shape = {1, 9, 2, 2}; + + auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { batch, depth, in_cols, in_rows } }); + + std::vector inputVals(batch * depth * in_rows * in_cols); + std::generate(inputVals.begin(), inputVals.end(), []() { + static float n = 1; + return n++; + }); + + set_values(input, inputVals); + + topology topology; + topology.add(input_layout("Input0", input.get_layout())); + topology.add(extract_image_patches("extract_image_patches", "Input0", sizes, strides, rates, auto_pad, output_shape)); + + network network(engine, topology); + network.set_input_data("Input0", input); + auto outputs = network.execute(); + + EXPECT_EQ(outputs.size(), size_t(1)); + EXPECT_EQ(outputs.begin()->first, "extract_image_patches"); + + auto output = outputs.at("extract_image_patches").get_memory(); + auto output_ptr = output.pointer(); + + std::vector answers = { + 1, 8, + 71, 78, + + 2, 9, + 72, 79, + + 3, 10, + 73, 80, + + 11, 18, + 81, 88, + + 12, 19, + 82, 89, + + 13, 20, + 83, 90, + + 21, 28, + 91, 98, + + 22, 29, + 92, 99, + + 23, 30, + 93, 100, + }; + + ASSERT_EQ(answers.size(), output_ptr.size()); + for (size_t i = 0; i < answers.size(); ++i) { + EXPECT_TRUE(are_equal(answers[i], output_ptr[i])) << i; + } +} + +TEST(extract_image_patches_gpu, basic4) { + // Input : 1x1x10x10 + // Output : 1x9x2x2 + + const auto& engine = get_test_engine(); + auto batch = 1; + auto depth = 1; + auto in_rows = 10; + auto in_cols = 10; + std::vector sizes = {3, 3}; + std::vector strides = {5, 5}; + std::vector rates = {2, 2}; + std::string auto_pad = "valid"; + tensor output_shape = {1, 9, 2, 2}; + + auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { batch, depth, in_cols, in_rows } }); + + std::vector inputVals(batch * depth * in_rows * in_cols); + std::generate(inputVals.begin(), inputVals.end(), []() { + static float n = 1; + return n++; + }); + + set_values(input, inputVals); + + topology topology; + topology.add(input_layout("Input0", input.get_layout())); + topology.add(extract_image_patches("extract_image_patches", "Input0", sizes, strides, rates, auto_pad, output_shape)); + + network network(engine, topology); + network.set_input_data("Input0", input); + auto outputs = network.execute(); + + EXPECT_EQ(outputs.size(), size_t(1)); + EXPECT_EQ(outputs.begin()->first, "extract_image_patches"); + + auto output = outputs.at("extract_image_patches").get_memory(); + auto output_ptr = output.pointer(); + + std::vector answers = { + 1, 6, + 51, 56, + + 3, 8, + 53, 58, + + 5, 10, + 55, 60, + + 21, 26, + 71, 76, + + 23, 28, + 73, 78, + + 25, 30, + 75, 80, + + 41, 46, + 91, 96, + + 43, 48, + 93, 98, + + 45, 50, + 95, 100 + }; + + ASSERT_EQ(answers.size(), output_ptr.size()); + for (size_t i = 0; i < answers.size(); ++i) { + EXPECT_TRUE(are_equal(answers[i], output_ptr[i])) << i; + } +} + +TEST(extract_image_patches_gpu, basic5) { + // Input : 1x2x5x5 + // Output : 1x8x2x2 + + const auto& engine = get_test_engine(); + auto batch = 1; + auto depth = 2; + auto in_rows = 5; + auto in_cols = 5; + std::vector sizes = {2, 2}; + std::vector strides = {3, 3}; + std::vector rates = {1, 1}; + std::string auto_pad = "valid"; + tensor output_shape = {1, 8, 2, 2}; + + auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { batch, depth, in_cols, in_rows } }); + + std::vector inputVals(batch * depth * in_rows * in_cols); + std::generate(inputVals.begin(), inputVals.end(), []() { + static float n = 1; + return n++; + }); + + set_values(input, inputVals); + + topology topology; + topology.add(input_layout("Input0", input.get_layout())); + topology.add(extract_image_patches("extract_image_patches", "Input0", sizes, strides, rates, auto_pad, output_shape)); + + network network(engine, topology); + network.set_input_data("Input0", input); + auto outputs = network.execute(); + + EXPECT_EQ(outputs.size(), size_t(1)); + EXPECT_EQ(outputs.begin()->first, "extract_image_patches"); + + auto output = outputs.at("extract_image_patches").get_memory(); + auto output_ptr = output.pointer(); + + std::vector answers = { + 1, 4, + 16, 19, + + 26, 29, + 41, 44, + + 2, 5, + 17, 20, + + 27, 30, + 42, 45, + + 6, 9, + 21, 24, + + 31, 34, + 46, 49, + + 7, 10, + 22, 25, + + 32, 35, + 47, 50 + }; + + ASSERT_EQ(answers.size(), output_ptr.size()); + for (size_t i = 0; i < answers.size(); ++i) { + EXPECT_TRUE(are_equal(answers[i], output_ptr[i])) << i; + } +}