openvinotoolkit · pkowalc1 · Dec 19, 2024 · Nov 28, 2024 · Nov 29, 2024 · Dec 5, 2024
@@ -10,75 +10,10 @@
 
 namespace ov {
 namespace test {
-using ov::test::STFTLayerTest;
 
-const std::vector<ov::element::Type> data_type = {ov::element::f32, ov::element::bf16};
-const std::vector<ov::element::Type> step_size_type = {ov::element::i32, ov::element::i64};
-
-const std::vector<std::vector<InputShape>> input_shapes = {
-    {   // Static shapes
-        {{}, {{128}}},    // 1st input
-        {{}, {{8}}},      // 2nd input
-        {{}, {{}}},       // 3rd input
-        {{}, {{}}}        // 4th input
-    },
-    {   // Static shapes
-        {{}, {{1, 128}}}, // 1st input
-        {{}, {{8}}},      // 2nd input
-        {{}, {{}}},       // 3rd input
-        {{}, {{}}}        // 4th input
-    },
-    {   // Static shapes
-        {{}, {{2, 226}}}, // 1st input
-        {{}, {{16}}},     // 2nd input
-        {{}, {{}}},       // 3rd input
-        {{}, {{}}}        // 4th input
-    },
-    {   // Dynamic dims in the first input shape
-        {{-1, -1}, {{1, 128}, {2, 226}}},   // 1st input
-        {{}, {{8}}},                        // 2nd input
-        {{}, {{}}},                         // 3rd input
-        {{}, {{}}}                          // 4th input
-    },
-    {   // Dynamic dims in the first and second input shape
-        {{-1}, {{128}}},                    // 1st input
-        {{-1}, {{8}}},                      // 2nd input
-        {{}, {{}}},                         // 3rd input
-        {{}, {{}}}                          // 4th input
-    },
-    {   // Dynamic dims in the first and second input shape
-        {{-1, -1}, {{1, 128}, {2, 226}}},   // 1st input
-        {{-1}, {{8}, {16}}},                // 2nd input
-        {{}, {{}}},                         // 3rd input
-        {{}, {{}}}                          // 4th input
-    },
-    {   // Dynamic dims with range in the first and second input shape
-        {{{2, 4}, {1, 300}}, {{2, 226}, {3, 128}}}, // 1st input
-        {{{3, 16}}, {{4}, {16}}},                   // 2nd input
-        {{}, {{}}},                                 // 3rd input
-        {{}, {{}}}                                  // 4th input
-    }
-};
-
-const std::vector<int64_t> frame_size = {16, 24};
-const std::vector<int64_t> step_size = {2, 3, 4};
-
-const std::vector<bool> transpose_frames = {
-    false,
-    true,
-};
-
-std::vector<utils::InputLayerType> in_types = {utils::InputLayerType::CONSTANT, utils::InputLayerType::PARAMETER};
-
-const auto testCaseStatic = ::testing::Combine(::testing::ValuesIn(input_shapes),
-                                               ::testing::ValuesIn(frame_size),
-                                               ::testing::ValuesIn(step_size),
-                                               ::testing::ValuesIn(transpose_frames),
-                                               ::testing::ValuesIn(data_type),
-                                               ::testing::ValuesIn(step_size_type),
-                                               ::testing::ValuesIn(in_types),
-                                               ::testing::Values(ov::test::utils::DEVICE_CPU));
-
-INSTANTIATE_TEST_SUITE_P(smoke_STFT_static, STFTLayerTest, testCaseStatic, STFTLayerTest::getTestCaseName);
+INSTANTIATE_TEST_SUITE_P(smoke_STFT_static,
+                         STFTLayerTest,
+                         STFTLayerTest::GetTestDataForDevice(ov::test::utils::DEVICE_CPU),  // test data requested for the CPU device
+                         STFTLayerTest::getTestCaseName);
 }  // namespace test
 }  // namespace ov
@@ -273,6 +273,7 @@ REGISTER_FACTORY(v15, ROIAlignRotated);
 REGISTER_FACTORY(v15, BitwiseRightShift);
 REGISTER_FACTORY(v15, BitwiseLeftShift);
 REGISTER_FACTORY(v15, SearchSorted);
+REGISTER_FACTORY(v15, STFT);
 
 // --------------------------- Supported internal ops --------------------------- //
 REGISTER_FACTORY(internal, NonMaxSuppressionIEInternal);

@@ -0,0 +1,62 @@
+// Copyright (C) 2018-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+#include "primitive.hpp"
+
+namespace cldnn {
+
+/// @brief Short-time Fourier transform (STFT) operation.
+/// @details Check the operation specification for details.
+struct STFT : public primitive_base<STFT> {
+    CLDNN_DECLARE_PRIMITIVE(STFT)
+
+    STFT() : primitive_base("", {}) {}  // default state: empty id, no inputs
+
+    /// @brief Constructs STFT primitive.
+    /// @param id This primitive id.
+    /// @param signal Signal input.
+    /// @param window Window input.
+    /// @param frame_size Input providing the size of the frame.
+    /// @param frame_step Input providing the step between frames.
+    /// @param transpose_frames Enable/disable transpose_frames (check the specification for details).
+
+    STFT(const primitive_id& id,
+         const input_info& signal,
+         const input_info& window,
+         const input_info& frame_size,
+         const input_info& frame_step,
+         const bool transpose_frames)
+        : primitive_base(id, {signal, window, frame_size, frame_step}),  // input indices 0..3, in this order
+          transpose_frames(transpose_frames) {}
+
+    /// @brief Enable/disable transpose_frames (check the specification for details).
+    bool transpose_frames = false;
+
+    size_t hash() const override {
+        size_t seed = primitive::hash();
+        seed = hash_combine(seed, transpose_frames);  // the only STFT-specific attribute mixed into the base hash
+        return seed;
+    }
+
+    bool operator==(const primitive& rhs) const override {
+        if (!compare_common_params(rhs))
+            return false;
+
+        auto rhs_casted = downcast<const STFT>(rhs);
+
+        return transpose_frames == rhs_casted.transpose_frames;
+    }
+
+    void save(BinaryOutputBuffer& ob) const override {
+        primitive_base<STFT>::save(ob);
+        ob << transpose_frames;  // written after the base data; load() reads it back in the same order
+    }
+
+    void load(BinaryInputBuffer& ib) override {
+        primitive_base<STFT>::load(ib);
+        ib >> transpose_frames;  // read after the base data, mirroring save()
+    }
+};
+}  // namespace cldnn
@@ -88,6 +88,7 @@ void register_implementations() {
     REGISTER_OCL(scaled_dot_product_attention);
     REGISTER_OCL(rope);
     REGISTER_OCL(search_sorted);
+    REGISTER_OCL(STFT);
 }
 
 }  // namespace ocl

@@ -162,6 +162,7 @@ REGISTER_OCL(unique_gather);
 REGISTER_OCL(scaled_dot_product_attention);
 REGISTER_OCL(rope);
 REGISTER_OCL(search_sorted);
+REGISTER_OCL(STFT);
 
 #undef REGISTER_OCL
 

@@ -0,0 +1,97 @@
+// Copyright (C) 2018-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "primitive_base.hpp"
+#include "stft/stft_kernel_base.h"
+#include "stft/stft_kernel_selector.h"
+#include "stft_inst.h"
+
+namespace cldnn {
+namespace ocl {
+
+struct STFT_impl : typed_primitive_impl_ocl<STFT> {
+    using parent = typed_primitive_impl_ocl<STFT>;
+    using parent::parent;  // inherit the base-class constructors
+    using kernel_selector_t = kernel_selector::STFT_kernel_selector;
+    using kernel_params_t = kernel_selector::STFT_params;
+
+    DECLARE_OBJECT_TYPE_SERIALIZATION(cldnn::ocl::STFT_impl)
+
+    std::unique_ptr<primitive_impl> clone() const override {
+        return make_unique<STFT_impl>(*this);
+    }
+
+    void load(BinaryInputBuffer& ib) override {
+        parent::load(ib);
+        if (is_dynamic()) {
+            auto& kernel_selector = kernel_selector_t::Instance();
+            auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName);
+            kernel_impl->GetUpdateDispatchDataFunc(_kernel_data);  // NOTE(review): presumably re-installs update_dispatch_data_func in _kernel_data - confirm
+        }
+    }
+
+    void update_dispatch_data(const kernel_impl_params& impl_param) override {
+        // If the model was loaded from cache, params are not initialized, so we create a new object and reuse it in the future.
+        if (_kernel_data.params == nullptr) {
+            _kernel_data.params = std::make_shared<kernel_params_t>(get_kernel_params(impl_param, true));
+        }
+
+        update_shapes(*_kernel_data.params, impl_param);
+        (_kernel_data.update_dispatch_data_func)(*_kernel_data.params, _kernel_data);
+    }
+
+    static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool shape_agnostic = false) {
+        const auto& primitive = impl_param.typed_desc<STFT>();
+        auto params = get_default_params<kernel_selector::STFT_params>(impl_param, shape_agnostic);
+
+        // Manually add all inputs except the first one, since get_default_params does not handle them.
+        for (size_t i = 1; i < impl_param.input_layouts.size(); ++i) {
+            params.inputs.push_back(convert_data_tensor(impl_param.get_input_layout(i)));
+        }
+
+        params.transpose_frames = primitive->transpose_frames;
+        return params;
+    }
+
+    // [NOTE]: Has to be added as a separate static function, since it is called via static dispatching in
+    // typed_primitive_impl_ocl::create().
+    static kernel_impl_params static_canonicalize_shapes(const kernel_impl_params& impl_params) {
+        auto updated_impl_params = canonicalize_fused_shapes(impl_params);
+
+        for (auto& input_layout : updated_impl_params.input_layouts) {  // NOTE(review): presumably extends each shape's rank with leading dims - confirm
+            input_layout.set_partial_shape(extend_shape_to_rank_from_begin(input_layout.get_partial_shape()));
+        }
+
+        for (auto& output_layout : updated_impl_params.output_layouts) {  // same treatment as the input layouts
+            output_layout.set_partial_shape(extend_shape_to_rank_from_begin(output_layout.get_partial_shape()));
+        }
+
+        return updated_impl_params;
+    }
+
+    kernel_impl_params canonicalize_shapes(const kernel_impl_params& impl_params) const override {
+        return static_canonicalize_shapes(impl_params);  // virtual entry point delegates to the static variant
+    }
+};
+
+namespace detail {
+
+attach_STFT_impl::attach_STFT_impl() {
+    auto types = {data_types::i32, data_types::i64, data_types::f16, data_types::f32};  // data types registered for this impl
+
+    auto formats = {format::bfyx};  // bfyx is the only format registered
+
+    implementation_map<STFT>::add(impl_types::ocl,
+                                  shape_types::any,
+                                  typed_primitive_impl_ocl<STFT>::create<STFT_impl>,  // factory used to build STFT_impl
+                                  types,
+                                  formats);
+}
+
+}  // namespace detail
+}  // namespace ocl
+}  // namespace cldnn
+
+BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::STFT_impl)
+BIND_BINARY_BUFFER_WITH_TYPE(cldnn::STFT)
@@ -216,3 +216,4 @@ REGISTER_DEFAULT_IMPLS(unique_gather, OCL_S, OCL_D);
 REGISTER_DEFAULT_IMPLS(scaled_dot_product_attention, OCL_S, OCL_D);
 REGISTER_DEFAULT_IMPLS(rope, OCL_S, OCL_D);
 REGISTER_DEFAULT_IMPLS(search_sorted, OCL_S, OCL_D);
+REGISTER_DEFAULT_IMPLS(STFT, OCL_S, OCL_D);
@@ -0,0 +1,45 @@
+// Copyright (C) 2018-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+#pragma once
+
+#include <intel_gpu/primitives/stft.hpp>
+
+#include "primitive_inst.h"
+
+namespace cldnn {
+
+template <>
+struct typed_program_node<STFT> : public typed_program_node_base<STFT> {
+    using parent = typed_program_node_base<STFT>;
+    typed_program_node(const std::shared_ptr<STFT> prim, program& prog) : parent(prim, prog) {}
+
+public:
+    using parent::parent;  // inherit the base-class constructors
+
+    program_node& input(size_t idx = 0) const {
+        return get_dependency(idx);  // idx-th dependency of this node
+    }
+    std::vector<size_t> get_shape_infer_dependencies() const override {
+        return {2, 3};  // input indices of frame_size and frame_step
+    }
+};
+
+using STFT_node = typed_program_node<STFT>;
+
+template <>
+class typed_primitive_inst<STFT> : public typed_primitive_inst_base<STFT> {
+    using parent = typed_primitive_inst_base<STFT>;
+    using parent::parent;  // inherit the base-class constructors
+
+public:
+    typed_primitive_inst(network& network, STFT_node const& desc);
+    template <typename ShapeType>
+    static std::vector<layout> calc_output_layouts(STFT_node const& node, kernel_impl_params const& impl_param);
+    static layout calc_output_layout(STFT_node const& node, kernel_impl_params const& impl_param);
+    static std::string to_string(STFT_node const& node);  // JSON-based description of the node
+};
+
+using STFT_inst = typed_primitive_inst<STFT>;
+
+}  // namespace cldnn
@@ -0,0 +1,63 @@
+// Copyright (C) 2018-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+#include <json_object.h>
+#include <stft_inst.h>
+
+#include <sstream>
+
+#include "memory_accessor.hpp"
+#include "openvino/core/enum_names.hpp"
+#include "primitive_type_base.h"
+#include "stft_shape_inference.hpp"
+
+namespace cldnn {
+GPU_DEFINE_PRIMITIVE_TYPE_ID(STFT)
+
+STFT_inst::typed_primitive_inst(network& network, STFT_node const& node) : parent(network, node) {}  // forwards to the base constructor
+
+layout STFT_inst::calc_output_layout(STFT_node const& node, kernel_impl_params const& impl_param) {
+    return calc_output_layouts<ov::PartialShape>(node, impl_param)[0];  // calc_output_layouts yields a single layout
+}
+
+template <typename ShapeType>
+std::vector<layout> STFT_inst::calc_output_layouts(STFT_node const& node, kernel_impl_params const& impl_param) {
+    auto primitive = impl_param.typed_desc<STFT>();
+
+    const auto& signal_layout = impl_param.get_input_layout(0);
+    const auto& window_layout = impl_param.get_input_layout(1);
+    const auto& frame_size_layout = impl_param.get_input_layout(2);
+    const auto& frame_step_layout = impl_param.get_input_layout(3);
+
+    std::vector<ShapeType> input_shapes = {  // same order as the primitive inputs
+        signal_layout.get<ShapeType>(),
+        window_layout.get<ShapeType>(),
+        frame_size_layout.get<ShapeType>(),
+        frame_step_layout.get<ShapeType>(),
+    };
+
+    const auto ta = MemoryAccessor(&impl_param.memory_deps, impl_param.get_stream());
+
+    std::vector<ShapeType> output_shapes;
+    ov::op::v15::STFT op;  // local op object, used only as the argument to shape_infer
+    op.set_transpose_frames(primitive->transpose_frames);
+    output_shapes = shape_infer(&op, input_shapes, ta);
+
+    return {layout{output_shapes[0], signal_layout.data_type, signal_layout.format}};  // output reuses the signal's data type and format
+}
+
+std::string STFT_inst::to_string(STFT_node const& node) {
+    auto node_info = node.desc_to_json();
+    json_composite STFT_info;  // STFT-specific fields: the four input ids and the transpose flag
+    STFT_info.add("signal", node.input(0).id());
+    STFT_info.add("window", node.input(1).id());
+    STFT_info.add("framesize", node.input(2).id());
+    STFT_info.add("framestep", node.input(3).id());
+    STFT_info.add("transpose_frames", node.get_primitive()->transpose_frames);
+    node_info->add("STFT info", STFT_info);  // attach the STFT fields to the node description
+    std::stringstream primitive_description;
+    node_info->dump(primitive_description);
+    return primitive_description.str();
+}
+
+}  // namespace cldnn