Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[GPU]: Added basic STFT implementation #27794

Merged
merged 26 commits into from
Dec 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
3cd31c3
[GPU]: STFT: Added all needed stuff(?) except cl kernel implementation.
pkowalc1 Nov 28, 2024
15675df
[GPU]: STFT: added initial unit test.
pkowalc1 Nov 29, 2024
cd48e89
[GPU]: Stft: Fixed shape calculation.
pkowalc1 Dec 5, 2024
f03ab74
[gpu]: [STFT]: Fixed cl kernel launch configuration.
pkowalc1 Dec 10, 2024
a046d37
[gpu]:STFT: Improved debug for unittests.
pkowalc1 Dec 10, 2024
a2afbba
[gpu]: STFT: WIP cl kernel
pkowalc1 Dec 10, 2024
b6c1ba3
[GPU]: STFT: unit test should pass.
pkowalc1 Dec 11, 2024
4f803cf
[gpu]: [stft]: Fixed cl kernel, added more unittests.
pkowalc1 Dec 11, 2024
8826295
[gpu]: Fixed code style of stft_data.h
pkowalc1 Dec 12, 2024
d60e95f
[gpu]: STFT: STFT officially support dynamic shapes.
pkowalc1 Dec 12, 2024
214c5ce
[gpu]: STFT: Added support for win_size less than frame_size.
pkowalc1 Dec 12, 2024
2c1c003
[gpu]: STFT: Fixed bugs with input size and supported formats.
pkowalc1 Dec 13, 2024
0665b41
[gpu]: STFT: implemented kahan sum algo for cl kernel.
pkowalc1 Dec 13, 2024
cfd5bf6
[gpu]: STFT: Enabled new shape infer for STFT.
pkowalc1 Dec 13, 2024
6d76abf
[gpu]: STFT: Added func tests.
pkowalc1 Dec 13, 2024
7f1e43a
Merge branch 'master' into stft_basic_gpu_impl
pkowalc1 Dec 16, 2024
dafc623
[gpu]: STFT: Refactoring.
pkowalc1 Dec 16, 2024
2d02564
[gpu]: STFT: Refactored func tests.
pkowalc1 Dec 16, 2024
0500dc8
[gpu]: STFT: Review Fixes.
pkowalc1 Dec 17, 2024
a3736a6
Merge branch 'master' into stft_basic_gpu_impl
pkowalc1 Dec 17, 2024
d6765f7
Merge branch 'openvinotoolkit:master' into stft_basic_gpu_impl
pkowalc1 Dec 18, 2024
c6bffc6
[gpu]: STFT: Review fixes.
pkowalc1 Dec 18, 2024
45a5c99
Update src/plugins/intel_gpu/tests/unit/test_cases/stft_gpu_test.cpp
pkowalc1 Dec 18, 2024
65d82bf
[gpu]:STFT: Review fixes.
pkowalc1 Dec 18, 2024
ea2b3b1
Merge branch 'master' into stft_basic_gpu_impl
mlukasze Dec 19, 2024
0475832
Merge branch 'master' into stft_basic_gpu_impl
mlukasze Dec 19, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -10,75 +10,10 @@

// NOTE(review): this span looks like a rendered diff hunk (-10,75 +10,10) in which removed and
// added lines are shown together. Both INSTANTIATE_TEST_SUITE_P calls below use the same
// smoke_STFT_static prefix, so presumably only the second (GetTestDataForDevice) form exists in
// the merged file -- confirm against the repository before relying on the parameter lists here.
namespace ov {
namespace test {
using ov::test::STFTLayerTest;

// Element types combined into the test parameters below.
// Presumably data_type applies to the signal/window inputs and step_size_type to the
// frame-size/frame-step inputs -- TODO confirm against STFTLayerTest.
const std::vector<ov::element::Type> data_type = {ov::element::f32, ov::element::bf16};
const std::vector<ov::element::Type> step_size_type = {ov::element::i32, ov::element::i64};

// Shape sets for the four test inputs. Each inner list holds one InputShape per input;
// judging by the per-entry comments, the first member is the (possibly dynamic) shape and the
// second is the list of concrete shapes -- TODO confirm against the InputShape definition.
const std::vector<std::vector<InputShape>> input_shapes = {
{ // Static shapes
{{}, {{128}}}, // 1st input
{{}, {{8}}}, // 2nd input
{{}, {{}}}, // 3rd input
{{}, {{}}} // 4th input
},
{ // Static shapes
{{}, {{1, 128}}}, // 1st input
{{}, {{8}}}, // 2nd input
{{}, {{}}}, // 3rd input
{{}, {{}}} // 4th input
},
{ // Static shapes
{{}, {{2, 226}}}, // 1st input
{{}, {{16}}}, // 2nd input
{{}, {{}}}, // 3rd input
{{}, {{}}} // 4th input
},
{ // Dynamic dims in the first input shape
{{-1, -1}, {{1, 128}, {2, 226}}}, // 1st input
{{}, {{8}}}, // 2nd input
{{}, {{}}}, // 3rd input
{{}, {{}}} // 4th input
},
{ // Dynamic dims in the first and second input shape
{{-1}, {{128}}}, // 1st input
{{-1}, {{8}}}, // 2nd input
{{}, {{}}}, // 3rd input
{{}, {{}}} // 4th input
},
{ // Dynamic dims in the first and second input shape
{{-1, -1}, {{1, 128}, {2, 226}}}, // 1st input
{{-1}, {{8}, {16}}}, // 2nd input
{{}, {{}}}, // 3rd input
{{}, {{}}} // 4th input
},
{ // Dynamic dims with range in the first and second input shape
{{{2, 4}, {1, 300}}, {{2, 226}, {3, 128}}}, // 1st input
{{{3, 16}}, {{4}, {16}}}, // 2nd input
{{}, {{}}}, // 3rd input
{{}, {{}}} // 4th input
}
};

// Scalar values fed to the frame-size and frame-step parameters of the test.
const std::vector<int64_t> frame_size = {16, 24};
const std::vector<int64_t> step_size = {2, 3, 4};

// Both settings of the transpose_frames attribute are exercised.
const std::vector<bool> transpose_frames = {
false,
true,
};

// Input layer kinds combined into the parameter set (constant and parameter).
std::vector<utils::InputLayerType> in_types = {utils::InputLayerType::CONSTANT, utils::InputLayerType::PARAMETER};

// Combination of every parameter list above, with the CPU device as the target.
const auto testCaseStatic = ::testing::Combine(::testing::ValuesIn(input_shapes),
::testing::ValuesIn(frame_size),
::testing::ValuesIn(step_size),
::testing::ValuesIn(transpose_frames),
::testing::ValuesIn(data_type),
::testing::ValuesIn(step_size_type),
::testing::ValuesIn(in_types),
::testing::Values(ov::test::utils::DEVICE_CPU));

// Instantiation driven by the locally built parameter set.
INSTANTIATE_TEST_SUITE_P(smoke_STFT_static, STFTLayerTest, testCaseStatic, STFTLayerTest::getTestCaseName);
// Instantiation driven by the shared test data obtained from STFTLayerTest for the CPU device.
INSTANTIATE_TEST_SUITE_P(smoke_STFT_static,
STFTLayerTest,
STFTLayerTest::GetTestDataForDevice(ov::test::utils::DEVICE_CPU),
STFTLayerTest::getTestCaseName);
} // namespace test
} // namespace ov
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,7 @@ REGISTER_FACTORY(v15, ROIAlignRotated);
REGISTER_FACTORY(v15, BitwiseRightShift);
REGISTER_FACTORY(v15, BitwiseLeftShift);
REGISTER_FACTORY(v15, SearchSorted);
REGISTER_FACTORY(v15, STFT);

// --------------------------- Supported internal ops --------------------------- //
REGISTER_FACTORY(internal, NonMaxSuppressionIEInternal);
Expand Down
62 changes: 62 additions & 0 deletions src/plugins/intel_gpu/include/intel_gpu/primitives/stft.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once
#include "primitive.hpp"

namespace cldnn {

/// @brief Short-time Fourier transform (STFT) operation.
/// @details Check the operation specification for details.
struct STFT : public primitive_base<STFT> {
    CLDNN_DECLARE_PRIMITIVE(STFT)

    /// @brief Default constructor: empty id and no inputs.
    STFT() : primitive_base("", {}) {}

    /// @brief Constructs STFT primitive.
    /// @param id This primitive id.
    /// @param signal Signal input.
    /// @param window Window input.
    /// @param frame_size Size of the frame.
    /// @param frame_step Step between frames.
    /// @param transpose_frames Enable/disable transpose_frames (check specification for details).
    STFT(const primitive_id& id,
         const input_info& signal,
         const input_info& window,
         const input_info& frame_size,
         const input_info& frame_step,
         const bool transpose_frames)
        : primitive_base(id, {signal, window, frame_size, frame_step}),
          transpose_frames(transpose_frames) {}

    /// @brief Enable/disable transpose_frames (check specification for details).
    bool transpose_frames = false;

    /// @brief Hash of the primitive: the base hash combined with transpose_frames.
    size_t hash() const override {
        size_t seed = primitive::hash();
        seed = hash_combine(seed, transpose_frames);
        return seed;
    }

    /// @brief Equality: common parameters must match and transpose_frames must be equal.
    bool operator==(const primitive& rhs) const override {
        if (!compare_common_params(rhs))
            return false;

        // Bind by const reference: a plain `auto` would copy-construct the whole primitive
        // just to read one flag. `const auto&` is valid whether downcast yields a reference
        // or a value.
        const auto& rhs_casted = downcast<const STFT>(rhs);

        return transpose_frames == rhs_casted.transpose_frames;
    }

    /// @brief Serializes the base primitive state followed by transpose_frames.
    void save(BinaryOutputBuffer& ob) const override {
        primitive_base<STFT>::save(ob);
        ob << transpose_frames;
    }

    /// @brief Deserializes in the same order as save(): base state, then transpose_frames.
    void load(BinaryInputBuffer& ib) override {
        primitive_base<STFT>::load(ib);
        ib >> transpose_frames;
    }
};
} // namespace cldnn
1 change: 1 addition & 0 deletions src/plugins/intel_gpu/src/graph/impls/ocl/register.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ void register_implementations() {
REGISTER_OCL(scaled_dot_product_attention);
REGISTER_OCL(rope);
REGISTER_OCL(search_sorted);
REGISTER_OCL(STFT);
}

} // namespace ocl
Expand Down
1 change: 1 addition & 0 deletions src/plugins/intel_gpu/src/graph/impls/ocl/register.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ REGISTER_OCL(unique_gather);
REGISTER_OCL(scaled_dot_product_attention);
REGISTER_OCL(rope);
REGISTER_OCL(search_sorted);
REGISTER_OCL(STFT);

#undef REGISTER_OCL

Expand Down
97 changes: 97 additions & 0 deletions src/plugins/intel_gpu/src/graph/impls/ocl/stft.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "primitive_base.hpp"
#include "stft/stft_kernel_base.h"
#include "stft/stft_kernel_selector.h"
#include "stft_inst.h"

namespace cldnn {
namespace ocl {

/// OCL implementation wrapper for the STFT primitive.
struct STFT_impl : typed_primitive_impl_ocl<STFT> {
    using parent = typed_primitive_impl_ocl<STFT>;
    using parent::parent;
    using kernel_selector_t = kernel_selector::STFT_kernel_selector;
    using kernel_params_t = kernel_selector::STFT_params;

    DECLARE_OBJECT_TYPE_SERIALIZATION(cldnn::ocl::STFT_impl)

    /// Returns a copy of this implementation object.
    std::unique_ptr<primitive_impl> clone() const override {
        return make_unique<STFT_impl>(*this);
    }

    /// Restores the implementation from a binary buffer. For dynamic implementations the
    /// kernel implementation is looked up by name and asked to set up the dispatch-data
    /// update function on _kernel_data.
    void load(BinaryInputBuffer& ib) override {
        parent::load(ib);
        if (!is_dynamic()) {
            return;
        }

        auto impl = kernel_selector_t::Instance().GetImplementation(_kernel_data.kernelName);
        impl->GetUpdateDispatchDataFunc(_kernel_data);
    }

    /// Refreshes shapes in the cached kernel params and re-runs the dispatch-data update.
    void update_dispatch_data(const kernel_impl_params& impl_param) override {
        // When the model comes from cache the params are not initialized yet; build them once
        // here and keep reusing the same object afterwards.
        if (!_kernel_data.params) {
            _kernel_data.params = std::make_shared<kernel_params_t>(get_kernel_params(impl_param, true));
        }

        update_shapes(*_kernel_data.params, impl_param);
        _kernel_data.update_dispatch_data_func(*_kernel_data.params, _kernel_data);
    }

    /// Builds kernel-selector params for STFT from the given impl params.
    static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool shape_agnostic = false) {
        const auto desc = impl_param.typed_desc<STFT>();
        auto kernel_params = get_default_params<kernel_params_t>(impl_param, shape_agnostic);

        // get_default_params does not handle inputs past the first one, so append them manually.
        const size_t input_count = impl_param.input_layouts.size();
        for (size_t input_idx = 1; input_idx < input_count; ++input_idx) {
            kernel_params.inputs.push_back(convert_data_tensor(impl_param.get_input_layout(input_idx)));
        }

        kernel_params.transpose_frames = desc->transpose_frames;
        return kernel_params;
    }

    // [NOTE]: Has to be a separate static function, since it is called via static dispatching in
    // typed_primitive_impl_ocl::create().
    /// Extends every input and output partial shape via extend_shape_to_rank_from_begin.
    static kernel_impl_params static_canonicalize_shapes(const kernel_impl_params& impl_params) {
        auto canonical = canonicalize_fused_shapes(impl_params);

        for (auto& in_layout : canonical.input_layouts) {
            in_layout.set_partial_shape(extend_shape_to_rank_from_begin(in_layout.get_partial_shape()));
        }

        for (auto& out_layout : canonical.output_layouts) {
            out_layout.set_partial_shape(extend_shape_to_rank_from_begin(out_layout.get_partial_shape()));
        }

        return canonical;
    }

    /// Instance-level entry point; forwards to the static variant.
    kernel_impl_params canonicalize_shapes(const kernel_impl_params& impl_params) const override {
        return static_canonicalize_shapes(impl_params);
    }
};

namespace detail {

// Registers STFT_impl as the OCL implementation of STFT for any shape type, with the
// data types and formats listed below.
attach_STFT_impl::attach_STFT_impl() {
    auto supported_formats = {format::bfyx};
    auto supported_types = {data_types::i32, data_types::i64, data_types::f16, data_types::f32};

    implementation_map<STFT>::add(impl_types::ocl,
                                  shape_types::any,
                                  typed_primitive_impl_ocl<STFT>::create<STFT_impl>,
                                  supported_types,
                                  supported_formats);
}

} // namespace detail
} // namespace ocl
} // namespace cldnn

BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::STFT_impl)
BIND_BINARY_BUFFER_WITH_TYPE(cldnn::STFT)
Original file line number Diff line number Diff line change
Expand Up @@ -216,3 +216,4 @@ REGISTER_DEFAULT_IMPLS(unique_gather, OCL_S, OCL_D);
REGISTER_DEFAULT_IMPLS(scaled_dot_product_attention, OCL_S, OCL_D);
REGISTER_DEFAULT_IMPLS(rope, OCL_S, OCL_D);
REGISTER_DEFAULT_IMPLS(search_sorted, OCL_S, OCL_D);
REGISTER_DEFAULT_IMPLS(STFT, OCL_S, OCL_D);
45 changes: 45 additions & 0 deletions src/plugins/intel_gpu/src/graph/include/stft_inst.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once

#include <intel_gpu/primitives/stft.hpp>

#include "primitive_inst.h"

namespace cldnn {

/// Program-node specialization for the STFT primitive.
template <>
struct typed_program_node<STFT> : public typed_program_node_base<STFT> {
    using parent = typed_program_node_base<STFT>;
    typed_program_node(const std::shared_ptr<STFT> prim, program& prog) : parent(prim, prog) {}

public:
    using parent::parent;

    /// Returns the dependency node at position idx (defaults to the first input).
    program_node& input(size_t idx = 0) const {
        return get_dependency(idx);
    }

    /// Input indices reported as shape-inference dependencies: frame_size and frame_step.
    std::vector<size_t> get_shape_infer_dependencies() const override {
        constexpr size_t frame_size_idx = 2;
        constexpr size_t frame_step_idx = 3;
        return {frame_size_idx, frame_step_idx};
    }
};

using STFT_node = typed_program_node<STFT>;

/// Primitive-instance specialization for the STFT primitive.
template <>
class typed_primitive_inst<STFT> : public typed_primitive_inst_base<STFT> {
    using parent = typed_primitive_inst_base<STFT>;
    using parent::parent;

public:
    /// Creates the instance for the given network and STFT node.
    typed_primitive_inst(network& network, const STFT_node& desc);

    /// Computes the output layouts for the given shape representation.
    template <typename ShapeType>
    static std::vector<layout> calc_output_layouts(const STFT_node& node, const kernel_impl_params& impl_param);

    /// Computes the single output layout.
    static layout calc_output_layout(const STFT_node& node, const kernel_impl_params& impl_param);

    /// Returns a textual description of the node.
    static std::string to_string(const STFT_node& node);
};

using STFT_inst = typed_primitive_inst<STFT>;

} // namespace cldnn
63 changes: 63 additions & 0 deletions src/plugins/intel_gpu/src/graph/stft.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <json_object.h>
#include <stft_inst.h>

#include <sstream>

#include "memory_accessor.hpp"
#include "openvino/core/enum_names.hpp"
#include "primitive_type_base.h"
#include "stft_shape_inference.hpp"

namespace cldnn {
GPU_DEFINE_PRIMITIVE_TYPE_ID(STFT)

// Constructs the STFT instance; all work is delegated to the parent constructor.
STFT_inst::typed_primitive_inst(network& network, const STFT_node& node)
    : parent(network, node) {}

// Returns the first layout produced by calc_output_layouts for ov::PartialShape.
layout STFT_inst::calc_output_layout(const STFT_node& node, const kernel_impl_params& impl_param) {
    const auto layouts = calc_output_layouts<ov::PartialShape>(node, impl_param);
    return layouts[0];
}

template <typename ShapeType>
std::vector<layout> STFT_inst::calc_output_layouts(STFT_node const& node, kernel_impl_params const& impl_param) {
auto primitive = impl_param.typed_desc<STFT>();

const auto& signal_layout = impl_param.get_input_layout(0);
const auto& window_layout = impl_param.get_input_layout(1);
const auto& frame_size_layout = impl_param.get_input_layout(2);
const auto& frame_step_layout = impl_param.get_input_layout(3);

std::vector<ShapeType> input_shapes = {
signal_layout.get<ShapeType>(),
window_layout.get<ShapeType>(),
frame_size_layout.get<ShapeType>(),
frame_step_layout.get<ShapeType>(),
};

const auto ta = MemoryAccessor(&impl_param.memory_deps, impl_param.get_stream());

std::vector<ShapeType> output_shapes;
ov::op::v15::STFT op;
op.set_transpose_frames(primitive->transpose_frames);
output_shapes = shape_infer(&op, input_shapes, ta);

return {layout{output_shapes[0], signal_layout.data_type, signal_layout.format}};
}

std::string STFT_inst::to_string(STFT_node const& node) {
auto node_info = node.desc_to_json();
json_composite STFT_info;
STFT_info.add("signal", node.input(0).id());
STFT_info.add("window", node.input(1).id());
STFT_info.add("framesize", node.input(2).id());
STFT_info.add("framestep", node.input(3).id());
STFT_info.add("transpose_frames", node.get_primitive()->transpose_frames);
node_info->add("STFT info", STFT_info);
std::stringstream primitive_description;
node_info->dump(primitive_description);
return primitive_description.str();
}

} // namespace cldnn
Loading
Loading