Skip to content

Commit

Permalink
[GPU] Revert slice fusing
Browse files Browse the repository at this point in the history
  • Loading branch information
Lyamin-Roman committed Oct 14, 2024
1 parent ac96e15 commit e60043f
Show file tree
Hide file tree
Showing 3 changed files with 100 additions and 20 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,7 @@ KERNEL(slice_ref)(OPTIONAL_SHAPE_INFO_ARG
START_BUFFER
STEP_BUFFER
AXES_BUFFER
__global OUTPUT_TYPE* restrict output
#if HAS_FUSED_OPS_DECLS
, FUSED_OPS_DECLS
#endif
)
__global OUTPUT_TYPE* restrict output)
{
LOAD_BUFFER(START, start_buff);
LOAD_BUFFER(STEP, step_buff);
Expand Down Expand Up @@ -88,12 +84,7 @@ KERNEL(slice_ref)(OPTIONAL_SHAPE_INFO_ARG
slice_begin_dim4 + output_dim4 * slice_step[4]);
#endif

#if HAS_FUSED_OPS
FUSED_OPS;
output[output_index] = TO_OUTPUT_TYPE(FUSED_OPS_RESULT);
#else
output[output_index] = ACTIVATION(input[input_index], ACTIVATION_PARAMS);
#endif
}

// Drop the helper macro so it does not leak past this kernel source.
// (No trailing ';' here: a preprocessor directive takes the macro name only.)
#undef LOAD_BUFFER
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -142,16 +142,6 @@ JitConstants SliceKernelRef::GetJitConstants(const slice_params& params) const {
addJitConstantsForParam(jit, "START", params.compile_time_start, params.start_data_type, default_decorator);
addJitConstantsForParam(jit, "STEP", params.compile_time_step, params.step_data_type, default_decorator);

if (!params.fused_ops.empty()) {
std::vector<std::string> idx_order = {"b", "f", "y", "x"};
if (params.inputs[0].GetDims().size() == 5) {
idx_order = {"b", "f", "z", "y", "x"};
}

FusedOpsConfiguration conf = { "", idx_order, "input[input_index]", params.inputs[0].GetDType() };
jit.Merge(MakeFusedOpsJitConstants(params, {conf}));
}

return jit;
}

Expand Down
99 changes: 99 additions & 0 deletions src/plugins/intel_gpu/tests/unit/fusions/slice_fusion_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
// Copyright (C) 2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "test_utils.h"
#include "fusion_test_common.hpp"

#include <intel_gpu/primitives/input_layout.hpp>
#include <intel_gpu/primitives/data.hpp>
#include <intel_gpu/primitives/slice.hpp>
#include <intel_gpu/primitives/eltwise.hpp>

#include <cmath>

using namespace cldnn;
using namespace ::tests;

namespace {
// Parameter set for one slice fusing test case.
// Field order matters: instances are brace-initialised positionally
// (see CASE_SLICE_F16_1 and the INSTANTIATE_TEST_SUITE_P list).
struct slice_test_params {
// Shape of the "input" primitive; used to build the input layout.
ov::PartialShape in_shape;
// Values stored in the "start" data primitive that feeds the slice.
ov::Shape start;
// Values stored in the "stop" data primitive that feeds the slice.
ov::Shape stop;
// Values stored in the "step" data primitive that feeds the slice.
ov::Shape step;
// Values stored in the "axes" data primitive that feeds the slice.
ov::Shape axes;
// Shape used for the output layout (and for the eltwise "add_data" constant).
ov::PartialShape out_shape;
// Element type of the input/output layouts and of the eltwise output.
data_types input_type;
// Format of the input/output layouts and of the final reorder.
format input_format;
// NOTE(review): presumably the primitive count the base fixture's compare()
// expects for the fused network - confirm against BaseFusingTest.
size_t expected_fused_primitives;
// NOTE(review): presumably the primitive count expected for the non-fused
// network - confirm against BaseFusingTest.
size_t expected_not_fused_primitives;
};

// Fixture for slice fusing tests: builds a fused and a non-fused network from
// the topologies held by the base fixture, feeds both the same input and hands
// them to compare().
class SliceFusingsTest : public ::BaseFusingTest<slice_test_params> {
public:
    // Runs both networks on one shared input buffer and compares them.
    void execute(slice_test_params& p) {
        const auto input_mem = get_mem(get_input_layout(p));

        network network_not_fused(this->engine, this->topology_non_fused, cfg_not_fused);
        network network_fused(this->engine, this->topology_fused, cfg_fused);

        // Same call order as before: fused network first, then the reference one.
        for (network* net : { &network_fused, &network_not_fused }) {
            net->set_input_data("input", input_mem);
        }

        compare(network_not_fused, network_fused, p);
    }

    // Layout of the "input" primitive: in_shape / input_type / input_format.
    layout get_input_layout(slice_test_params& p) {
        return layout(p.in_shape, p.input_type, p.input_format);
    }

    // Format the test input is provided in.
    format get_input_format(slice_test_params &p) {
        return p.input_format;
    }

    // Layout of the slice result: out_shape with the input's type and format.
    layout get_output_layout(slice_test_params& p) {
        return layout(p.out_shape, p.input_type, p.input_format);
    }
};

} // namespace

/* ----------------------------------------------------------------------------------------------------- */
/* -------------------------------------------- Slice cases -------------------------------------------- */
/* ----------------------------------------------------------------------------------------------------- */

#define CASE_SLICE_F16_1 { 20, 10, 5 }, { 0, 0, 0 }, { 4, 10, 5, 1 }, { 1, 1, 1 }, { 0, 1, 2 }, { 4, 10, 5 }, data_types::f16, format::bfyx

class slice_add_relu : public SliceFusingsTest {};
TEST_P(slice_add_relu, basic) {
auto p = GetParam();

auto start = engine.allocate_memory({ { p.in_shape.rank() }, data_types::f32, format::bfyx });
auto stop = engine.allocate_memory({ { p.in_shape.rank() }, data_types::f32, format::bfyx });
auto step = engine.allocate_memory({ { p.in_shape.rank() }, data_types::f32, format::bfyx });
auto axes = engine.allocate_memory({ { p.in_shape.rank() }, data_types::f32, format::bfyx });
set_values(start, p.start);
set_values(stop, p.stop);
set_values(step, p.step);
set_values(axes, p.axes);

create_topologies(
input_layout("input", get_input_layout(p)),
data("start", start),
data("stop", stop),
data("step", step),
data("axes", axes),
data("add_data", get_mem(get_output_layout(p), 0.5f)),
slice("slice", { input_info("input"), input_info("start"), input_info("stop"), input_info("step"), input_info("axes") }),
eltwise("add", { input_info("slice"), input_info("add_data") }, eltwise_mode::sum, p.input_type),
activation("activation", input_info("add"), activation_func::relu),
reorder("reorder_bfyx", input_info("activation"), p.input_format, data_types::f32)
);

tolerance = default_tolerance(p.input_type);
execute(p);
}

// Parameter sets for slice_add_relu; the two trailing numbers of each entry
// fill expected_fused_primitives and expected_not_fused_primitives.
INSTANTIATE_TEST_SUITE_P(fusings_gpu, slice_add_relu, ::testing::Values(
    slice_test_params{ CASE_SLICE_F16_1, 1, 3 }
));

0 comments on commit e60043f

Please sign in to comment.