From 582f72cc71556493fbec27f2dc21292e95b579c5 Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Mon, 20 Nov 2023 13:28:22 +0400 Subject: [PATCH] [GPU] Test fixes --- .../include/intel_gpu/graph/network.hpp | 1 + .../graph_optimizer/prepare_buffer_fusing.cpp | 33 +++++++++++++++++++ .../src/graph/impls/cpu/read_value.cpp | 15 ++++++--- .../src/graph/include/read_value_inst.h | 5 +++ src/plugins/intel_gpu/src/graph/network.cpp | 14 +++++--- .../intel_gpu/src/graph/primitive_inst.cpp | 1 - .../intel_gpu/src/graph/read_value.cpp | 14 +++++++- .../src/plugin/sync_infer_request.cpp | 2 +- .../functional/behavior/infer_request.cpp | 21 ++++++++++++ .../tests/unit/test_cases/variable.cpp | 12 +++---- 10 files changed, 99 insertions(+), 19 deletions(-) diff --git a/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp b/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp index fa1d9eafece546..51f079529a4eed 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp @@ -220,6 +220,7 @@ struct network { } void set_variable(const std::string& name, const std::shared_ptr& variable); + bool has_variable(const std::string &variable_id) const; ov::intel_gpu::VariableState& get_variable(const std::string &variable_id) const; const ov::intel_gpu::VariableStateInfo& get_variable_info(const std::string &variable_id) const; const ov::intel_gpu::VariablesMap& get_variables() const; diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp index 4deb4e107dff55..80d12155450b04 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp @@ -2,12 +2,14 @@ // SPDX-License-Identifier: Apache-2.0 // #include "prepare_buffer_fusing.h" +#include "intel_gpu/primitives/read_value.hpp" #include 
"pooling_inst.h" #include "primitive_inst.h" #include "activation_inst.h" #include "concatenation_inst.h" #include "crop_inst.h" #include "eltwise_inst.h" +#include "read_value_inst.h" #include "reshape_inst.h" #include "depth_to_space_inst.h" #include "resample_inst.h" @@ -602,5 +604,36 @@ void prepare_buffer_fusing::run(program& p) { node.can_be_optimized(can_reshape_be_optimized(node)); }); + program_helpers::do_for_types(*node, [](read_value_node& node) { + // Current implementation allows to avoid copy on read_value primitive + // only in cases when it has single user + // Otherwise we may face an issue with execution of read_value users and assign to the same variable + // Graph below is an example of unsupported case + // ┌────────┐ ┌───────┐ + // │ Param1 │ │ Const │ + // └───┬────┘ └───┬───┘ + // │ │ + // │ ┌────┴──────┐ + // .......│.........│ ReadValue │ + // . │ └────┬─────┬┘ + // . │ │ │ + // . │ ┌─────┐ │ │ + // . └───┤ Add ├────┘ │ + // . └──┬──┘ │ + // . │ │ + // . │ │ + // . ┌────────┐ │ ┌─────┐ │ + // ..│ Assign ├──┴────┤ Add ├──┘ + // └────────┘ └──┬──┘ + // │ + // │ + // ┌────┴──────┐ + // │ Result │ + // └───────────┘ + // If read_value here returns variable memory w/o copy, then based on Add-s and Assign execution order we may have different results + // TODO: Allow optimizations for the case above too. Looks like it can be achieved by more careful + // topological sort (i.e. 
if we ensure that all read_value users are completed before assign is run) + node.can_be_optimized(node.get_users().size() == 1); + }); } } diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/read_value.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/read_value.cpp index 700280195bc702..c11522c0c8dc6d 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/read_value.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/read_value.cpp @@ -48,17 +48,22 @@ struct read_value_impl : public typed_primitive_impl { } auto& variable = instance.get_network().get_variable(variable_id); + auto &stream = instance.get_network().get_stream(); OPENVINO_ASSERT(variable.get_layout() == instance.get_output_layout(), "[GPU] Layout mismatch: variable layout: ", variable.get_layout().to_short_string(), " read_value output layout: ", instance.get_output_layout().to_short_string()); - instance.set_output_memory(variable.get_memory(), false, 0); - if (!variable.is_set()) { - auto &stream = instance.get_network().get_stream(); - const auto ev_set_output = instance.output_memory().fill(stream, 0); - return ev_set_output; + if (instance.get_impl_params()->input_layouts.size() > 0) { + variable.get_memory()->copy_from(stream, instance.dep_memory(0), true); + } else { + variable.get_memory()->fill(stream, 0); + } + } + + if (!instance.can_be_optimized()) { + return instance.output_memory(0).copy_from(stream, *variable.get_memory(), false); } return instance.get_network().get_stream().create_user_event(true); diff --git a/src/plugins/intel_gpu/src/graph/include/read_value_inst.h b/src/plugins/intel_gpu/src/graph/include/read_value_inst.h index 1260a5108806e2..8baac4fbae3a7e 100644 --- a/src/plugins/intel_gpu/src/graph/include/read_value_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/read_value_inst.h @@ -37,6 +37,11 @@ class typed_primitive_inst : public typed_primitive_inst_base; diff --git a/src/plugins/intel_gpu/src/graph/network.cpp b/src/plugins/intel_gpu/src/graph/network.cpp index 
6632a48398a81b..effa84dac7b3a5 100644 --- a/src/plugins/intel_gpu/src/graph/network.cpp +++ b/src/plugins/intel_gpu/src/graph/network.cpp @@ -684,7 +684,7 @@ void network::set_arguments() { // In that case some_op is static and we may want to set arguments once, // but dynamic optimized out reshape means that output buffer of reshape is unavailable // and attempt to set args will fail. - if (dep.first->can_be_optimized() && dep.first->is_dynamic()) + if (dep.first->can_be_optimized() && (dep.first->is_dynamic() || dep.first->get_node().is_type())) can_set_args = false; } @@ -1649,10 +1649,14 @@ void network::transfer_memory_to_device(std::shared_ptr instance void network::set_variable(const std::string& name, const std::shared_ptr& variable) { GPU_DEBUG_TRACE_DETAIL << "Set variable " << name << " " << variable->get_layout().to_short_string() << std::endl; _variables_states[name] = variable; - for (auto& inst : _variable_state_primitives.at(name)) { - if (variable->get_layout().is_static()) - inst->set_output_memory(variable->get_memory(), false, 0); - } + // for (auto& inst : _variable_state_primitives.at(name)) { + // if (variable->get_layout().is_static()) + // inst->set_output_memory(variable->get_memory(), false, 0); + // } +} + +bool network::has_variable(const std::string &variable_id) const { + return _variables_states.find(variable_id) != _variables_states.end(); } ov::intel_gpu::VariableState& network::get_variable(const std::string &variable_id) const { diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index 0bb9454adb85bc..6cb80d94a2a4d4 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -451,7 +451,6 @@ event::ptr primitive_inst::realloc_if_needed() { std::string variable_id = stateful_prim->variable_id(); auto variable = get_network().get_variable(variable_id); variable.set_layout(actual_layout); - return ev; } 
bool can_reuse_buffer = _outputs[0] && actual_layout.count() <= max_output_layout_size; diff --git a/src/plugins/intel_gpu/src/graph/read_value.cpp b/src/plugins/intel_gpu/src/graph/read_value.cpp index 7657557a2b13dd..7d37738331f8c3 100644 --- a/src/plugins/intel_gpu/src/graph/read_value.cpp +++ b/src/plugins/intel_gpu/src/graph/read_value.cpp @@ -11,7 +11,7 @@ namespace cldnn { GPU_DEFINE_PRIMITIVE_TYPE_ID(read_value) read_value_inst::typed_primitive_inst(network& network, const read_value_node& node) : - parent(network, node, false), + parent(network, node, !node.can_be_optimized() && (node.get_output_layout().is_static() || node.get_output_layout().has_upper_bound())), memory_state::variable{node.get_primitive()->variable_id} { } @@ -31,6 +31,18 @@ std::string read_value_inst::to_string(const read_value_node& node) { return primitive_description.str(); } +void read_value_inst::on_execute() { + update_output_memory(); +} + +void read_value_inst::update_output_memory() { + if (!can_be_optimized() || !get_network().has_variable(variable_id())) + return; + + const auto& variable = get_network().get_variable(variable_id()); + set_output_memory(variable.get_memory(), false, 0); +} + void read_value_inst::save(cldnn::BinaryOutputBuffer& ob) const { parent::save(ob); diff --git a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp index 8c9141eac953ec..9bfac56c46c78d 100644 --- a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp +++ b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp @@ -670,7 +670,7 @@ void SyncInferRequest::allocate_states() { const auto& network = m_graph->get_network(); const auto& variables_info = network->get_variables_info(); for (auto& vi : variables_info) { - auto variable = std::make_shared(vi.second, network->get_engine(), network->get_shape_predictor()); + auto variable = std::make_shared(vi.second, network->get_engine(), *network->get_shape_predictor()); 
m_variables.emplace(vi.first, variable); } } diff --git a/src/plugins/intel_gpu/tests/functional/behavior/infer_request.cpp b/src/plugins/intel_gpu/tests/functional/behavior/infer_request.cpp index af0229d5e81d8b..6262c366aec905 100644 --- a/src/plugins/intel_gpu/tests/functional/behavior/infer_request.cpp +++ b/src/plugins/intel_gpu/tests/functional/behavior/infer_request.cpp @@ -222,3 +222,24 @@ TEST(TensorTest, smoke_canReallocateDeviceInputForHostTensor) { ASSERT_NO_THROW(inf_req.infer()); } +TEST(VariablesTest, smoke_canSetStateTensor) { + auto ov = ov::Core(); + const ov::Shape virable_shape = {1, 3, 2, 4}; + const ov::Shape input_shape = {1, 3, 2, 4}; + const ov::element::Type et = ov::element::f16; + auto model = ngraph::builder::subgraph::makeReadConcatSplitAssign(input_shape, et); + auto compiled_model = ov.compile_model(model, ov::test::utils::DEVICE_GPU); + auto request = compiled_model.create_infer_request(); + + ov::Tensor variable_tensor(et, virable_shape); + ov::Tensor input_tensor(et, input_shape); + + auto variables = request.query_state(); + ASSERT_EQ(variables.size(), 1); + auto variable = variables.front(); + ASSERT_EQ(variable.get_name(), "v0"); + auto default_state_tensor = variable.get_state(); + ASSERT_EQ(default_state_tensor.get_shape(), virable_shape); + + ASSERT_NO_THROW(request.infer()); +} diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/variable.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/variable.cpp index 0653657434eb5e..e8fc4a1aa600c1 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/variable.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/variable.cpp @@ -40,7 +40,7 @@ struct variable_test : public ::testing::TestWithParam> { cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); - auto variable = std::make_shared(VariableStateInfo{"v0", variable_layout}, engine, network->get_shape_predictor()); + auto variable = 
std::make_shared(VariableStateInfo{"v0", variable_layout}, engine, *network->get_shape_predictor()); network->set_variable("v0", variable); network->set_input_data("input", input_data); @@ -129,7 +129,7 @@ void test_exception_on_wrong_layout(bool is_caching_test) { cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); - auto variable = std::make_shared(VariableStateInfo{"v0", variable_layout}, engine, network->get_shape_predictor()); + auto variable = std::make_shared(VariableStateInfo{"v0", variable_layout}, engine, *network->get_shape_predictor()); network->set_variable("v0", variable); network->set_input_data("input", input_data); network->set_input_data("wrong_input", wrong_input_data); @@ -167,7 +167,7 @@ void test_different_output_data_type(bool is_caching_test) { config.set_property(ov::intel_gpu::optimize_data(true)); cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); - auto variable = std::make_shared(VariableStateInfo{"v0", variable_layout}, engine, network->get_shape_predictor()); + auto variable = std::make_shared(VariableStateInfo{"v0", variable_layout}, engine, *network->get_shape_predictor()); network->set_variable("v0", variable); network->set_input_data("input", input_data); @@ -223,9 +223,9 @@ void test_variables_are_preserved_across_inferences(bool is_caching_test) { cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); - auto variable1 = std::make_shared(VariableStateInfo{"v1", variable_layout}, engine, network->get_shape_predictor()); - auto variable2 = std::make_shared(VariableStateInfo{"v2", variable_layout}, engine, network->get_shape_predictor()); - auto variable3 = std::make_shared(VariableStateInfo{"v_result", variable_layout}, engine, network->get_shape_predictor()); + auto variable1 = 
std::make_shared(VariableStateInfo{"v1", variable_layout}, engine, *network->get_shape_predictor()); + auto variable2 = std::make_shared(VariableStateInfo{"v2", variable_layout}, engine, *network->get_shape_predictor()); + auto variable3 = std::make_shared(VariableStateInfo{"v_result", variable_layout}, engine, *network->get_shape_predictor()); network->set_variable("v1", variable1); network->set_variable("v2", variable2); network->set_variable("v_result", variable3);