Skip to content

Commit

Permalink
[GPU] Test fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
vladimir-paramuzov committed Nov 21, 2023
1 parent 6c1ffbd commit 582f72c
Show file tree
Hide file tree
Showing 10 changed files with 99 additions and 19 deletions.
1 change: 1 addition & 0 deletions src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,7 @@ struct network {
}

void set_variable(const std::string& name, const std::shared_ptr<ov::intel_gpu::VariableState>& variable);
bool has_variable(const std::string &variable_id) const;
ov::intel_gpu::VariableState& get_variable(const std::string &variable_id) const;
const ov::intel_gpu::VariableStateInfo& get_variable_info(const std::string &variable_id) const;
const ov::intel_gpu::VariablesMap& get_variables() const;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,14 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "prepare_buffer_fusing.h"
#include "intel_gpu/primitives/read_value.hpp"
#include "pooling_inst.h"
#include "primitive_inst.h"
#include "activation_inst.h"
#include "concatenation_inst.h"
#include "crop_inst.h"
#include "eltwise_inst.h"
#include "read_value_inst.h"
#include "reshape_inst.h"
#include "depth_to_space_inst.h"
#include "resample_inst.h"
Expand Down Expand Up @@ -602,5 +604,36 @@ void prepare_buffer_fusing::run(program& p) {

node.can_be_optimized(can_reshape_be_optimized(node));
});
program_helpers::do_for_types<read_value>(*node, [](read_value_node& node) {
// Current implementation allows to avoid copy on read_value primitive
// only in cases when it has single user
// Otherwise we may face an issue with execution of read_value users and assign to the same variable
// Graph below is an example of unsupported case
// ┌────────┐ ┌───────┐
// │ Param1 │ │ Const │
// └───┬────┘ └───┬───┘
// │ │
// │ ┌────┴──────┐
// .......│.........│ ReadValue │
// . │ └────┬─────┬┘
// . │ │ │
// . │ ┌─────┐ │ │
// . └───┤ Add ├────┘ │
// . └──┬──┘ │
// . │ │
// . │ │
// . ┌────────┐ │ ┌─────┐ │
// ..│ Assign ├──┴────┤ Add ├──┘
// └────────┘ └──┬──┘
//
//
// ┌────┴──────┐
// │ Result │
// └───────────┘
// If read_value here returns variable memory w/o copy, then based on Add-s and Assign execution order we may have different results
// TODO: Allow optimizations for the case above too. Looks like it can be achieved by more careful
// topological sort (i.e. if we ensure that all read_value users are completed before assign is run)
node.can_be_optimized(node.get_users().size() == 1);
});
}
}
15 changes: 10 additions & 5 deletions src/plugins/intel_gpu/src/graph/impls/cpu/read_value.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,17 +48,22 @@ struct read_value_impl : public typed_primitive_impl<read_value> {
}

auto& variable = instance.get_network().get_variable(variable_id);
auto &stream = instance.get_network().get_stream();

OPENVINO_ASSERT(variable.get_layout() == instance.get_output_layout(),
"[GPU] Layout mismatch: variable layout: ", variable.get_layout().to_short_string(),
" read_value output layout: ", instance.get_output_layout().to_short_string());

instance.set_output_memory(variable.get_memory(), false, 0);

if (!variable.is_set()) {
auto &stream = instance.get_network().get_stream();
const auto ev_set_output = instance.output_memory().fill(stream, 0);
return ev_set_output;
if (instance.get_impl_params()->input_layouts.size() > 0) {
variable.get_memory()->copy_from(stream, instance.dep_memory(0), true);
} else {
variable.get_memory()->fill(stream, 0);
}
}

if (!instance.can_be_optimized()) {
return instance.output_memory(0).copy_from(stream, *variable.get_memory(), false);
}

return instance.get_network().get_stream().create_user_event(true);
Expand Down
5 changes: 5 additions & 0 deletions src/plugins/intel_gpu/src/graph/include/read_value_inst.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,11 @@ class typed_primitive_inst<read_value> : public typed_primitive_inst_base<read_v

void save(cldnn::BinaryOutputBuffer& ob) const override;
void load(cldnn::BinaryInputBuffer& ib) override;

void update_output_memory() override;

protected:
void on_execute() override;
};

using read_value_inst = typed_primitive_inst<read_value>;
Expand Down
14 changes: 9 additions & 5 deletions src/plugins/intel_gpu/src/graph/network.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -684,7 +684,7 @@ void network::set_arguments() {
// In that case some_op is static and we may want to set arguments once,
// but dynamic optimized out reshape means that output buffer of reshape is unavailable
// and attempt to set args will fail.
if (dep.first->can_be_optimized() && dep.first->is_dynamic())
if (dep.first->can_be_optimized() && (dep.first->is_dynamic() || dep.first->get_node().is_type<read_value>()))
can_set_args = false;
}

Expand Down Expand Up @@ -1649,10 +1649,14 @@ void network::transfer_memory_to_device(std::shared_ptr<primitive_inst> instance
// Registers (or replaces) a variable state under `name` and, when the layout is
// already static, binds the variable's memory as the output of every primitive
// that reads this state. Dynamic layouts are resolved later
// (see read_value_inst::update_output_memory).
void network::set_variable(const std::string& name, const std::shared_ptr<ov::intel_gpu::VariableState>& variable) {
    GPU_DEBUG_TRACE_DETAIL << "Set variable " << name << " " << variable->get_layout().to_short_string() << std::endl;
    _variables_states[name] = variable;
    // NOTE(review): a commented-out duplicate of this loop was removed here; if the
    // intent was to disable the eager binding entirely, confirm and delete the loop.
    for (auto& inst : _variable_state_primitives.at(name)) {
        if (variable->get_layout().is_static())
            inst->set_output_memory(variable->get_memory(), false, 0);
    }
}

// Returns true when a variable state with the given id has been registered
// on this network via set_variable().
bool network::has_variable(const std::string &variable_id) const {
    return _variables_states.count(variable_id) != 0;
}

ov::intel_gpu::VariableState& network::get_variable(const std::string &variable_id) const {
Expand Down
1 change: 0 additions & 1 deletion src/plugins/intel_gpu/src/graph/primitive_inst.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -451,7 +451,6 @@ event::ptr primitive_inst::realloc_if_needed() {
std::string variable_id = stateful_prim->variable_id();
auto variable = get_network().get_variable(variable_id);
variable.set_layout(actual_layout);
return ev;
}

bool can_reuse_buffer = _outputs[0] && actual_layout.count() <= max_output_layout_size;
Expand Down
14 changes: 13 additions & 1 deletion src/plugins/intel_gpu/src/graph/read_value.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ namespace cldnn {
GPU_DEFINE_PRIMITIVE_TYPE_ID(read_value)

// read_value instance constructor. Diff residue left two conflicting `parent(...)`
// initializers here; only the conditional-allocation form is kept.
read_value_inst::typed_primitive_inst(network& network, const read_value_node& node) :
    // Allocate an output buffer only when the node is not optimized out and the output
    // layout is static (or at least has an upper bound sized for allocation). When the
    // node is optimized out, the output is rebound to the variable's memory instead
    // (see update_output_memory()).
    parent(network, node, !node.can_be_optimized() && (node.get_output_layout().is_static() || node.get_output_layout().has_upper_bound())),
    memory_state::variable{node.get_primitive()->variable_id} {
}

Expand All @@ -31,6 +31,18 @@ std::string read_value_inst::to_string(const read_value_node& node) {
return primitive_description.str();
}

// Called before the primitive runs: rebind the output to the state variable's
// memory (no-op unless this instance is optimized out — see update_output_memory()).
void read_value_inst::on_execute() {
    update_output_memory();
}

// Points this instance's output at the state variable's memory so no copy is
// needed. Applies only when the instance is optimized out and the network has
// already registered the variable; otherwise the output buffer is left untouched.
void read_value_inst::update_output_memory() {
    if (can_be_optimized() && get_network().has_variable(variable_id())) {
        const auto& variable = get_network().get_variable(variable_id());
        set_output_memory(variable.get_memory(), false, 0);
    }
}

void read_value_inst::save(cldnn::BinaryOutputBuffer& ob) const {
parent::save(ob);

Expand Down
2 changes: 1 addition & 1 deletion src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -670,7 +670,7 @@ void SyncInferRequest::allocate_states() {
const auto& network = m_graph->get_network();
const auto& variables_info = network->get_variables_info();
for (auto& vi : variables_info) {
auto variable = std::make_shared<VariableState>(vi.second, network->get_engine(), network->get_shape_predictor());
auto variable = std::make_shared<VariableState>(vi.second, network->get_engine(), *network->get_shape_predictor());
m_variables.emplace(vi.first, variable);
}
}
Expand Down
21 changes: 21 additions & 0 deletions src/plugins/intel_gpu/tests/functional/behavior/infer_request.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -222,3 +222,24 @@ TEST(TensorTest, smoke_canReallocateDeviceInputForHostTensor) {
ASSERT_NO_THROW(inf_req.infer());
}

// Checks that a compiled GPU model exposes its single variable state ("v0") with
// the expected default state shape, and that inference runs without throwing.
TEST(VariablesTest, smoke_canSetStateTensor) {
    auto ov = ov::Core();
    const ov::Shape variable_shape = {1, 3, 2, 4};  // fixed typo: was "virable_shape"
    const ov::Shape input_shape = {1, 3, 2, 4};
    const ov::element::Type et = ov::element::f16;
    auto model = ngraph::builder::subgraph::makeReadConcatSplitAssign(input_shape, et);
    auto compiled_model = ov.compile_model(model, ov::test::utils::DEVICE_GPU);
    auto request = compiled_model.create_infer_request();

    ov::Tensor variable_tensor(et, variable_shape);
    ov::Tensor input_tensor(et, input_shape);

    auto variables = request.query_state();
    ASSERT_EQ(variables.size(), 1);
    auto variable = variables.front();
    ASSERT_EQ(variable.get_name(), "v0");
    auto default_state_tensor = variable.get_state();
    ASSERT_EQ(default_state_tensor.get_shape(), variable_shape);

    // NOTE(review): variable_tensor/input_tensor are constructed but never used; the
    // test name suggests a variable.set_state(variable_tensor) call is missing before
    // infer() — confirm intent.
    ASSERT_NO_THROW(request.infer());
}
12 changes: 6 additions & 6 deletions src/plugins/intel_gpu/tests/unit/test_cases/variable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ struct variable_test : public ::testing::TestWithParam<VariableParams<T>> {

cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test);

auto variable = std::make_shared<VariableState>(VariableStateInfo{"v0", variable_layout}, engine, network->get_shape_predictor());
auto variable = std::make_shared<VariableState>(VariableStateInfo{"v0", variable_layout}, engine, *network->get_shape_predictor());
network->set_variable("v0", variable);
network->set_input_data("input", input_data);

Expand Down Expand Up @@ -129,7 +129,7 @@ void test_exception_on_wrong_layout(bool is_caching_test) {

cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test);

auto variable = std::make_shared<VariableState>(VariableStateInfo{"v0", variable_layout}, engine, network->get_shape_predictor());
auto variable = std::make_shared<VariableState>(VariableStateInfo{"v0", variable_layout}, engine, *network->get_shape_predictor());
network->set_variable("v0", variable);
network->set_input_data("input", input_data);
network->set_input_data("wrong_input", wrong_input_data);
Expand Down Expand Up @@ -167,7 +167,7 @@ void test_different_output_data_type(bool is_caching_test) {
config.set_property(ov::intel_gpu::optimize_data(true));
cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);

auto variable = std::make_shared<VariableState>(VariableStateInfo{"v0", variable_layout}, engine, network->get_shape_predictor());
auto variable = std::make_shared<VariableState>(VariableStateInfo{"v0", variable_layout}, engine, *network->get_shape_predictor());
network->set_variable("v0", variable);
network->set_input_data("input", input_data);

Expand Down Expand Up @@ -223,9 +223,9 @@ void test_variables_are_preserved_across_inferences(bool is_caching_test) {
cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test);


auto variable1 = std::make_shared<VariableState>(VariableStateInfo{"v1", variable_layout}, engine, network->get_shape_predictor());
auto variable2 = std::make_shared<VariableState>(VariableStateInfo{"v2", variable_layout}, engine, network->get_shape_predictor());
auto variable3 = std::make_shared<VariableState>(VariableStateInfo{"v_result", variable_layout}, engine, network->get_shape_predictor());
auto variable1 = std::make_shared<VariableState>(VariableStateInfo{"v1", variable_layout}, engine, *network->get_shape_predictor());
auto variable2 = std::make_shared<VariableState>(VariableStateInfo{"v2", variable_layout}, engine, *network->get_shape_predictor());
auto variable3 = std::make_shared<VariableState>(VariableStateInfo{"v_result", variable_layout}, engine, *network->get_shape_predictor());
network->set_variable("v1", variable1);
network->set_variable("v2", variable2);
network->set_variable("v_result", variable3);
Expand Down

0 comments on commit 582f72c

Please sign in to comment.