Skip to content

Commit

Permalink
[GPU] network code cleanup (openvinotoolkit#26908)
Browse files Browse the repository at this point in the history
### Details:
 - Removed few unnecessary methods
 - Refactor `network::execute()`
  • Loading branch information
vladimir-paramuzov authored Oct 7, 2024
1 parent 2167b03 commit de94a33
Show file tree
Hide file tree
Showing 11 changed files with 96 additions and 175 deletions.
32 changes: 0 additions & 32 deletions src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,34 +116,10 @@ struct network {

std::vector<std::shared_ptr<primitive_inst>> const& get_outputs() { return _outputs; }

const std::vector<std::shared_ptr<const primitive_inst>>& get_outputs() const {
return reinterpret_cast<const std::vector<std::shared_ptr<const primitive_inst>>&>(_outputs);
}

network_output get_output(const primitive_id& output_id) {
event::ptr evt;
if (get_stream().get_queue_type() == QueueTypes::out_of_order || _enable_profiling)
evt = get_primitive_event(output_id);
return network_output(evt, get_output_memory(output_id), get_stream_ptr(), get_output_layout(output_id));
}
layout get_node_output_layout(const primitive_id& output_id) const;
memory::ptr get_output_memory(const primitive_id& output_id);
layout get_output_layout(const primitive_id& output_id) const;
std::vector<layout> get_input_layouts() const;

/// @brief Returns the list of primitive ids before and after graph optimization.
/// @details If primitive was not optimized, the old and actual id will be the same.
/// @n If primitive was optimized during graph optimization, the actual id will be "_optimized_".
std::map<primitive_id, primitive_id> get_all_primitives() const {
auto primitive_ids = get_all_primitive_ids();
auto primitive_org_ids = get_all_primitive_org_ids();
std::map<primitive_id, primitive_id> result;
for (decltype(primitive_org_ids.size()) i = 0; i < primitive_org_ids.size(); i++) {
result.emplace(primitive_org_ids[i], primitive_ids[i]);
}
return result;
}

/// @brief Returns the list of @ref event for the primitives that were executed in network.
std::map<primitive_id, event::ptr> get_executed_primitives() const {
auto primitive_ids = get_executed_primitive_ids();
Expand Down Expand Up @@ -201,7 +177,6 @@ struct network {
void configure_primitives_second_output();
void build_insts_deps();
uint32_t get_id() const { return net_id; }
uint32_t get_local_id() const { return _local_net_id; }
stream& get_stream() const { return *_stream; }
stream::ptr get_stream_ptr() const { return _stream; }
bool is_internal() const { return _internal; }
Expand All @@ -219,7 +194,6 @@ struct network {
const ov::intel_gpu::VariableStateInfo& get_variable_info(const std::string &variable_id) const;
const ov::intel_gpu::VariablesMap& get_variables() const;
const ov::intel_gpu::VariablesInfoMap& get_variables_info() const;
std::vector<primitive_id> get_kv_cache_ids() const { return kv_cache_ids; }

const ExecutionConfig& get_config() const { return _config; }

Expand All @@ -243,8 +217,6 @@ struct network {
bool _is_dynamic = false;
bool _enable_profiling = false;
bool _reset_arguments;
uint32_t _local_net_id = 0; // This is for thread-safe deserialization. 'net_id' is globally unique,
// but '_local_net_id' is unique only in each intel_gpu::Graph.

std::unordered_map<primitive_id, std::shared_ptr<primitive_inst>> _primitives;
std::vector<shared_mem_type> _in_out_shared_mem_types;
Expand All @@ -255,10 +227,8 @@ struct network {

ov::intel_gpu::VariablesMap _variables_states;
ov::intel_gpu::VariablesInfoMap _variables_state_info;
std::vector<primitive_id> kv_cache_ids;

program::primitives_info _prims_info;
std::map<primitive_id, primitive_id> _ext_id_mapping;
size_t _weights_cache_capacity = 1;

std::unordered_map<primitive_id, event::ptr> _events;
Expand All @@ -272,9 +242,7 @@ struct network {
void allocate_primitive_instance(program_node const& node);
void transfer_memory_to_device(std::shared_ptr<primitive_inst> instance, program_node const& node);
void add_to_exec_order(const primitive_id& id);
std::shared_ptr<primitive_inst> find_in_internal_networks(const primitive_id& id) const;
std::shared_ptr<primitive_inst> find_primitive(const primitive_id& id) const;
void check_names();
void add_default_output_chains();
void calculate_weights_cache_capacity();
output_chains_map::iterator add_output_chain(std::shared_ptr<primitive_inst>& p_inst);
Expand Down
3 changes: 2 additions & 1 deletion src/plugins/intel_gpu/src/graph/kv_cache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ GPU_DEFINE_PRIMITIVE_TYPE_ID(kv_cache)
kv_cache_inst::typed_primitive_inst(network& network, const kv_cache_node& node) :
parent{network, node, false},
memory_state::variable{node.get_primitive()->variable_info.variable_id} {
kv_cache_id = network.get_kv_cache_ids().size();
thread_local size_t kv_cache_counter = 0;
kv_cache_id = kv_cache_counter++;
}

layout kv_cache_inst::calc_output_layout(const kv_cache_node& node, kernel_impl_params const& impl_param) {
Expand Down
89 changes: 20 additions & 69 deletions src/plugins/intel_gpu/src/graph/network.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -203,8 +203,6 @@ network::network(program::ptr program, stream::ptr stream, bool is_internal, boo
calculate_weights_cache_capacity();
allocate_primitives();
configure_primitives_second_output();
if (!_program->is_loaded_from_cache())
check_names();
build_insts_deps();
build_exec_order();
validate_primitives();
Expand Down Expand Up @@ -333,11 +331,7 @@ void network::reset_execution(bool wait) {

event::ptr network::set_input_data(const primitive_id& id, memory::ptr data) {
GPU_DEBUG_TRACE_DETAIL << "Set input " << id << " " << data->get_layout().to_short_string() << std::endl;
std::shared_ptr<primitive_inst> primitive_inst;

primitive_inst = find_primitive(id);

OPENVINO_ASSERT(primitive_inst != nullptr, "[GPU] topology doesn't contain primitive: ", id);
auto primitive_inst = find_primitive(id);

if (primitive_inst->type() != input_layout::type_id()) {
CLDNN_ERROR_MESSAGE(id, "primitive " + id + " is not an input");
Expand Down Expand Up @@ -481,11 +475,8 @@ network::output_chains_map::iterator network::add_output_chain(std::shared_ptr<p

std::vector<event::ptr> network::set_output_memory(const primitive_id& id, memory::ptr mem_new) {
GPU_DEBUG_TRACE_DETAIL << "Set output " << id << " " << mem_new->get_layout().to_short_string() << std::endl;
std::shared_ptr<primitive_inst> p_inst;
std::vector<event::ptr> ret_ev;
p_inst = find_primitive(id);

OPENVINO_ASSERT(p_inst != nullptr, "[GPU] topology doesn't contain primitive: ", id);
std::shared_ptr<primitive_inst> p_inst = find_primitive(id);

auto iter = std::find(_outputs.begin(), _outputs.end(), p_inst);
if (iter == _outputs.end())
Expand Down Expand Up @@ -513,35 +504,10 @@ std::vector<event::ptr> network::set_output_memory(const primitive_id& id, memor
return ret_ev;
}

void cldnn::network::check_names() {
for (auto const& prim : _primitives) {
if (find_in_internal_networks(prim.first) != nullptr)
CLDNN_ERROR_MESSAGE("Network", "Found primitive with id: " + prim.first + "in anotother network.");
}
}

std::shared_ptr<primitive_inst> cldnn::network::find_primitive(const primitive_id& id) const {
if (_primitives.find(id) != _primitives.end())
return _primitives.at(id);

return find_in_internal_networks(id);
}

std::shared_ptr<primitive_inst> cldnn::network::find_in_internal_networks(const primitive_id& id) const {
std::shared_ptr<primitive_inst> ret;

for (auto const& prim : _primitives) {
if (prim.second->type() == condition::type_id()) { // currently only condition inst contains mini networks
auto cond_inst = std::static_pointer_cast<condition_inst>(prim.second);
ret = cond_inst->get_net_true()->find_primitive(id);
if (ret != nullptr)
return ret;
ret = cond_inst->get_net_false()->find_primitive(id);
if (ret != nullptr)
return ret;
}
}
return nullptr;
auto it = _primitives.find(id);
OPENVINO_ASSERT(it != _primitives.end(), "[GPU] Network doesn't contain primitive ", id);
return it->second;
}

std::string network::get_primitive_info(const primitive_id& id) const {
Expand All @@ -552,9 +518,6 @@ std::string network::get_primitive_info(const primitive_id& id) const {
bool network::does_node_need_lockable_output(const primitive_id& id) const {
auto prim_inst = find_primitive(id);

OPENVINO_ASSERT(prim_inst, "[GPU] Can't get implementation type, since topology ",
"doesn't contain primitive with requested id: ", id);

const auto& node = prim_inst->get_node();
if (node.is_type<input_layout>()) {
for (const auto& user : node.get_users()) {
Expand All @@ -574,15 +537,6 @@ std::string network::get_implementation_info(const primitive_id& id) const {
return _program->get_implementation_info(id);
}

layout network::get_node_output_layout(const primitive_id& output_id) const {
auto res = std::find_if(_outputs.begin(), _outputs.end(), [&](const std::shared_ptr<primitive_inst>& v) {
return v->id() == output_id;
});
OPENVINO_ASSERT(res != _outputs.end(), "[GPU] Couldn't get output layout for ", output_id, ". Output with such name is not found in the outputs list");

return (*res)->get_node_output_layout();
}

memory::ptr network::get_output_memory(const primitive_id& output_id) {
return get_primitive(output_id)->output_memory_ptr();
}
Expand Down Expand Up @@ -729,17 +683,6 @@ void network::add_to_exec_order(const primitive_id& id) {
}

std::map<primitive_id, network_output> network::execute(const std::vector<event::ptr>& dependencies) {
execute_impl(dependencies);

auto output_ids = get_output_ids();
std::map<primitive_id, network_output> result;
for (auto& id : output_ids) {
result.emplace(id, get_output(id));
}
return result;
}

void network::execute_impl(const std::vector<event::ptr>& events) {
OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, "NetworkImpl::Execute");
NETWORK_DEBUG(*this);

Expand Down Expand Up @@ -779,6 +722,21 @@ void network::execute_impl(const std::vector<event::ptr>& events) {
// in some cases.
auto surf_lock = surfaces_lock::create(get_engine().type(), in_out_mem, get_stream());

execute_impl(dependencies);

std::map<primitive_id, network_output> result;
for (auto& inst : _outputs) {
event::ptr ev = nullptr;
const auto& id = inst->id();
if (get_stream().get_queue_type() == QueueTypes::out_of_order || _enable_profiling)
ev = _events.at(id);

result.emplace(id, network_output(ev, inst->output_memory_ptr(0), get_stream_ptr(), inst->get_output_layout(0)));
}
return result;
}

void network::execute_impl(const std::vector<event::ptr>& events) {
set_arguments();

// This extra flush command is needed for dynamic models in both cases of out_of_order / in_order operating mode
Expand Down Expand Up @@ -904,10 +862,6 @@ const program::graph_optimizer_info& network::get_optimizer_passes_info() const
}

std::map<primitive_id, primitive_id> network::get_ext_id_mapping() const {
if (_program == nullptr) {
return _ext_id_mapping;
}

std::map<primitive_id, primitive_id> result;
for (auto& prim : _primitives) {
result.emplace(prim.first, prim.second->get_node().get_primitive()->origin_op_name);
Expand Down Expand Up @@ -1008,9 +962,6 @@ void network::allocate_primitive_instance(program_node const& node) {
if (node.is_type<data>())
_data_outputs.push_back(inst);
}
if (node.is_type<kv_cache>()) {
kv_cache_ids.push_back(node.id());
}
if (auto state_prim = std::dynamic_pointer_cast<memory_state::variable>(inst)) {
auto prim = inst->get_node().get_primitive();
set_variables_state_info(state_prim->variable_id(), node.get_output_layout(0), state_prim->get_user_specified_type(), prim.get());
Expand Down
1 change: 0 additions & 1 deletion src/plugins/intel_gpu/src/plugin/graph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -558,7 +558,6 @@ void Graph::update_profiling_info() {
};

std::map<cldnn::primitive_id, cldnn::event::ptr> executedPrimitives = get_network()->get_executed_primitives();
auto allPrimitives = get_network()->get_all_primitives();

// Get profiling info for all layers
for (auto &profiledID : profilingIDs) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -183,15 +183,15 @@ TEST(reorder_inputs, impl_forcing_basic_format) {
7.f, 3.f, -2.f, -1.f });

network.set_input_data("input", input);
network.execute();
auto outputs = network.execute();

const auto& prog = network.get_program();
auto& pool_node = prog->get_node("pool");
auto pool_layout = pool_node.get_output_layout();

ASSERT_EQ(pool_layout.format.value, format::yxfb);

auto out_mem = network.get_output("pool").get_memory();
auto out_mem = outputs.at("pool").get_memory();
cldnn::mem_lock<float> out_mem_ptr(out_mem, get_test_stream());

ASSERT_EQ(out_mem_ptr.size(), 4u);
Expand Down Expand Up @@ -239,7 +239,7 @@ TEST(reorder_inputs, impl_forcing_basic_format_kernel) {
7.f, 3.f, -2.f, -1.f });

network.set_input_data("input", input);
network.execute();
auto outputs = network.execute();

auto prog = network.get_program();
auto& node = prog->get_node("actv");
Expand All @@ -250,7 +250,7 @@ TEST(reorder_inputs, impl_forcing_basic_format_kernel) {
ASSERT_EQ(actv_layout.format.value, format::yxfb);
ASSERT_EQ(kernel_name, actv_impl.kernel_name);

auto out_mem = network.get_output("actv").get_memory();
auto out_mem = outputs.at("actv").get_memory();
cldnn::mem_lock<float> out_mem_ptr(out_mem, get_test_stream());

ASSERT_EQ(out_mem_ptr.size(), 8u);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1031,9 +1031,9 @@ struct concat_gpu_4d : public concat_gpu {
network.set_input_data(input_ids[i].pid, in_memory[i]);
}

network.execute();
auto outputs = network.execute();

auto out_mem = network.get_output("concat").get_memory();
auto out_mem = outputs.at("concat").get_memory();
cldnn::mem_lock<Type> out_ptr(out_mem, get_test_stream());

for (size_t bi = 0; bi < batch_num; bi++) {
Expand Down Expand Up @@ -1117,9 +1117,9 @@ struct concat_gpu_4d_axis3 : public concat_axis3_gpu {
network.set_input_data(input_ids[i].pid, in_memory[i]);
}

network.execute();
auto outputs = network.execute();

auto out_mem = network.get_output("concat").get_memory();
auto out_mem = outputs.at("concat").get_memory();
cldnn::mem_lock<Type> out_ptr(out_mem, get_test_stream());

for (size_t bi = 0; bi < batch_num; bi++) {
Expand Down Expand Up @@ -1283,9 +1283,9 @@ struct concat_id_conv_gpu_4d : public concat_gpu {
network.set_input_data(input_ids[i].pid, in_memory[i]);
}

network.execute();
auto outputs = network.execute();

auto out_mem = network.get_output("conv").get_memory();
auto out_mem = outputs.at("conv").get_memory();
cldnn::mem_lock<OutputT> out_ptr(out_mem, get_test_stream());
ASSERT_EQ(out_mem->get_layout().format, fmt);

Expand Down Expand Up @@ -1420,13 +1420,13 @@ struct concat_gpu_4d_implicit : public concat_gpu {
for (size_t i = 0; i < in_features.size(); i++) {
concat_network->set_input_data(input_ids[i], in_memory[i]);
}
concat_network->execute();
auto outputs = concat_network->execute();

bool concat_opt_enabled = config.get_property(ov::intel_gpu::optimize_data);
bool concat_opt_result = std::static_pointer_cast<concatenation_inst>(concat_network->get_primitive("concat"))->can_be_optimized();
EXPECT_EQ(concat_opt_enabled, concat_opt_result);

return concat_network->get_output("reorder").get_memory();
return outputs.at("reorder").get_memory();
}

std::vector<std::vector<std::vector<std::vector<std::vector<Type>>>>> generate_input() {
Expand Down Expand Up @@ -1640,13 +1640,13 @@ struct concat_gpu_4d_implicit_onednn : public concat_gpu {
for (size_t i = 0; i < in_features.size(); i++) {
concat_network.set_input_data(input_ids[i], in_memory[i]);
}
concat_network.execute();
auto outputs = concat_network.execute();

bool concat_opt_enabled = config.get_property(ov::intel_gpu::optimize_data);
bool concat_opt_result = std::static_pointer_cast<concatenation_inst>(concat_network.get_primitive("concat"))->node->can_be_optimized();
EXPECT_EQ(concat_opt_enabled, concat_opt_result);

return concat_network.get_output("reorder").get_memory();
return outputs.at("reorder").get_memory();
}

std::vector<std::vector<std::vector<std::vector<std::vector<Type>>>>> generate_input() {
Expand Down Expand Up @@ -1803,7 +1803,7 @@ struct concat_gpu_4d_explicit : public concat_gpu {
for (size_t i = 0; i < 4; i++) {
concat_network.set_input_data(input_ids[i], in_memory[i]);
}
concat_network.execute();
auto outputs = concat_network.execute();

bool concat_opt_enabled = config.get_property(ov::intel_gpu::optimize_data);
bool concat_opt_result = std::static_pointer_cast<concatenation_inst>(concat_network.get_primitive("concat"))->node->can_be_optimized();
Expand All @@ -1813,7 +1813,7 @@ struct concat_gpu_4d_explicit : public concat_gpu {
if (concat_opt_enabled && batch_num > 1) concat_opt_result = !concat_opt_result;
EXPECT_EQ(concat_opt_enabled, concat_opt_result);

return concat_network.get_output("reorder").get_memory();
return outputs.at("reorder").get_memory();
}

std::vector<std::vector<std::vector<std::vector<std::vector<Type>>>>> generate_input() {
Expand Down
Loading

0 comments on commit de94a33

Please sign in to comment.