Skip to content

Commit

Permalink
[GPU] Remove use of legacy names
Browse files Browse the repository at this point in the history
  • Loading branch information
Lyamin-Roman committed Mar 11, 2024
1 parent e6dc086 commit e1a4465
Show file tree
Hide file tree
Showing 10 changed files with 209 additions and 176 deletions.
10 changes: 6 additions & 4 deletions src/plugins/intel_gpu/include/intel_gpu/plugin/graph.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,11 @@ class Graph final {
cldnn::engine& get_engine() const { return m_context->get_engine(); }
const ExecutionConfig& get_config() const { return m_config; }

const std::map<std::string, cldnn::layout>& get_input_layouts() const { return m_input_layouts; }
const std::map<size_t, cldnn::layout>& get_input_layouts() const { return m_input_layouts; }
std::shared_ptr<cldnn::network> get_network() const;

std::string out_name_to_internal(std::string out_port_name) const;
std::vector<cldnn::primitive_id> input_port_index_to_internal(size_t input_port_index) const;
std::string out_port_index_to_internal(size_t out_port_index) const;

void wait(Stage stage_mask) {
std::unique_lock<std::mutex> lock(m_infer_mutex);
Expand Down Expand Up @@ -84,12 +85,13 @@ class Graph final {

std::shared_ptr<cldnn::network> m_network;
std::map<std::string, cldnn::primitive_id> primitiveIDs;
std::map<std::string, std::vector<cldnn::primitive_id>> prevPrimitiveIDs;
std::map<size_t, std::vector<cldnn::primitive_id>> inputPrimitiveIDs;
std::map<size_t, cldnn::primitive_id> prevPrimitiveIDs;

std::map<cldnn::primitive_id, std::pair<std::string, PerfCounter>> perfMap;
std::vector<cldnn::primitive_id> profilingIDs;

std::map<std::string, cldnn::layout> m_input_layouts;
std::map<size_t, cldnn::layout> m_input_layouts;

void build(std::shared_ptr<cldnn::program> program);
std::shared_ptr<ov::Model> get_runtime_model(std::vector<cldnn::primitive_info>& pi, bool filter_const_primitives = true);
Expand Down
13 changes: 10 additions & 3 deletions src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

#include "openvino/core/node.hpp"
#include "openvino/runtime/profiling_info.hpp"
#include "openvino/op/parameter.hpp"

#include "intel_gpu/plugin/custom_layer.hpp"
#include "intel_gpu/runtime/engine.hpp"
Expand Down Expand Up @@ -91,22 +92,27 @@ class ProgramBuilder final {
static const cldnn::primitive_id m_postCustomLayerTag;

std::map<std::string, cldnn::primitive_id> primitive_ids;
std::map<std::string, std::vector<cldnn::primitive_id>> prevPrimitiveIDs;
std::map<size_t, std::vector<cldnn::primitive_id>> inputPrimitiveIDs;
std::map<size_t, cldnn::primitive_id> prevPrimitiveIDs;
std::map<cldnn::primitive_id, std::pair<std::string, PerfCounter>> perfMap;

std::vector<cldnn::primitive_id> profiling_ids;

std::map<std::string, cldnn::layout> inputLayouts;
std::map<size_t, cldnn::layout> inputLayouts;
using BlobCacheKey = std::tuple<const char*, ov::Shape, ov::element::Type>;
std::map<BlobCacheKey, cldnn::primitive_id> blobMemCache;

std::shared_ptr<cldnn::program> get_compiled_program() const;
std::shared_ptr<cldnn::topology> get_topology() const { return m_topology; }

const std::map<std::string, cldnn::layout>& get_input_layouts() const { return inputLayouts; }
const std::map<size_t, cldnn::layout>& get_input_layouts() const { return inputLayouts; }
cldnn::engine& get_engine() const { return m_engine; }
const ExecutionConfig& get_config() const { return m_config; }

int64_t get_parameter_index(const std::shared_ptr<ov::op::v0::Parameter>& parameter) const;
int64_t get_result_index(const ov::Output<ov::Node>& value) const;
int64_t get_result_index(const ov::Output<const ov::Node>& value) const;

bool is_op_supported(const std::shared_ptr<ov::Node>& op);

// Profiling utils
Expand Down Expand Up @@ -143,6 +149,7 @@ class ProgramBuilder final {
private:
static factories_map_t factories_map;
std::shared_ptr<cldnn::program> m_program;
std::shared_ptr<ov::Model> m_model;
ExecutionConfig m_config;
cldnn::engine& m_engine;
static std::mutex m_mutex;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,15 +69,16 @@ class SyncInferRequest : public ov::ISyncInferRequest {
private:
void check_tensors() const override;

std::unordered_map<std::string, TensorWrapper> m_user_inputs;
std::unordered_map<std::string, TensorWrapper> m_user_outputs;
std::unordered_map<size_t, TensorWrapper> m_user_inputs;
std::unordered_map<size_t, TensorWrapper> m_user_outputs;

std::unordered_map<std::string, TensorWrapper> m_plugin_inputs;
std::unordered_map<std::string, TensorWrapper> m_plugin_outputs;
std::unordered_map<size_t, TensorWrapper> m_plugin_inputs;
std::unordered_map<size_t, TensorWrapper> m_plugin_outputs;

std::unordered_map<std::string, ov::Output<const ov::Node>> m_input_ports_map;
std::unordered_map<std::string, ov::Output<const ov::Node>> m_output_ports_map;
std::unordered_map<std::string, std::string> m_output_names_map;
std::unordered_map<size_t, ov::Output<const ov::Node>> m_input_ports_map;
std::unordered_map<size_t, ov::Output<const ov::Node>> m_output_ports_map;

std::unordered_map<size_t, std::string> m_output_names_map;

std::map<cldnn::primitive_id, cldnn::network_output> m_internal_outputs;
VariablesMap m_variables;
Expand All @@ -90,9 +91,12 @@ class SyncInferRequest : public ov::ISyncInferRequest {
bool m_use_external_queue = false;

void prepare_state(const std::string& name, const std::shared_ptr<VariableStateBase>& variable);
std::vector<cldnn::event::ptr> prepare_input(const std::string& name, const ov::Output<const ov::Node>& port, const TensorWrapper& user_tensor_wrapper);
std::vector<cldnn::event::ptr> prepare_output(const std::string& name, const ov::Output<const ov::Node>& port, const TensorWrapper& user_tensor_wrapper);
std::vector<cldnn::event::ptr> prepare_batched_input(const std::string& name,
std::vector<cldnn::event::ptr> prepare_input(const std::string& internal_name,
size_t input_idx,
const ov::Output<const ov::Node>& port,
const TensorWrapper& user_tensor_wrapper);
std::vector<cldnn::event::ptr> prepare_output(size_t output_idx, const ov::Output<const ov::Node>& port, const TensorWrapper& user_tensor_wrapper);
std::vector<cldnn::event::ptr> prepare_batched_input(size_t input_idx,
const ov::Output<const ov::Node>& port,
const std::vector<ov::SoPtr<ov::ITensor>>& user_tensors);

Expand All @@ -108,8 +112,8 @@ class SyncInferRequest : public ov::ISyncInferRequest {
void allocate_inputs();
void allocate_outputs();
void allocate_states();
void allocate_input(const ov::Output<const ov::Node>& port, const std::string& name);
void allocate_output(const ov::Output<const ov::Node>& port, const std::string& name);
void allocate_input(const ov::Output<const ov::Node>& port, size_t input_idx);
void allocate_output(const ov::Output<const ov::Node>& port, size_t output_idx);
cldnn::event::ptr copy_output_data(cldnn::memory::ptr src, const ov::ITensor& dst) const;

void init_mappings();
Expand Down
2 changes: 0 additions & 2 deletions src/plugins/intel_gpu/src/graph/network.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -642,8 +642,6 @@ void cldnn::network::check_names() {
}

std::shared_ptr<primitive_inst> cldnn::network::find_primitive(const primitive_id& id) const {
std::shared_ptr<primitive_inst> ret;

if (_primitives.find(id) != _primitives.end())
return _primitives.at(id);

Expand Down
46 changes: 28 additions & 18 deletions src/plugins/intel_gpu/src/plugin/graph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ Graph::Graph(std::shared_ptr<ov::Model> model, const RemoteContextImpl::Ptr& con
build(program_builder->get_compiled_program());

primitiveIDs = program_builder->primitive_ids;
inputPrimitiveIDs = program_builder->inputPrimitiveIDs;
prevPrimitiveIDs = program_builder->prevPrimitiveIDs;
profilingIDs = program_builder->profiling_ids;
perfMap = program_builder->perfMap;
Expand All @@ -67,6 +68,7 @@ Graph::Graph(cldnn::BinaryInputBuffer &ib, const RemoteContextImpl::Ptr& context

ib >> m_input_layouts;
ib >> primitiveIDs;
ib >> inputPrimitiveIDs;
ib >> prevPrimitiveIDs;
ib >> profilingIDs;
{
Expand Down Expand Up @@ -104,6 +106,7 @@ Graph::Graph(std::shared_ptr<Graph> graph, uint16_t stream_id)
, m_config(graph->m_config)
, m_stream_id(stream_id)
, primitiveIDs(graph->primitiveIDs)
, inputPrimitiveIDs(graph->inputPrimitiveIDs)
, prevPrimitiveIDs(graph->prevPrimitiveIDs)
, perfMap(graph->perfMap)
, profilingIDs(graph->profilingIDs)
Expand Down Expand Up @@ -444,6 +447,7 @@ void Graph::export_model(cldnn::BinaryOutputBuffer &ob) {

ob << m_input_layouts;
ob << primitiveIDs;
ob << inputPrimitiveIDs;
ob << prevPrimitiveIDs;
ob << profilingIDs;
{
Expand Down Expand Up @@ -739,29 +743,35 @@ std::shared_ptr<cldnn::network> Graph::get_network() const {
return m_network;
}

std::string Graph::out_name_to_internal(std::string out_port_name) const {
auto networkOutputsIDs = get_network()->get_output_ids();
auto allPrimitiveIds = get_network()->get_all_primitives();
std::vector<cldnn::primitive_id> Graph::input_port_index_to_internal(size_t input_port_index) const {
OPENVINO_ASSERT(inputPrimitiveIDs.count(input_port_index) != 0 && !inputPrimitiveIDs.at(input_port_index).empty(),
"[GPU] Internal name of input primitive not found at index ", input_port_index);
return inputPrimitiveIDs.at(input_port_index);
}

// Maps a model output port index to the internal primitive id that actually
// appears among the network's outputs. First tries the id recorded for the
// port in prevPrimitiveIDs; if that id is not a real network output (it may
// have been remapped during compilation — presumably via primitiveIDs),
// retries with the remapped id. Throws when neither resolves.
std::string Graph::out_port_index_to_internal(size_t out_port_index) const {
    const auto& networkOutputsIDs = get_network()->get_output_ids();
    auto check_output = [&networkOutputsIDs](const cldnn::primitive_id& id) {
        return std::find(networkOutputsIDs.begin(), networkOutputsIDs.end(), id) != networkOutputsIDs.end();
    };

    OPENVINO_ASSERT(prevPrimitiveIDs.count(out_port_index) != 0,
                    "[GPU] Internal name of output primitive not found for index ", out_port_index);
    cldnn::primitive_id outputID = prevPrimitiveIDs.at(out_port_index);

    if (check_output(outputID)) {
        return outputID;
    }

    // The recorded id is not a network output — follow the remapping table once.
    OPENVINO_ASSERT(primitiveIDs.find(outputID) != primitiveIDs.end(),
                    "[GPU] Output with name ", outputID, " was not found in primitiveIDs");
    outputID = primitiveIDs.at(outputID);

    if (check_output(outputID)) {
        return outputID;
    }

    OPENVINO_THROW("[GPU] Unable to map output port index ", out_port_index, " to the internal primitive id");
}

} // namespace intel_gpu
Expand Down
8 changes: 6 additions & 2 deletions src/plugins/intel_gpu/src/plugin/ops/parameter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ static void CreateParameterOp(ProgramBuilder& p, const std::shared_ptr<ov::op::v
// look at the expected color format of this input
auto input_name = layer_type_name_ID(op);
cldnn::layout input_layout(input_pshape, element_type, input_format);
int64_t port_index = p.get_parameter_index(op);
OPENVINO_ASSERT(port_index != -1, "[GPU] Parameter port index for ", input_name, " not found");

auto is_convert_color_type = [](const std::shared_ptr<ov::Node> &node) {
return ov::is_type<ov::op::v8::NV12toRGB>(node) ||
Expand Down Expand Up @@ -83,15 +85,16 @@ static void CreateParameterOp(ProgramBuilder& p, const std::shared_ptr<ov::op::v
size_t batch = input_pshape[0].get_length();
input_layout.format = cldnn::format::nv12;
input_layout.set_partial_shape({ 1, input_pshape[1], input_pshape[2], input_pshape[3] });
p.inputLayouts.insert({ port_index, input_layout });

std::string suffix = "";
std::vector<cldnn::input_info> surfaces_inputs;
for (size_t i = 0; i < batch; ++i) {
if (batch > 1)
suffix = "_" + std::to_string(i);
std::string batched_name = input_name + suffix;
p.inputLayouts.insert({ op->get_friendly_name() + suffix, input_layout });
p.add_primitive(*op, cldnn::input_layout(batched_name, input_layout));
p.inputPrimitiveIDs[port_index].emplace_back(batched_name);

auto reorder_layout = input_layout;
reorder_layout.format = cldnn::format::bfyx;
Expand All @@ -111,9 +114,10 @@ static void CreateParameterOp(ProgramBuilder& p, const std::shared_ptr<ov::op::v
p.primitive_ids[input_name] = "reorder:" + input_name + ProgramBuilder::m_preProcessTag;
} else {
auto reorder_name = "reorder:" + input_name + ProgramBuilder::m_preProcessTag;
p.inputLayouts.insert({ op->get_friendly_name(), input_layout });

p.add_primitive(*op, cldnn::input_layout(input_name, input_layout));
p.inputPrimitiveIDs[port_index] = { input_name };
p.inputLayouts.insert({ port_index, input_layout });

if (connected_to_quantize(op)) {
// Techically this reorder is not needed, but for some reason it impacts layout propagation logic
Expand Down
16 changes: 8 additions & 8 deletions src/plugins/intel_gpu/src/plugin/ops/result.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,9 @@ static void CreateResultOp(ProgramBuilder& p, const std::shared_ptr<ov::op::v0::
validate_inputs_count(op, {1});

auto prev = op->get_input_node_shared_ptr(0);
auto input_id = ov::op::util::get_ie_output_name(op->get_input_source_output(0));
if (input_id.empty()) {
input_id = prev->get_friendly_name();
if (prev->get_output_size() > 1) {
input_id += "." + std::to_string(op->get_input_source_output(0).get_index());
}
auto input_id = prev->get_friendly_name();
if (prev->get_output_size() > 1) {
input_id += "." + std::to_string(op->get_input_source_output(0).get_index());
}
auto inputs = p.GetInputInfo(op);

Expand All @@ -39,8 +36,11 @@ static void CreateResultOp(ProgramBuilder& p, const std::shared_ptr<ov::op::v0::
inputs[0],
out_format,
out_data_type);
p.add_primitive(*op, reorder_primitive, {input_id, op->get_friendly_name()});
p.prevPrimitiveIDs[out_primitive_name] = {input_id};
p.add_primitive(*op, reorder_primitive, { input_id, op->get_friendly_name() });

int64_t port_index = p.get_result_index(op);
OPENVINO_ASSERT(port_index != -1, "[GPU] Result port index for ", input_id, " not found");
p.prevPrimitiveIDs[port_index] = input_id;
}

REGISTER_FACTORY_IMPL(v0, Result);
Expand Down
17 changes: 9 additions & 8 deletions src/plugins/intel_gpu/src/plugin/plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -655,7 +655,7 @@ uint32_t Plugin::get_max_batch_size(const ov::AnyMap& options) const {
auto cloned_model = model->clone();

try {
std::set<std::pair<std::string, size_t>> batched_inputs;
std::set<std::pair<size_t, size_t>> batched_inputs;

auto tmp_model = cloned_model->clone();
ov::pass::Manager m;
Expand All @@ -675,11 +675,10 @@ uint32_t Plugin::get_max_batch_size(const ov::AnyMap& options) const {
if (shape.size()) {
for (size_t s = 0; s < shape.size(); s++) {
if (ov::DimensionTracker::get_label(shape[s])) {
// batched dim for the input
auto batched_input_id = ov::op::util::get_ie_output_name(params[input_id]->output(0));
GPU_DEBUG_LOG << "[MAX_BATCH_SIZE] detected batched input " << batched_input_id
batched_inputs.insert(std::make_pair(input_id, s));
GPU_DEBUG_LOG << "[MAX_BATCH_SIZE] detected batched input " << input->get_friendly_name()
<< " with index " << input_id
<< "[" << s << "]" << std::endl;
batched_inputs.insert(std::make_pair(batched_input_id, s));
}
}
}
Expand All @@ -691,9 +690,11 @@ uint32_t Plugin::get_max_batch_size(const ov::AnyMap& options) const {
}

try {
std::map<std::string, ov::PartialShape> shapes;
for (auto& param : cloned_model->get_parameters()) {
shapes[ov::op::util::get_ie_output_name(param->output(0))] = param->get_output_partial_shape(0);
std::map<size_t, ov::PartialShape> shapes;
const auto& params = cloned_model->get_parameters();
for (size_t input_id = 0; input_id < params.size(); input_id++) {
const auto& param = params[input_id];
shapes[input_id] = param->get_output_partial_shape(0);
}
for (const auto& input : batched_inputs)
shapes[input.first][input.second] = base_batch_size;
Expand Down
15 changes: 14 additions & 1 deletion src/plugins/intel_gpu/src/plugin/program_builder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,8 @@ ProgramBuilder::ProgramBuilder(std::shared_ptr<ov::Model> model, cldnn::engine&
std::shared_ptr<ov::threading::IStreamsExecutor> task_executor,
std::shared_ptr<cldnn::ICompilationContext> compilation_context,
bool is_inner_program)
: m_config(config)
: m_model(model)
, m_config(config)
, m_engine(engine)
, queryMode(false)
, m_task_executor(task_executor)
Expand Down Expand Up @@ -354,6 +355,18 @@ bool ProgramBuilder::requires_new_shape_infer(const std::shared_ptr<ov::Node>& o
return false;
}

// Returns the position of `parameter` among m_model's parameters
// (-1 when not found — callers such as CreateParameterOp assert on this).
int64_t ProgramBuilder::get_parameter_index(const std::shared_ptr<ov::op::v0::Parameter>& parameter) const {
return m_model->get_parameter_index(parameter);
}

// Returns the index of the model result fed by `value`
// (-1 when not found — callers such as CreateResultOp assert on this).
int64_t ProgramBuilder::get_result_index(const ov::Output<ov::Node>& value) const {
return m_model->get_result_index(value);
}

// Overload for outputs of const nodes; same contract as above.
int64_t ProgramBuilder::get_result_index(const ov::Output<const ov::Node>& value) const {
return m_model->get_result_index(value);
}

// TODO: Does it make sense to add such method to ov core?
bool IsNodeOnConstPath(const std::shared_ptr<ov::Node>& node) {
std::set<std::shared_ptr<ov::Node>> nodes_processed = {};
Expand Down
Loading

0 comments on commit e1a4465

Please sign in to comment.