From fb746f1a112767234be9225525119eb2111f2027 Mon Sep 17 00:00:00 2001 From: Andrew Kwangwoong Park Date: Wed, 13 Apr 2022 14:04:06 +0900 Subject: [PATCH] post-optimization passes enablement (#62) * [GPU] Enable graph_initializations/calculate_prior_boxes for init_graph passes Signed-off-by: Andrew Park * [GPU] Enable post_input_reorder/post_optimize_weights for post-optimization passes Signed-off-by: Andrew Park * [GPU] Enable update_loop_primitive_map for post-optimization passes - check results with already enabled opt passes Signed-off-by: Andrew Park --- .../include/intel_gpu/graph/program.hpp | 2 - .../intel_gpu/primitives/concatenation.hpp | 2 +- ...nput.hpp.backup => lstm_dynamic_input.hpp} | 14 +++---- .../{split.hpp.backup => split.hpp} | 4 +- ...s.cpp.backup => calculate_prior_boxes.cpp} | 2 +- .../graph_optimizer/graph_initializations.cpp | 39 ++++++++----------- ...rder.cpp.backup => post_input_reorder.cpp} | 7 +++- ...s.cpp.backup => post_optimize_weights.cpp} | 4 +- ...p.backup => update_loop_primitive_map.cpp} | 0 ...nst.h.backup => lstm_dynamic_input_inst.h} | 8 ++-- .../src/graph/include/pass_manager.h | 17 +++----- .../{split_inst.h.backup => split_inst.h} | 2 +- .../src/graph/{lstm.cpp.backup => lstm.cpp} | 2 +- ...nput.cpp.backup => lstm_dynamic_input.cpp} | 2 +- src/plugins/intel_gpu/src/graph/program.cpp | 23 +++-------- .../src/graph/{split.cpp.backup => split.cpp} | 4 +- 16 files changed, 55 insertions(+), 77 deletions(-) rename src/plugins/intel_gpu/include/intel_gpu/primitives/{lstm_dynamic_input.hpp.backup => lstm_dynamic_input.hpp} (80%) rename src/plugins/intel_gpu/include/intel_gpu/primitives/{split.hpp.backup => split.hpp} (96%) rename src/plugins/intel_gpu/src/graph/graph_optimizer/{calculate_prior_boxes.cpp.backup => calculate_prior_boxes.cpp} (91%) rename src/plugins/intel_gpu/src/graph/graph_optimizer/{post_input_reorder.cpp.backup => post_input_reorder.cpp} (92%) rename src/plugins/intel_gpu/src/graph/graph_optimizer/{post_optimize_weights.cpp.backup => post_optimize_weights.cpp} (96%) rename src/plugins/intel_gpu/src/graph/graph_optimizer/{update_loop_primitive_map.cpp.backup => update_loop_primitive_map.cpp} (100%) rename src/plugins/intel_gpu/src/graph/include/{lstm_dynamic_input_inst.h.backup => lstm_dynamic_input_inst.h} (89%) rename src/plugins/intel_gpu/src/graph/include/{split_inst.h.backup => split_inst.h} (93%) rename src/plugins/intel_gpu/src/graph/{lstm.cpp.backup => lstm.cpp} (97%) rename src/plugins/intel_gpu/src/graph/{lstm_dynamic_input.cpp.backup => lstm_dynamic_input.cpp} (98%) rename src/plugins/intel_gpu/src/graph/{split.cpp.backup => split.cpp} (95%) diff --git a/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp b/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp index b0f90bc1b82dee..7c5cba3a587290 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp @@ -288,9 +288,7 @@ struct program { void run_graph_compilation(); void pre_optimize_graph(bool is_internal); -#if 0 // TODO(Taylor) temporally removed for multiple output PoC to reduce scope void post_optimize_graph(bool is_internal); -#endif void cleanup(); void transfer_memory_to_device(); diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/concatenation.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/concatenation.hpp index c8c5f3c327c595..eb563ea6bd3e3c 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/concatenation.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/concatenation.hpp @@ -55,7 +55,7 @@ struct concatenation : public primitive_base { /// @param axis Selected dimension for concatenation. concatenation( const primitive_id& id, - const input_info& input, + const std::vector& input, const concatenation_axis axis, const primitive_id& ext_prim_id = "", const padding& output_padding = padding()) diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/lstm_dynamic_input.hpp.backup b/src/plugins/intel_gpu/include/intel_gpu/primitives/lstm_dynamic_input.hpp similarity index 80% rename from src/plugins/intel_gpu/include/intel_gpu/primitives/lstm_dynamic_input.hpp.backup rename to src/plugins/intel_gpu/include/intel_gpu/primitives/lstm_dynamic_input.hpp index f993bcb8656b94..29b9110859626c 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/lstm_dynamic_input.hpp.backup +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/lstm_dynamic_input.hpp @@ -35,13 +35,13 @@ struct lstm_dynamic_input : public primitive_base { /// @param recurrent Primitive id containing recurrent data. /// @param bias Primitive id containing bias data. Provide empty string if using lstm_dynamic without bias. lstm_dynamic_input(const primitive_id& id, - const primitive_id& input, + const input_info& input, const primitive_id& dyn_length, const primitive_id& weights, const primitive_id& bias = "", const primitive_id& ext_prim_id = "", const padding& output_padding = padding()) - : primitive_base(id, {input}, ext_prim_id, output_padding), dyn_length(dyn_length), weights(weights), bias(bias) {} + : primitive_base(id, {input}, ext_prim_id, {output_padding}), dyn_length(dyn_length), weights(weights), bias(bias) {} /// @brief Primitive id containing the dynamic sequence lengths. primitive_id dyn_length; @@ -51,13 +51,13 @@ struct lstm_dynamic_input : public primitive_base { primitive_id bias; protected: - std::vector> get_dependencies() const override { - std::vector> ret; - ret.push_back(dyn_length); - ret.push_back(weights); + std::vector, int>> get_dependencies() const override { + std::vector, int>> ret; + ret.push_back({std::ref(dyn_length), 0}); + ret.push_back({std::ref(weights), 0}); if (!bias.empty()) { - ret.push_back(bias); + ret.push_back({std::ref(bias), 0}); } return ret; } diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/split.hpp.backup b/src/plugins/intel_gpu/include/intel_gpu/primitives/split.hpp similarity index 96% rename from src/plugins/intel_gpu/include/intel_gpu/primitives/split.hpp.backup rename to src/plugins/intel_gpu/include/intel_gpu/primitives/split.hpp index cea5abedeee087..03252a9ade9705 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/split.hpp.backup +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/split.hpp @@ -44,11 +44,11 @@ struct split : public primitive_base { /// @param input Input primitive id. /// @param output_ids_offsets Pairs of output_ids and offsets split(const primitive_id& id, - const primitive_id& input, + const input_info& input, const std::vector >& output_ids_offsets, const primitive_id& ext_prim_id = "", const padding& output_padding = padding()) - : primitive_base(id, {input}, ext_prim_id, output_padding), + : primitive_base(id, {input}, ext_prim_id, {output_padding}), output_offsets(extract_tensor_vector(output_ids_offsets)), output_ids(extract_primitive_vector(output_ids_offsets)) {} diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/calculate_prior_boxes.cpp.backup b/src/plugins/intel_gpu/src/graph/graph_optimizer/calculate_prior_boxes.cpp similarity index 91% rename from src/plugins/intel_gpu/src/graph/graph_optimizer/calculate_prior_boxes.cpp.backup rename to src/plugins/intel_gpu/src/graph/graph_optimizer/calculate_prior_boxes.cpp index 4eec60ac04431a..16c61eb42a3a9c 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/calculate_prior_boxes.cpp.backup +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/calculate_prior_boxes.cpp @@ -26,7 +26,7 @@ void calculate_prior_boxes::run(program& p) { auto result = pb_node.get_result_buffer(); - auto& data_node = p.get_or_create(std::make_shared("_cldnn_tmp_" + pb_node.id() + "_result", result)); + auto& data_node = p.get_or_create(std::make_shared("_cldnn_tmp_" + pb_node.id() + "_result", std::vector{result})); p.replace(pb_node, data_node); } } diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/graph_initializations.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/graph_initializations.cpp index e44fa068cdfc15..5f854eaaa4dfcc 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/graph_initializations.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/graph_initializations.cpp @@ -6,22 +6,19 @@ #include "pass_manager.h" #include "program_node.h" -#if 0 // TODO(taylor) + #include "split_inst.h" #include "convolution_inst.h" #include "crop_inst.h" #include "lstm_inst.h" #include "reshape_inst.h" #include "resample_inst.h" -#endif #include "permute_inst.h" -#if 0 // TODO(taylor) #include "depth_to_space_inst.h" #include "lstm_dynamic_inst.h" #include "lstm_dynamic_input_inst.h" #include "lstm_dynamic_timeloop_inst.h" #include "mutable_data_inst.h" -#endif #include "arg_max_min_inst.h" #include "kernel_selector_utils.h" @@ -40,7 +37,7 @@ std::string get_id_string(size_t i) { ss << std::setw(5) << std::setfill('0') << i; return ss.str(); } -#if 0 // TODO(taylor) + void graph_initializations::handle_split_node(program& p, split_node& node) { if (!node.get_users().empty()) { throw std::logic_error("Split layer cannot be used directly! Please use split output \"" + node.id() + @@ -152,7 +149,7 @@ void graph_initializations::handle_lstm_node(program& p, lstm_node& node) { primitive_id crop_id = input.id() + ":crop:" + get_id_string(sequence_element); tensor crop_tensor{input_size.batch[0], 1, input_size.spatial[0], input_size.spatial[1]}; tensor offset_tensor{0, static_cast(sequence_element), 0, 0}; - auto input_crop = std::make_shared(crop_id, input.id(), crop_tensor, offset_tensor); + auto input_crop = std::make_shared(crop_id, input_info(input.id()), crop_tensor, offset_tensor); auto& input_crop_node = p.get_or_create(input_crop); // Add the crop nodes as user for input @@ -217,10 +214,10 @@ void graph_initializations::handle_lstm_node(program& p, lstm_node& node) { // primitive_id lstm_gemm_input_id = node->get_dependency(input_idx).get_primitive()->id; // the line below requires an attention: get_org_primitive_id() might not be an actual id of a node // (see rename method) ToDO: ensure that get_org_primitive_id() is suitable here - primitive_id lstm_gemm_input_id = node.get_dependency(input_idx).get_org_primitive_id(); + primitive_id lstm_gemm_input_id = node.get_dependency(input_idx).first->get_org_primitive_id(); auto lstm_gemm_node = std::make_shared(lstm_gemm_id, - lstm_gemm_input_id, + input_info(lstm_gemm_input_id), weights_id, recurrent_id, bias_id, @@ -229,7 +226,7 @@ void graph_initializations::handle_lstm_node(program& p, lstm_node& node) { auto& n1 = p.get_or_create(lstm_gemm_node); auto lstm_elt_node = std::make_shared(lstm_elt_id, - lstm_gemm_id, + input_info(lstm_gemm_id), cell_id, lstm_prim->clip, lstm_prim->input_forget, @@ -242,7 +239,7 @@ void graph_initializations::handle_lstm_node(program& p, lstm_node& node) { p.add_connection(n1, n2); // adding dependecy to lstm_gemm node // input - p.add_connection(node.get_dependency(input_idx), n1); + p.add_connection(*node.get_dependency(input_idx).first, n1); // adding weights and initial values to lstm_gemm p.add_connection(p.get_node(weights_id), n1); p.add_connection(p.get_node(recurrent_id), n1); @@ -264,7 +261,7 @@ void graph_initializations::handle_lstm_node(program& p, lstm_node& node) { { hidden_id = crop_id + ":hidden"; auto crop_hidden = - std::make_shared(hidden_id, lstm_elt_id, hidden_size, tensor{0, 0, 0, 0}); + std::make_shared(hidden_id, input_info(lstm_elt_id), hidden_size, tensor{0, 0, 0, 0}); auto& n3 = p.get_or_create(crop_hidden); // adding eltwise as dependency to hidden p.add_connection(n2, n3); @@ -284,7 +281,7 @@ void graph_initializations::handle_lstm_node(program& p, lstm_node& node) { // lstm_cell if (i < sequence_len - 1 || emit_last_cell) { cell_id = crop_id + ":cell"; - auto crop_cell = std::make_shared(cell_id, lstm_elt_id, hidden_size, tensor{0, 1, 0, 0}); + auto crop_cell = std::make_shared(cell_id, input_info(lstm_elt_id), hidden_size, tensor{0, 1, 0, 0}); auto& n4 = p.get_or_create(crop_cell); p.add_connection(n2, n4); cell_list[i * directions + dir] = &n4; @@ -296,9 +293,9 @@ void graph_initializations::handle_lstm_node(program& p, lstm_node& node) { } // if there is no next lstm, concatenation is created if (!has_lstm_children) { - std::vector output_ids_offsets; + std::vector output_ids_offsets; for (auto& e : output_map) { - output_ids_offsets.push_back(e.second.first); + output_ids_offsets.push_back(input_info(e.second.first)); } primitive_id concatenation_id = node.id() + ":concat"; auto concatenation_primitive = @@ -319,7 +316,7 @@ void graph_initializations::handle_lstm_node(program& p, lstm_node& node) { static_cast(concatenate_len), hidden_size.spatial[0], (int32_t)directions}; - auto reshape_primitive = std::make_shared(node.id() + ":reshape", concatenation_id, output_size); + auto reshape_primitive = std::make_shared(node.id() + ":reshape", input_info(concatenation_id), output_size); auto& reshape_node = p.get_or_create(reshape_primitive); p.add_connection(concatenation_node, reshape_node); p.replace_all_usages(node, reshape_node); @@ -345,12 +342,12 @@ void graph_initializations::handle_dynamic_lstm_node(program& p, lstm_dynamic_no // [1] Add lstm_dynamic_input auto lstm_dynamic_input_primitive = std::make_shared(node_id + suffix + "input", - input_id, + input_info(input_id), dyn_length_id, weights_id, bias_id, "", - node.get_primitive()->output_padding); + node.get_primitive()->output_paddings.at(0)); auto& lstm_dynamic_input_node = p.get_or_create(lstm_dynamic_input_primitive); p.add_connection(node.input(), lstm_dynamic_input_node); // connect real input to dlstm_input // connect other deps @@ -367,7 +364,7 @@ void graph_initializations::handle_dynamic_lstm_node(program& p, lstm_dynamic_no auto last_cell_id = node.last_cell_state_id(); auto lstm_dynamic_timeloop_primitive = std::make_shared(node_id + suffix + "timeloop", - lstm_dynamic_input_node.id(), + input_info(lstm_dynamic_input_node.id()), dyn_length_id, recurrent_id, last_hidden_id, @@ -377,7 +374,7 @@ void graph_initializations::handle_dynamic_lstm_node(program& p, lstm_dynamic_no node.clip(), node.input_forget(), "", - lstm_dynamic_input_primitive->output_padding); + lstm_dynamic_input_primitive->output_paddings.at(0)); auto& lstm_dynamic_timeloop_node = p.get_or_create(lstm_dynamic_timeloop_primitive); p.add_connection(lstm_dynamic_input_node, lstm_dynamic_timeloop_node); // connect dlstm_input to dlstm_timeloop // connect other deps @@ -406,7 +403,7 @@ void graph_initializations::handle_dynamic_lstm_node(program& p, lstm_dynamic_no // we dont have to set output since it will be done in next graph_opts step } -#endif + void graph_initializations::set_outputs(program& p) { auto outputs_option = p.get_options().get(); if (!outputs_option->outputs.empty()) { @@ -425,7 +422,6 @@ void graph_initializations::set_outputs(program& p) { } void graph_initializations::run(program& p) { -#if 0 // TODO(taylor) auto itr = p.nodes_map.begin(); while (itr != p.nodes_map.end()) { auto node_itr = itr++; @@ -438,7 +434,6 @@ void graph_initializations::run(program& p) { handle_dynamic_lstm_node(p, node->as()); } } -#endif set_outputs(p); p.get_processing_order().calc_processing_order(p); } diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/post_input_reorder.cpp.backup b/src/plugins/intel_gpu/src/graph/graph_optimizer/post_input_reorder.cpp similarity index 92% rename from src/plugins/intel_gpu/src/graph/graph_optimizer/post_input_reorder.cpp.backup rename to src/plugins/intel_gpu/src/graph/graph_optimizer/post_input_reorder.cpp index d2ab615804db38..331e842698fb7f 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/post_input_reorder.cpp.backup +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/post_input_reorder.cpp @@ -26,7 +26,10 @@ program_node& post_input_reorder::add_reorder(program& p, auto& new_reorder_node = p.get_or_create(new_reorder); // ToDo: add a method to program class which adds an intermediate node given a node and its user - auto it = std::find(usr->get_dependencies().begin(), usr->get_dependencies().end(), node); + auto it = std::find_if(usr->get_dependencies().begin(), usr->get_dependencies().end(), + [&](const std::pair& dep) { + return node == dep.first; + }); if (it == usr->get_dependencies().end()) { throw std::runtime_error("Inconcistency in topology description: user of a node is not present among its dependecies."); } @@ -52,7 +55,7 @@ void post_input_reorder::run(program& p) { *static_cast(fc_impl->_kernel_data.params.get()); auto layout_format = from_data_layout(fc_params.inputs[0].GetLayout()); - auto& input = node->get_dependencies()[0]; + auto& input = node->get_dependencies()[0].first; auto input_layout = input->get_output_layout(); if (input_layout.format != layout_format) { diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/post_optimize_weights.cpp.backup b/src/plugins/intel_gpu/src/graph/graph_optimizer/post_optimize_weights.cpp similarity index 96% rename from src/plugins/intel_gpu/src/graph/graph_optimizer/post_optimize_weights.cpp.backup rename to src/plugins/intel_gpu/src/graph/graph_optimizer/post_optimize_weights.cpp index d404d4028d57a3..dfab5b097b980e 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/post_optimize_weights.cpp.backup +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/post_optimize_weights.cpp @@ -34,7 +34,7 @@ void post_optimize_weights::optimize_weights(T& node, program& p) { auto& weights_reorder_params = impl->_weights_reorder_params; for (auto i = offsets.weights_offset; i < offsets.bias_offset; i++) { - auto& weights_node = node.get_dependency(i); + auto& weights_node = *node.get_dependency(i).first; auto weights_layout = weights_node.get_output_layout(); auto reorders = _rf.get_weights_reorder(weights_node.id(), weights_layout, weights_reorder_params); @@ -43,7 +43,7 @@ void post_optimize_weights::optimize_weights(T& node, program& p) { // insert new generic_layer node to topology p.add_intermediate(reorder.first, node, i, !reorder.second); // set generic_layer's node output layout and implementation - auto& g_node = node.get_dependency(i); + auto& g_node = *node.get_dependency(i).first; g_node.get_output_layout(false); // Don't run impl selection to avoid double compilation of reorder kernels diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/update_loop_primitive_map.cpp.backup b/src/plugins/intel_gpu/src/graph/graph_optimizer/update_loop_primitive_map.cpp similarity index 100% rename from src/plugins/intel_gpu/src/graph/graph_optimizer/update_loop_primitive_map.cpp.backup rename to src/plugins/intel_gpu/src/graph/graph_optimizer/update_loop_primitive_map.cpp diff --git a/src/plugins/intel_gpu/src/graph/include/lstm_dynamic_input_inst.h.backup b/src/plugins/intel_gpu/src/graph/include/lstm_dynamic_input_inst.h similarity index 89% rename from src/plugins/intel_gpu/src/graph/include/lstm_dynamic_input_inst.h.backup rename to src/plugins/intel_gpu/src/graph/include/lstm_dynamic_input_inst.h index f92733f544c96d..d2d7f3756fb1e7 100644 --- a/src/plugins/intel_gpu/src/graph/include/lstm_dynamic_input_inst.h.backup +++ b/src/plugins/intel_gpu/src/graph/include/lstm_dynamic_input_inst.h @@ -19,13 +19,13 @@ struct typed_program_node : public typed_program_node_base prim, program& prog) : parent(prim, prog) {} - program_node& input() const { return get_dependency(0); } - program_node& dyn_length() const { return get_dependency(1); } - program_node& weights() const { return get_dependency(2); } + program_node& input() const { return *get_dependency(0).first; } + program_node& dyn_length() const { return *get_dependency(1).first; } + program_node& weights() const { return *get_dependency(2).first; } program_node& bias() const { CLDNN_ERROR_BOOL(id(), "Bias term", !bias_term(), "Trying to get non existing bias."); - return get_dependency(3); + return *get_dependency(3).first; } int32_t direction() const { return weights().get_output_layout().size.feature[0]; } diff --git a/src/plugins/intel_gpu/src/graph/include/pass_manager.h b/src/plugins/intel_gpu/src/graph/include/pass_manager.h index 94753749bb3908..8991b39d0d031c 100644 --- a/src/plugins/intel_gpu/src/graph/include/pass_manager.h +++ b/src/plugins/intel_gpu/src/graph/include/pass_manager.h @@ -6,14 +6,11 @@ #include "intel_gpu/graph/program.hpp" #include "layout_optimizer.h" -#if 0 // TODO(taylor) #include "split_inst.h" #include "lstm_inst.h" #include "lstm_dynamic_inst.h" -#endif #include "quantize_inst.h" #include "eltwise_inst.h" -#include "reorder_inst.h" #include "convolution_inst.h" #include "program_node.h" #include @@ -73,14 +70,14 @@ class add_reshape_to_primitives : public base_pass { private: void run(program& p) override; }; -#if 0 // TODO(taylor) + class calculate_prior_boxes : public base_pass { public: calculate_prior_boxes() : base_pass("calculated_prior_boxes") {} private: void run(program& p) override; }; -#endif + class compile_graph : public base_pass { public: compile_graph() : base_pass("compile_graph") {} @@ -112,11 +109,9 @@ class graph_initializations : public base_pass { private: void run(program& p) override; -#if 0 // TODO(taylor) void handle_split_node(program& p, split_node& node); void handle_lstm_node(program& p, lstm_node& node); void handle_dynamic_lstm_node(program& p, lstm_dynamic_node& node); -#endif void set_outputs(program& p); }; @@ -237,7 +232,7 @@ class prepare_padding : public base_pass { void run(program& p) override; bool output_size_handling_enabled; }; -#if 0 // TODO(andrew) + class post_input_reorder : public base_pass { public: post_input_reorder() : base_pass("post_input_reorder") {} @@ -270,7 +265,7 @@ class post_optimize_weights : public base_pass { void optimize_weights(T& node, program& p); reorder_factory& _rf; }; -#endif + class propagate_constants : public base_pass { public: propagate_constants() : base_pass("propagate_constants") {} @@ -389,7 +384,7 @@ class oooq_memory_dependencies : public memory_dependency_pass { oooq_memory_dependencies() : memory_dependency_pass("oooq_memory_dependencies") {} void run(program& p) override; }; - +#endif class update_loop_primitive_map : public base_pass { public: update_loop_primitive_map() : base_pass("update_loop_primitive_map") {} @@ -397,7 +392,7 @@ class update_loop_primitive_map : public base_pass { private: void run(program& p) override; }; -#endif + class add_onednn_optimization_attributes : public base_pass { public: add_onednn_optimization_attributes() : base_pass("add_onednn_optimization_attributes") {} diff --git a/src/plugins/intel_gpu/src/graph/include/split_inst.h.backup b/src/plugins/intel_gpu/src/graph/include/split_inst.h similarity index 93% rename from src/plugins/intel_gpu/src/graph/include/split_inst.h.backup rename to src/plugins/intel_gpu/src/graph/include/split_inst.h index 13dbf8c9804868..438ff24eb945bb 100644 --- a/src/plugins/intel_gpu/src/graph/include/split_inst.h.backup +++ b/src/plugins/intel_gpu/src/graph/include/split_inst.h @@ -19,7 +19,7 @@ class typed_program_node : public typed_program_node_base { public: using parent::parent; - program_node& input() const { return get_dependency(0); } + program_node& input() const { return *get_dependency(0).first; } }; using split_node = typed_program_node; diff --git a/src/plugins/intel_gpu/src/graph/lstm.cpp.backup b/src/plugins/intel_gpu/src/graph/lstm.cpp similarity index 97% rename from src/plugins/intel_gpu/src/graph/lstm.cpp.backup rename to src/plugins/intel_gpu/src/graph/lstm.cpp index fe90e8e24c69bd..b40c4d6c391092 100644 --- a/src/plugins/intel_gpu/src/graph/lstm.cpp.backup +++ b/src/plugins/intel_gpu/src/graph/lstm.cpp @@ -16,7 +16,7 @@ primitive_type_id lstm::type_id() { } layout lstm_inst::calc_output_layout(lstm_node const& node) { - assert(static_cast(node.get_primitive()->output_data_type) == false && + assert(!node.get_primitive()->output_data_types.empty() && "Output data type forcing is not supported for lstm_node!"); auto input_layout = node.input().get_output_layout(); auto hidden_layout = node.inital_hidden().get_output_layout(); diff --git a/src/plugins/intel_gpu/src/graph/lstm_dynamic_input.cpp.backup b/src/plugins/intel_gpu/src/graph/lstm_dynamic_input.cpp similarity index 98% rename from src/plugins/intel_gpu/src/graph/lstm_dynamic_input.cpp.backup rename to src/plugins/intel_gpu/src/graph/lstm_dynamic_input.cpp index a2f4dda3ed0ada..0ddda66953a99f 100644 --- a/src/plugins/intel_gpu/src/graph/lstm_dynamic_input.cpp.backup +++ b/src/plugins/intel_gpu/src/graph/lstm_dynamic_input.cpp @@ -19,7 +19,7 @@ primitive_type_id lstm_dynamic_input::type_id() { // weights_tensor: [b: 1, f: direction, x: input_size, y: 4 * hidden_size] // output_tensor: [b: batch, f: max_sequence_length, x: 4 * hidden_size, y: direction] layout lstm_dynamic_input_inst::calc_output_layout(lstm_dynamic_input_node const& node) { - assert(static_cast(node.get_primitive()->output_data_type) == false && + assert(!node.get_primitive()->output_data_types.empty() && "Output data type forcing is not supported for lstm_dynamic_node!"); auto input_layout = node.input().get_output_layout(); auto weight_layout = node.weights().get_output_layout(); diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp index 4ec8b31e47c5b6..a9aec7e19c13cc 100644 --- a/src/plugins/intel_gpu/src/graph/program.cpp +++ b/src/plugins/intel_gpu/src/graph/program.cpp @@ -22,19 +22,15 @@ #include "roi_pooling_inst.h" #include "reorg_yolo_inst.h" -#if 0 // TODO(taylor) #include "eltwise_inst.h" -#endif #include "softmax_inst.h" #include "permute_inst.h" #include "custom_gpu_primitive_inst.h" #include "binary_convolution_inst.h" #include "resample_inst.h" #include "reshape_inst.h" -#if 0 // TODO(andrew) #include "quantize_inst.h" #include "activation_inst.h" -#endif #include "scale_inst.h" #include "depth_to_space_inst.h" #include "convolution_inst.h" @@ -43,8 +39,6 @@ #include "data_inst.h" #include "deconvolution_inst.h" #include "detection_output_inst.h" -#include "fully_connected_inst.h" -#include "gather_inst.h" #include "input_layout_inst.h" #include "shuffle_channels_inst.h" #include "arg_max_min_inst.h" @@ -52,25 +46,22 @@ #include "lstm_elt_inst.h" #include "lstm_gemm_inst.h" #include "mutable_data_inst.h" -#include "normalize_inst.h" #include "pooling_inst.h" #include "border_inst.h" #include "primitive_inst.h" #include "prior_box_inst.h" #include "proposal_inst.h" #include "reorder_inst.h" -#if 0 // TODO(andrew) #include "split_inst.h" -#endif #include "mvn_inst.h" -#if 0 // TODO(andrew) #include "gemm_inst.h" -#endif #include "reduce_inst.h" #include "region_yolo_inst.h" #include "strided_slice_inst.h" #include "loop_inst.h" - +// TODO(Andrew): Will be removed after ocl register enabled +#include "gather_inst.h" +#include "normalize_inst.h" #include "to_string_utils.h" #include "runtime/cldnn_itt.hpp" #include "runtime/kernels_cache.hpp" @@ -452,9 +443,7 @@ void program::build_program(bool is_internal) { init_graph(); { pre_optimize_graph(is_internal); } run_graph_compilation(); -#if 0 // TODO(taylor) { post_optimize_graph(is_internal); } -#endif GPU_DEBUG_GET_INSTANCE(debug_config); #ifdef GPU_DEBUG_CONFIG @@ -483,9 +472,8 @@ void program::build_program(bool is_internal) { void program::init_graph() { OV_ITT_SCOPED_TASK(itt::domains::CLDNN, "ProgramImpl::InitGraph"); apply_opt_pass(); -#if 0 // TODO(taylor) + apply_opt_pass(); -#endif apply_opt_pass(); } @@ -573,7 +561,7 @@ void program::pre_optimize_graph(bool is_internal) { // add optimization attributes for onednn primitives apply_opt_pass(); } -#if 0 // TODO(taylor) + void program::post_optimize_graph(bool is_internal) { OV_ITT_SCOPED_TASK(itt::domains::CLDNN, "ProgramImpl::PostOptimizeGraph"); // input reorder for fully connected if necessary @@ -596,7 +584,6 @@ void program::post_optimize_graph(bool is_internal) { // update loop input/output primitive mappings apply_opt_pass(); } -#endif // mark if the node is constant assuming that all dependencies are marked properly void program::mark_if_constant(program_node& node) { diff --git a/src/plugins/intel_gpu/src/graph/split.cpp.backup b/src/plugins/intel_gpu/src/graph/split.cpp similarity index 95% rename from src/plugins/intel_gpu/src/graph/split.cpp.backup rename to src/plugins/intel_gpu/src/graph/split.cpp index 2265b916f2a0ae..ce91ce9cb6d033 100644 --- a/src/plugins/intel_gpu/src/graph/split.cpp.backup +++ b/src/plugins/intel_gpu/src/graph/split.cpp @@ -16,12 +16,12 @@ primitive_type_id split::type_id() { } layout split_inst::calc_output_layout(split_node const& node) { - assert(static_cast(node.get_primitive()->output_data_type) == false && + assert(!node.get_primitive()->output_data_types.empty() && "Output data type forcing is not supported for split_node!"); auto output_ids = node.get_primitive()->output_ids; auto output_offsets = node.get_primitive()->output_offsets; auto param_num = output_ids.size(); - auto input_sizes = node.get_dependency(0).get_non_padded_output_layout().size; + auto input_sizes = node.get_dependency(0).first->get_non_padded_output_layout().size; tensor null_tensor { 0, 0, 0, 0 }; // check if output_ids count equals output_offsets count