post-optimization passes enablement (openvinotoolkit#62)
* [GPU] Enable graph_initializations/calculate_prior_boxes for init_graph passes

Signed-off-by: Andrew Park <[email protected]>

* [GPU] Enable post_input_reorder/post_optimize_weights for post-optimization passes

Signed-off-by: Andrew Park <[email protected]>

* [GPU] Enable update_loop_primitive_map for post-optimization passes

- check results with already enabled opt passes

Signed-off-by: Andrew Park <[email protected]>
andrew-k-park authored Apr 13, 2022
1 parent 72c573b commit fb746f1
Showing 16 changed files with 55 additions and 77 deletions.
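The recurring change across these files is that a node's dependencies are no longer bare node references or primitive ids: each dependency now also carries the index of the producer's output port, typically as a `std::pair<program_node*, int>` or `std::pair<std::reference_wrapper<const primitive_id>, int>`. The standalone sketch below illustrates that pattern with hypothetical stand-in types; it is not code from this commit.

```cpp
// Hypothetical stand-in for the (node, output-port) dependency scheme;
// the real code uses cldnn::program_node, not this `Node`.
#include <algorithm>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

struct Node {
    std::string id;
    // Each dependency names the producer node plus the producer output
    // port that feeds this node -- the key enabler for multi-output nodes.
    std::vector<std::pair<Node*, int>> dependencies;
};

int main() {
    Node weights{"weights", {}};
    Node conv{"conv", {}};
    conv.dependencies.push_back({&weights, 0});  // consume port 0 of "weights"

    // Lookups match on the node half of the pair, mirroring the
    // std::find -> std::find_if change in post_input_reorder below.
    auto it = std::find_if(conv.dependencies.begin(), conv.dependencies.end(),
                           [&](const std::pair<Node*, int>& dep) { return dep.first == &weights; });
    if (it != conv.dependencies.end())
        std::cout << conv.id << " reads port " << it->second << " of " << it->first->id << "\n";
    return 0;
}
```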
2 changes: 0 additions & 2 deletions src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp
@@ -288,9 +288,7 @@ struct program {
 
     void run_graph_compilation();
     void pre_optimize_graph(bool is_internal);
-#if 0 // TODO(Taylor) temporally removed for multiple output PoC to reduce scope
     void post_optimize_graph(bool is_internal);
-#endif
     void cleanup();
     void transfer_memory_to_device();
@@ -55,7 +55,7 @@ struct concatenation : public primitive_base<concatenation> {
     /// @param axis Selected dimension for concatenation.
     concatenation(
         const primitive_id& id,
-        const input_info& input,
+        const std::vector<input_info>& input,
         const concatenation_axis axis,
         const primitive_id& ext_prim_id = "",
         const padding& output_padding = padding())
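With `concatenation` now taking a `std::vector<input_info>`, a caller lists every input explicitly. A hedged usage sketch follows; it assumes the cldnn topology API, the `concatenation::along_f` axis constant, and the header paths behave as in the upstream repository at this point, none of which is shown in this commit.

```cpp
// Sketch only: assumes these intel_gpu headers/types exist as upstream.
#include <intel_gpu/graph/topology.hpp>
#include <intel_gpu/primitives/concatenation.hpp>
#include <intel_gpu/primitives/input_layout.hpp>

using namespace cldnn;

topology make_concat_topology(const layout& in_layout) {
    topology t;
    t.add(input_layout("a", in_layout));
    t.add(input_layout("b", in_layout));
    // The new signature takes all inputs as one vector of input_info,
    // so each entry can later address a specific output port of its producer.
    t.add(concatenation("concat", {input_info("a"), input_info("b")}, concatenation::along_f));
    return t;
}
```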
@@ -35,13 +35,13 @@ struct lstm_dynamic_input : public primitive_base<lstm_dynamic_input> {
     /// @param recurrent Primitive id containing recurrent data.
     /// @param bias Primitive id containing bias data. Provide empty string if using lstm_dynamic without bias.
     lstm_dynamic_input(const primitive_id& id,
-                       const primitive_id& input,
+                       const input_info& input,
                        const primitive_id& dyn_length,
                        const primitive_id& weights,
                        const primitive_id& bias = "",
                        const primitive_id& ext_prim_id = "",
                        const padding& output_padding = padding())
-        : primitive_base(id, {input}, ext_prim_id, output_padding), dyn_length(dyn_length), weights(weights), bias(bias) {}
+        : primitive_base(id, {input}, ext_prim_id, {output_padding}), dyn_length(dyn_length), weights(weights), bias(bias) {}
 
     /// @brief Primitive id containing the dynamic sequence lengths.
     primitive_id dyn_length;
@@ -51,13 +51,13 @@ struct lstm_dynamic_input : public primitive_base<lstm_dynamic_input> {
     primitive_id bias;
 
 protected:
-    std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
-        std::vector<std::reference_wrapper<const primitive_id>> ret;
-        ret.push_back(dyn_length);
-        ret.push_back(weights);
+    std::vector<std::pair<std::reference_wrapper<const primitive_id>, int>> get_dependencies() const override {
+        std::vector<std::pair<std::reference_wrapper<const primitive_id>, int>> ret;
+        ret.push_back({std::ref(dyn_length), 0});
+        ret.push_back({std::ref(weights), 0});
 
         if (!bias.empty()) {
-            ret.push_back(bias);
+            ret.push_back({std::ref(bias), 0});
         }
         return ret;
     }
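A compilable stand-in for the new `get_dependencies()` shape, with `std::string` standing in for `primitive_id`; the names and the `lstm_like` type are illustrative, not from this commit.

```cpp
// Standalone sketch of the pair-of-(id-ref, port) return style adopted by
// get_dependencies(); std::string stands in for cldnn::primitive_id.
#include <functional>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

using primitive_id = std::string;

struct lstm_like {
    primitive_id dyn_length = "dyn_len";
    primitive_id weights = "w";
    primitive_id bias;  // empty => no bias dependency

    std::vector<std::pair<std::reference_wrapper<const primitive_id>, int>>
    get_dependencies() const {
        std::vector<std::pair<std::reference_wrapper<const primitive_id>, int>> ret;
        // Port 0 everywhere: these producers are single-output primitives.
        ret.push_back({std::ref(dyn_length), 0});
        ret.push_back({std::ref(weights), 0});
        if (!bias.empty())
            ret.push_back({std::ref(bias), 0});
        return ret;
    }
};

int main() {
    lstm_like node;
    for (const auto& dep : node.get_dependencies())
        std::cout << dep.first.get() << ":" << dep.second << "\n";
    return 0;
}
```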
@@ -44,11 +44,11 @@ struct split : public primitive_base<split> {
     /// @param input Input primitive id.
     /// @param output_ids_offsets Pairs of output_ids and offsets
     split(const primitive_id& id,
-          const primitive_id& input,
+          const input_info& input,
           const std::vector<std::pair<primitive_id, tensor> >& output_ids_offsets,
           const primitive_id& ext_prim_id = "",
           const padding& output_padding = padding())
-        : primitive_base(id, {input}, ext_prim_id, output_padding),
+        : primitive_base(id, {input}, ext_prim_id, {output_padding}),
           output_offsets(extract_tensor_vector(output_ids_offsets)),
           output_ids(extract_primitive_vector(output_ids_offsets)) {}

@@ -26,7 +26,7 @@ void calculate_prior_boxes::run(program& p) {
 
         auto result = pb_node.get_result_buffer();
 
-        auto& data_node = p.get_or_create(std::make_shared<data>("_cldnn_tmp_" + pb_node.id() + "_result", result));
+        auto& data_node = p.get_or_create(std::make_shared<data>("_cldnn_tmp_" + pb_node.id() + "_result", std::vector<memory::ptr>{result}));
         p.replace(pb_node, data_node);
     }
 }
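The `data` primitive here now receives its buffers as a `std::vector<memory::ptr>`, one entry per output port, so the single prior-box result is wrapped in a one-element vector. A minimal stand-in sketch of that shape (not the real cldnn `data` type):

```cpp
// Stand-in sketch: one backing memory per output port of a data node.
#include <memory>
#include <vector>

struct memory_buf { };                           // stands in for cldnn::memory
using memory_ptr = std::shared_ptr<memory_buf>;  // stands in for memory::ptr

struct data_like {
    explicit data_like(std::vector<memory_ptr> mems) : outputs(std::move(mems)) {}
    std::vector<memory_ptr> outputs;             // outputs[i] backs output port i
};

int main() {
    memory_ptr result = std::make_shared<memory_buf>();
    data_like d(std::vector<memory_ptr>{result});  // same wrap as in the pass
    return d.outputs.size() == 1 ? 0 : 1;
}
```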
@@ -6,22 +6,19 @@
 
 #include "pass_manager.h"
 #include "program_node.h"
-#if 0 // TODO(taylor)
+
 #include "split_inst.h"
 #include "convolution_inst.h"
 #include "crop_inst.h"
 #include "lstm_inst.h"
 #include "reshape_inst.h"
 #include "resample_inst.h"
-#endif
 #include "permute_inst.h"
-#if 0 // TODO(taylor)
 #include "depth_to_space_inst.h"
 #include "lstm_dynamic_inst.h"
 #include "lstm_dynamic_input_inst.h"
 #include "lstm_dynamic_timeloop_inst.h"
 #include "mutable_data_inst.h"
-#endif
 #include "arg_max_min_inst.h"
 #include "kernel_selector_utils.h"
@@ -40,7 +37,7 @@ std::string get_id_string(size_t i) {
     ss << std::setw(5) << std::setfill('0') << i;
     return ss.str();
 }
-#if 0 // TODO(taylor)
+
 void graph_initializations::handle_split_node(program& p, split_node& node) {
     if (!node.get_users().empty()) {
         throw std::logic_error("Split layer cannot be used directly! Please use split output \"" + node.id() +
@@ -152,7 +149,7 @@ void graph_initializations::handle_lstm_node(program& p, lstm_node& node) {
             primitive_id crop_id = input.id() + ":crop:" + get_id_string(sequence_element);
             tensor crop_tensor{input_size.batch[0], 1, input_size.spatial[0], input_size.spatial[1]};
             tensor offset_tensor{0, static_cast<tensor::value_type>(sequence_element), 0, 0};
-            auto input_crop = std::make_shared<crop>(crop_id, input.id(), crop_tensor, offset_tensor);
+            auto input_crop = std::make_shared<crop>(crop_id, input_info(input.id()), crop_tensor, offset_tensor);
             auto& input_crop_node = p.get_or_create(input_crop);
 
             // Add the crop nodes as user for input
@@ -217,10 +214,10 @@ void graph_initializations::handle_lstm_node(program& p, lstm_node& node) {
             // primitive_id lstm_gemm_input_id = node->get_dependency(input_idx).get_primitive()->id;
             // the line below requires an attention: get_org_primitive_id() might not be an actual id of a node
             // (see rename method) ToDO: ensure that get_org_primitive_id() is suitable here
-            primitive_id lstm_gemm_input_id = node.get_dependency(input_idx).get_org_primitive_id();
+            primitive_id lstm_gemm_input_id = node.get_dependency(input_idx).first->get_org_primitive_id();
 
             auto lstm_gemm_node = std::make_shared<lstm_gemm>(lstm_gemm_id,
-                                                              lstm_gemm_input_id,
+                                                              input_info(lstm_gemm_input_id),
                                                               weights_id,
                                                               recurrent_id,
                                                               bias_id,
@@ -229,7 +226,7 @@ void graph_initializations::handle_lstm_node(program& p, lstm_node& node) {
             auto& n1 = p.get_or_create(lstm_gemm_node);
 
             auto lstm_elt_node = std::make_shared<lstm_elt>(lstm_elt_id,
-                                                            lstm_gemm_id,
+                                                            input_info(lstm_gemm_id),
                                                             cell_id,
                                                             lstm_prim->clip,
                                                             lstm_prim->input_forget,
@@ -242,7 +239,7 @@ void graph_initializations::handle_lstm_node(program& p, lstm_node& node) {
             p.add_connection(n1, n2);
             // adding dependecy to lstm_gemm node
             // input
-            p.add_connection(node.get_dependency(input_idx), n1);
+            p.add_connection(*node.get_dependency(input_idx).first, n1);
             // adding weights and initial values to lstm_gemm
             p.add_connection(p.get_node(weights_id), n1);
             p.add_connection(p.get_node(recurrent_id), n1);
@@ -264,7 +261,7 @@ void graph_initializations::handle_lstm_node(program& p, lstm_node& node) {
                 {
                     hidden_id = crop_id + ":hidden";
                     auto crop_hidden =
-                        std::make_shared<crop>(hidden_id, lstm_elt_id, hidden_size, tensor{0, 0, 0, 0});
+                        std::make_shared<crop>(hidden_id, input_info(lstm_elt_id), hidden_size, tensor{0, 0, 0, 0});
                     auto& n3 = p.get_or_create(crop_hidden);
                     // adding eltwise as dependency to hidden
                     p.add_connection(n2, n3);
@@ -284,7 +281,7 @@ void graph_initializations::handle_lstm_node(program& p, lstm_node& node) {
                 // lstm_cell
                 if (i < sequence_len - 1 || emit_last_cell) {
                     cell_id = crop_id + ":cell";
-                    auto crop_cell = std::make_shared<crop>(cell_id, lstm_elt_id, hidden_size, tensor{0, 1, 0, 0});
+                    auto crop_cell = std::make_shared<crop>(cell_id, input_info(lstm_elt_id), hidden_size, tensor{0, 1, 0, 0});
                     auto& n4 = p.get_or_create(crop_cell);
                     p.add_connection(n2, n4);
                     cell_list[i * directions + dir] = &n4;
@@ -296,9 +293,9 @@ void graph_initializations::handle_lstm_node(program& p, lstm_node& node) {
         }
         // if there is no next lstm, concatenation is created
         if (!has_lstm_children) {
-            std::vector<primitive_id> output_ids_offsets;
+            std::vector<input_info> output_ids_offsets;
             for (auto& e : output_map) {
-                output_ids_offsets.push_back(e.second.first);
+                output_ids_offsets.push_back(input_info(e.second.first));
             }
             primitive_id concatenation_id = node.id() + ":concat";
             auto concatenation_primitive =
@@ -319,7 +316,7 @@ void graph_initializations::handle_lstm_node(program& p, lstm_node& node) {
                                           static_cast<int32_t>(concatenate_len),
                                           hidden_size.spatial[0],
                                           (int32_t)directions};
-                auto reshape_primitive = std::make_shared<reshape>(node.id() + ":reshape", concatenation_id, output_size);
+                auto reshape_primitive = std::make_shared<reshape>(node.id() + ":reshape", input_info(concatenation_id), output_size);
                 auto& reshape_node = p.get_or_create(reshape_primitive);
                 p.add_connection(concatenation_node, reshape_node);
                 p.replace_all_usages(node, reshape_node);
@@ -345,12 +342,12 @@ void graph_initializations::handle_dynamic_lstm_node(program& p, lstm_dynamic_node& node) {
     // [1] Add lstm_dynamic_input
     auto lstm_dynamic_input_primitive =
         std::make_shared<lstm_dynamic_input>(node_id + suffix + "input",
-                                             input_id,
+                                             input_info(input_id),
                                              dyn_length_id,
                                              weights_id,
                                              bias_id,
                                              "",
-                                             node.get_primitive()->output_padding);
+                                             node.get_primitive()->output_paddings.at(0));
     auto& lstm_dynamic_input_node = p.get_or_create(lstm_dynamic_input_primitive);
     p.add_connection(node.input(), lstm_dynamic_input_node);  // connect real input to dlstm_input
     // connect other deps
@@ -367,7 +364,7 @@ void graph_initializations::handle_dynamic_lstm_node(program& p, lstm_dynamic_node& node) {
     auto last_cell_id = node.last_cell_state_id();
     auto lstm_dynamic_timeloop_primitive =
         std::make_shared<lstm_dynamic_timeloop>(node_id + suffix + "timeloop",
-                                                lstm_dynamic_input_node.id(),
+                                                input_info(lstm_dynamic_input_node.id()),
                                                 dyn_length_id,
                                                 recurrent_id,
                                                 last_hidden_id,
@@ -377,7 +374,7 @@ void graph_initializations::handle_dynamic_lstm_node(program& p, lstm_dynamic_node& node) {
                                                 node.clip(),
                                                 node.input_forget(),
                                                 "",
-                                                lstm_dynamic_input_primitive->output_padding);
+                                                lstm_dynamic_input_primitive->output_paddings.at(0));
     auto& lstm_dynamic_timeloop_node = p.get_or_create(lstm_dynamic_timeloop_primitive);
     p.add_connection(lstm_dynamic_input_node, lstm_dynamic_timeloop_node);  // connect dlstm_input to dlstm_timeloop
     // connect other deps
@@ -406,7 +403,7 @@ void graph_initializations::handle_dynamic_lstm_node(program& p, lstm_dynamic_node& node) {
 
     // we dont have to set output since it will be done in next graph_opts step
 }
-#endif
+
 void graph_initializations::set_outputs(program& p) {
     auto outputs_option = p.get_options().get<build_option_type::outputs>();
     if (!outputs_option->outputs.empty()) {
@@ -425,7 +422,6 @@ void graph_initializations::set_outputs(program& p) {
 }
 
 void graph_initializations::run(program& p) {
-#if 0 // TODO(taylor)
     auto itr = p.nodes_map.begin();
     while (itr != p.nodes_map.end()) {
         auto node_itr = itr++;
@@ -438,7 +434,6 @@ void graph_initializations::run(program& p) {
             handle_dynamic_lstm_node(p, node->as<lstm_dynamic>());
         }
     }
-#endif
     set_outputs(p);
     p.get_processing_order().calc_processing_order(p);
 }
@@ -26,7 +26,10 @@ program_node& post_input_reorder::add_reorder(program& p,
     auto& new_reorder_node = p.get_or_create(new_reorder);
 
     // ToDo: add a method to program class which adds an intermediate node given a node and its user
-    auto it = std::find(usr->get_dependencies().begin(), usr->get_dependencies().end(), node);
+    auto it = std::find_if(usr->get_dependencies().begin(), usr->get_dependencies().end(),
+                           [&](const std::pair<program_node*, int>& dep) {
+                               return node == dep.first;
+                           });
     if (it == usr->get_dependencies().end()) {
         throw std::runtime_error("Inconcistency in topology description: user of a node is not present among its dependecies.");
     }
@@ -52,7 +55,7 @@ void post_input_reorder::run(program& p) {
                 *static_cast<kernel_selector::fully_connected_params*>(fc_impl->_kernel_data.params.get());
 
             auto layout_format = from_data_layout(fc_params.inputs[0].GetLayout());
-            auto& input = node->get_dependencies()[0];
+            auto& input = node->get_dependencies()[0].first;
             auto input_layout = input->get_output_layout();
 
             if (input_layout.format != layout_format) {
@@ -34,7 +34,7 @@ void post_optimize_weights::optimize_weights(T& node, program& p) {
     auto& weights_reorder_params = impl->_weights_reorder_params;
 
     for (auto i = offsets.weights_offset; i < offsets.bias_offset; i++) {
-        auto& weights_node = node.get_dependency(i);
+        auto& weights_node = *node.get_dependency(i).first;
         auto weights_layout = weights_node.get_output_layout();
 
         auto reorders = _rf.get_weights_reorder(weights_node.id(), weights_layout, weights_reorder_params);
@@ -43,7 +43,7 @@ void post_optimize_weights::optimize_weights(T& node, program& p) {
             // insert new generic_layer node to topology
             p.add_intermediate(reorder.first, node, i, !reorder.second);
             // set generic_layer's node output layout and implementation
-            auto& g_node = node.get_dependency(i);
+            auto& g_node = *node.get_dependency(i).first;
             g_node.get_output_layout(false);
 
             // Don't run impl selection to avoid double compilation of reorder kernels
@@ -19,13 +19,13 @@ struct typed_program_node<lstm_dynamic_input> : public typed_program_node_base<lstm_dynamic_input> {
 public:
     typed_program_node(std::shared_ptr<primitive> prim, program& prog) : parent(prim, prog) {}
 
-    program_node& input() const { return get_dependency(0); }
-    program_node& dyn_length() const { return get_dependency(1); }
-    program_node& weights() const { return get_dependency(2); }
+    program_node& input() const { return *get_dependency(0).first; }
+    program_node& dyn_length() const { return *get_dependency(1).first; }
+    program_node& weights() const { return *get_dependency(2).first; }
 
     program_node& bias() const {
         CLDNN_ERROR_BOOL(id(), "Bias term", !bias_term(), "Trying to get non existing bias.");
-        return get_dependency(3);
+        return *get_dependency(3).first;
     }
 
     int32_t direction() const { return weights().get_output_layout().size.feature[0]; }
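The typed-node getters above all follow one mechanical rewrite: `get_dependency(i)` now yields a (node pointer, port) pair, so accessors dereference `.first`. A self-contained sketch of that shape, with illustrative types only:

```cpp
// Illustrative shape of the accessor rewrite; not the real program_node.
#include <cstddef>
#include <utility>
#include <vector>

struct node_t {
    std::vector<std::pair<node_t*, int>> deps;

    const std::pair<node_t*, int>& get_dependency(size_t i) const { return deps[i]; }

    // Before this commit: deps held plain references, so `return get_dependency(0);`.
    // After: dereference the node half of the pair.
    node_t& input() const { return *get_dependency(0).first; }
};

int main() {
    node_t producer;
    node_t consumer;
    consumer.deps.push_back({&producer, 0});
    return &consumer.input() == &producer ? 0 : 1;
}
```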
17 changes: 6 additions & 11 deletions src/plugins/intel_gpu/src/graph/include/pass_manager.h
@@ -6,14 +6,11 @@
 
 #include "intel_gpu/graph/program.hpp"
 #include "layout_optimizer.h"
-#if 0 // TODO(taylor)
 #include "split_inst.h"
 #include "lstm_inst.h"
 #include "lstm_dynamic_inst.h"
-#endif
 #include "quantize_inst.h"
 #include "eltwise_inst.h"
 #include "reorder_inst.h"
 #include "convolution_inst.h"
 #include "program_node.h"
 #include <string>
@@ -73,14 +70,14 @@ class add_reshape_to_primitives : public base_pass {
 private:
     void run(program& p) override;
 };
-#if 0 // TODO(taylor)
+
 class calculate_prior_boxes : public base_pass {
 public:
     calculate_prior_boxes() : base_pass("calculated_prior_boxes") {}
 private:
     void run(program& p) override;
 };
-#endif
+
 class compile_graph : public base_pass {
 public:
     compile_graph() : base_pass("compile_graph") {}
@@ -112,11 +109,9 @@ class graph_initializations : public base_pass {
 
 private:
     void run(program& p) override;
-#if 0 // TODO(taylor)
     void handle_split_node(program& p, split_node& node);
     void handle_lstm_node(program& p, lstm_node& node);
     void handle_dynamic_lstm_node(program& p, lstm_dynamic_node& node);
-#endif
     void set_outputs(program& p);
 };

@@ -237,7 +232,7 @@ class prepare_padding : public base_pass {
     void run(program& p) override;
     bool output_size_handling_enabled;
 };
-#if 0 // TODO(andrew)
+
 class post_input_reorder : public base_pass {
 public:
     post_input_reorder() : base_pass("post_input_reorder") {}
@@ -270,7 +265,7 @@ class post_optimize_weights : public base_pass {
     void optimize_weights(T& node, program& p);
     reorder_factory& _rf;
 };
-#endif
+
 class propagate_constants : public base_pass {
 public:
     propagate_constants() : base_pass("propagate_constants") {}
@@ -389,15 +384,15 @@ class oooq_memory_dependencies : public memory_dependency_pass {
     oooq_memory_dependencies() : memory_dependency_pass("oooq_memory_dependencies") {}
     void run(program& p) override;
 };
 
+#endif
 class update_loop_primitive_map : public base_pass {
 public:
     update_loop_primitive_map() : base_pass("update_loop_primitive_map") {}
 
 private:
     void run(program& p) override;
 };
-#endif
 
 class add_onednn_optimization_attributes : public base_pass {
 public:
     add_onednn_optimization_attributes() : base_pass("add_onednn_optimization_attributes") {}
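All four re-enabled passes (`calculate_prior_boxes`, `post_input_reorder`, `post_optimize_weights`, `update_loop_primitive_map`) share the `base_pass` interface declared in this header. The sketch below is a simplified, standalone stand-in for that pattern, not the plugin's actual pass_manager:

```cpp
// Minimal sketch of the base_pass pattern these declarations follow; the
// real pass_manager/program classes are far richer than this stand-in.
#include <iostream>
#include <memory>
#include <string>
#include <vector>

struct program { /* graph state lives here in the real plugin */ };

class base_pass {
public:
    explicit base_pass(std::string name) : name_(std::move(name)) {}
    virtual ~base_pass() = default;
    virtual void run(program& p) = 0;
    const std::string& name() const { return name_; }
private:
    std::string name_;
};

class post_input_reorder_like : public base_pass {
public:
    post_input_reorder_like() : base_pass("post_input_reorder") {}
    void run(program&) override { /* insert reorders in front of chosen impls */ }
};

int main() {
    program p;
    std::vector<std::unique_ptr<base_pass>> passes;
    passes.push_back(std::make_unique<post_input_reorder_like>());
    for (auto& pass : passes) {
        std::cout << "running " << pass->name() << "\n";
        pass->run(p);  // post-optimization passes execute after impl selection
    }
    return 0;
}
```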
@@ -19,7 +19,7 @@ class typed_program_node<split> : public typed_program_node_base<split> {
 public:
     using parent::parent;
 
-    program_node& input() const { return get_dependency(0); }
+    program_node& input() const { return *get_dependency(0).first; }
 };
 
 using split_node = typed_program_node<split>;