diff --git a/inference-engine/tests/functional/inference_engine/transformations/low_latency_test.cpp b/inference-engine/tests/functional/inference_engine/transformations/low_latency_test.cpp index 3d15d463367b84..c5e8b94049b4ed 100644 --- a/inference-engine/tests/functional/inference_engine/transformations/low_latency_test.cpp +++ b/inference-engine/tests/functional/inference_engine/transformations/low_latency_test.cpp @@ -74,8 +74,8 @@ TEST(TransformationTests, LowLatencyLSTM) { } { auto Xi = std::make_shared(element::f32, Shape{1, 1, 16}); - auto H_t = std::make_shared(element::f32, Shape{1, 128}); - auto C_t = std::make_shared(element::f32, Shape{1, 128}); + auto H_t = std::make_shared(element::f32, Shape{1, 128}, std::vector(128, 0)); + auto C_t = std::make_shared(element::f32, Shape{1, 128}, std::vector(128, 0)); const std::string variable_name_H("LSTMTensorIterator/variable0"); const std::string variable_name_C("LSTMTensorIterator/variable1"); @@ -98,7 +98,7 @@ TEST(TransformationTests, LowLatencyLSTM) { auto unsqueeze = std::make_shared(lstm_cell->output(0), axis); auto res_2 = std::make_shared(unsqueeze); auto res_1 = std::make_shared(lstm_cell->output(0)); - f_ref = std::make_shared(OutputVector{res_1, res_2}, ParameterVector{Xi, H_t, C_t}); + f_ref = std::make_shared(OutputVector{res_1, res_2}, ParameterVector{Xi}); f_ref->add_sinks({assign_C, assign_H}); assign_H->add_control_dependency(read_value_H); assign_C->add_control_dependency(read_value_C); @@ -155,7 +155,7 @@ TEST(TransformationTests, LowLatencyGRU) { } { auto Xi = std::make_shared(element::f32, Shape{1, 1, 16}); - auto H_t = std::make_shared(element::f32, Shape{1, 128}); + auto H_t = std::make_shared(element::f32, Shape{1, 128}, std::vector(128, 0)); const std::string variable_name_H("GRUTensorIterator/variable0"); auto read_value_H = std::make_shared(H_t, variable_name_H); @@ -175,7 +175,7 @@ TEST(TransformationTests, LowLatencyGRU) { auto res_1 = std::make_shared(assign_H); auto unsqueeze = 
std::make_shared(rnn_cell->output(0), axis); auto res_2 = std::make_shared(unsqueeze); - f_ref = std::make_shared(OutputVector{unsqueeze}, ParameterVector{Xi, H_t}); + f_ref = std::make_shared(OutputVector{unsqueeze}, ParameterVector{Xi}); f_ref->add_sinks({assign_H}); assign_H->add_control_dependency(read_value_H); } @@ -232,7 +232,7 @@ TEST(TransformationTests, LowLatencyRNN) { } { auto Xi = std::make_shared(element::f32, Shape{1, 1, 16}); - auto H_t = std::make_shared(element::f32, Shape{1, 128}); + auto H_t = std::make_shared(element::f32, Shape{1, 128}, std::vector(128, 0)); const std::string variable_name_H("RNNTensorIterator/variable0"); auto read_value_H = std::make_shared(H_t, variable_name_H); @@ -252,7 +252,7 @@ TEST(TransformationTests, LowLatencyRNN) { auto res_1 = std::make_shared(assign_H); auto unsqueeze = std::make_shared(rnn_cell->output(0), axis); auto res_2 = std::make_shared(unsqueeze); - f_ref = std::make_shared(OutputVector{unsqueeze}, ParameterVector{Xi, H_t}); + f_ref = std::make_shared(OutputVector{unsqueeze}, ParameterVector{Xi}); f_ref->add_sinks({assign_H}); assign_H->add_control_dependency(read_value_H); } @@ -319,8 +319,8 @@ TEST(TransformationTests, LowLatencyLSTMReshape) { } { auto Xi = std::make_shared(element::f32, Shape{1, 1, 16}); - auto H_t = std::make_shared(element::f32, Shape{1, 128}); - auto C_t = std::make_shared(element::f32, Shape{1, 128}); + auto H_t = std::make_shared(element::f32, Shape{1, 128}, std::vector(128, 0)); + auto C_t = std::make_shared(element::f32, Shape{1, 128}, std::vector(128, 0)); const std::string variable_name_H("LSTMTensorIterator/variable0"); const std::string variable_name_C("LSTMTensorIterator/variable1"); @@ -343,7 +343,7 @@ TEST(TransformationTests, LowLatencyLSTMReshape) { auto unsqueeze = std::make_shared(lstm_cell->output(0), axis); auto res_2 = std::make_shared(unsqueeze); auto res_1 = std::make_shared(lstm_cell->output(0)); - f_ref = std::make_shared(OutputVector{res_1, res_2}, 
ParameterVector{Xi, H_t, C_t}); + f_ref = std::make_shared(OutputVector{res_1, res_2}, ParameterVector{Xi}); f_ref->add_sinks({assign_C, assign_H}); assign_H->add_control_dependency(read_value_H); assign_C->add_control_dependency(read_value_C); @@ -351,3 +351,57 @@ TEST(TransformationTests, LowLatencyLSTMReshape) { auto res = compare_functions(f, f_ref); ASSERT_TRUE(res.first) << res.second; } + +TEST(TransformationTests, LowLatencyLSTM_3dinput) { + std::shared_ptr f(nullptr), f_ref(nullptr); + + auto X = std::make_shared(element::f32, Shape{1, 1, 16}); + auto H_init = std::make_shared(element::f32, Shape{1, 128}); + auto C_init = std::make_shared(element::f32, Shape{1, 128}); + + auto Xi = std::make_shared(element::f32, Shape{1, 1, 16}); + auto H_t = std::make_shared(element::f32, Shape{1, 128}); + auto C_t = std::make_shared(element::f32, Shape{1, 128}); + + // Body + auto axis = ngraph::opset5::Constant::create(ngraph::element::i64, ngraph::Shape{}, {0}); + auto squeeze = std::make_shared(Xi, axis); + + auto w_val = std::vector(512 * 16, 0); + auto r_val = std::vector(512 * 128, 0); + auto b_val = std::vector(512, 0); + auto W = ngraph::opset5::Constant::create(ngraph::element::f32, ngraph::Shape{512, 16}, w_val); + auto R = ngraph::opset5::Constant::create(ngraph::element::f32, ngraph::Shape{512, 128}, r_val); + auto B = ngraph::opset5::Constant::create(ngraph::element::f32, ngraph::Shape{512}, b_val); + + auto lstm_cell = std::make_shared(squeeze, H_t, C_t, W, R, B, 128); + auto res_1 = std::make_shared(lstm_cell->output(0)); + auto unsqueeze = std::make_shared(lstm_cell->output(0), axis); + auto res_2 = std::make_shared(unsqueeze); + auto res_3 = std::make_shared(lstm_cell->output(1)); + auto body = std::make_shared(OutputVector{res_1, res_2, res_3}, ParameterVector{H_t, Xi, C_t}); + + auto tensor_iterator = std::make_shared(); + tensor_iterator->set_body(body); + tensor_iterator->set_friendly_name("LSTMTensorIterator"); + + 
tensor_iterator->set_merged_input(C_t, C_init, res_3); + tensor_iterator->set_sliced_input(Xi, X, 0, 1, 1, -1, 0); + tensor_iterator->set_merged_input(H_t, H_init, res_1); + + auto out0 = tensor_iterator->get_iter_value(res_1, -1); + auto out1 = tensor_iterator->get_concatenated_slices(res_2, 0, 1, 1, -1, 0); + + auto res_ti_1 = std::make_shared(tensor_iterator->output(1)); + auto res_ti_2 = std::make_shared(tensor_iterator->output(0)); + f = std::make_shared(ngraph::NodeVector{res_ti_1, res_ti_2}, + ngraph::ParameterVector{X, H_init, C_init}); + + ngraph::pass::Manager manager; + manager.register_pass(); + manager.register_pass(); + manager.run_passes(f); + + ASSERT_EQ(body->get_parameters().size(), 1); + ASSERT_EQ(tensor_iterator->get_input_descriptions()[0]->m_body_parameter_index, 0); +} diff --git a/ngraph/core/src/pass/low_latency.cpp b/ngraph/core/src/pass/low_latency.cpp index 89a7a73c4996b2..772135bb9db790 100644 --- a/ngraph/core/src/pass/low_latency.cpp +++ b/ngraph/core/src/pass/low_latency.cpp @@ -9,6 +9,7 @@ #include #include #include +#include NGRAPH_RTTI_DEFINITION(ngraph::pass::LowLatency, "LowLatency", 0); @@ -29,7 +30,9 @@ ngraph::pass::LowLatency::LowLatency() int64_t variable_id = 0; std::vector> assigns; const auto& func = ti->get_function(); - for (const auto& in : ti->get_input_descriptions()) + auto in_descs = ti->get_input_descriptions(); + std::vector inputs_ind_to_delete; + for (const auto& in : in_descs) { // Process all back edges if (const auto& merged_in = std::dynamic_pointer_cast< @@ -44,8 +47,14 @@ ngraph::pass::LowLatency::LowLatency() .at(merged_in->m_body_parameter_index) ->get_friendly_name() + "/variable_" + std::to_string(variable_id)); - auto read_value = std::make_shared( - func->get_parameters().at(merged_in->m_body_parameter_index), variable_name); + auto init_shape = func->get_parameters().at(merged_in->m_body_parameter_index)->get_partial_shape().get_shape(); + int zeros_length = 1.0; + for (auto i : init_shape){ + 
zeros_length *= i; + } + std::vector zeros(zeros_length, 0); + auto init_const = op::Constant::create(element::f32, init_shape, zeros); + auto read_value = std::make_shared(init_const, variable_name); read_value->set_friendly_name(variable_name); for (const auto& input_to : inputs_to) { @@ -58,9 +67,34 @@ // control dependency so that ReadValue is processed before Assign assign->add_control_dependency(read_value); assigns.emplace_back(assign); + // save index of input to delete + inputs_ind_to_delete.push_back(merged_in->m_body_parameter_index); } variable_id++; + } + + std::sort(inputs_ind_to_delete.begin(), inputs_ind_to_delete.end()); + auto params = func->get_parameters(); + for (int i = static_cast<int>(inputs_ind_to_delete.size()) - 1; i >= 0; i--){ + func->remove_parameter(params[inputs_ind_to_delete[i]]); + } + + // remove replaced inputs from descriptions + ti->get_input_descriptions().erase(remove_if(ti->get_input_descriptions().begin(), ti->get_input_descriptions().end(), + [](ngraph::op::util::InputDescriptionPtr it){ + return std::dynamic_pointer_cast(it); + }), ti->get_input_descriptions().end()); + + // fix indexes for other inputs + for (const auto& in : in_descs) + { + size_t already_deleted = 0; + while(already_deleted < inputs_ind_to_delete.size() && in->m_body_parameter_index > inputs_ind_to_delete[already_deleted]){ + already_deleted++; + } + in->m_body_parameter_index -= already_deleted; } + // save Assign in the func so that it gets into graph traversals and isn't deleted. func->add_sinks(assigns); return false;