From 46e8aad4bbbb688e8d361211954a936ae8d8bfaf Mon Sep 17 00:00:00 2001 From: Andrew Kwangwoong Park Date: Sat, 25 Feb 2023 07:26:54 +0900 Subject: [PATCH] [GPU] Fix output format not changing at runtime (#15887) * [GPU] Fix output format not changing at runtime Signed-off-by: Andrew Park * Add remove_redundant_reorders pass TC for ov_gpu_unit_tests Signed-off-by: Andrew Park --------- Signed-off-by: Andrew Park --- .../remove_redundant_reorders.cpp | 12 ++++ .../remove_redundant_reorders_tests.cpp | 62 +++++++++++++++++++ 2 files changed, 74 insertions(+) create mode 100644 src/plugins/intel_gpu/tests/passes/remove_redundant_reorders_tests.cpp diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp index 06d2b2852d666c..2bfb2e9bcb2f7d 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp @@ -169,6 +169,18 @@ void remove_redundant_reorders::run(program& p) { !r_node.get_primitive()->has_surface_input(); if (remove_dep) { + // for chains like + // b_fs_yx_fsv16 -> reorder(ofmt:bfyx) -> bfyx -> reorder(ofmt:any) -> bfyx + // if output_format of current node is format::any, input format of the dependency node is propagated as it is + // b_fs_yx_fsv16 -> reorder(ofmt:any) -> b_fs_yx_fsv16 + // so output format of dependency node must be stored in output_format of current node + // b_fs_yx_fsv16 -> reorder(ofmt:bfyx) -> bfyx + auto output_layout = r_dep_node.get_output_layout(); + auto prim = std::const_pointer_cast<reorder>(r_node.get_primitive()); + if (prim->output_format == format::any) + prim->output_format = output_layout.format; + + LOG_NODE_REMOVAL(r_dep_node.id()); r_dep_node.can_be_optimized(true); p.add_optimized_primitive_info(r_dep_node.id()); p.extract_and_remove(r_dep_node); diff --git 
a/src/plugins/intel_gpu/tests/passes/remove_redundant_reorders_tests.cpp b/src/plugins/intel_gpu/tests/passes/remove_redundant_reorders_tests.cpp new file mode 100644 index 00000000000000..b2c62a5506b302 --- /dev/null +++ b/src/plugins/intel_gpu/tests/passes/remove_redundant_reorders_tests.cpp @@ -0,0 +1,62 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "test_utils.h" + +#include "intel_gpu/runtime/engine.hpp" + +#include "intel_gpu/graph/network.hpp" +#include "intel_gpu/graph/program.hpp" +#include "data_inst.h" +#include "convolution_inst.h" +#include "reorder_inst.h" +#include "softmax_inst.h" + +#include "pass_manager.h" +#include "to_string_utils.h" + +#include "program_wrapper.h" + +#include <memory> + +using namespace cldnn; +using namespace ::tests; + +TEST(remove_redundant_reorders, remove_dep_dynamic) { + // Topology: + // convolution -> reorder -> softmax + // + // Expectation: + // The preferred format of convolution should be selected as b_fs_yx_fsv16 (reorder_inputs) + // A new reorder that converts to bfyx should be inserted after convolution (reorder_inputs) + // In reorders, output format of dependency reorder should be saved as output_format of original reorder (remove_redundant_reorders) + + auto& engine = get_test_engine(); + auto input_layout_dynamic = layout{ov::PartialShape{1, 3, ov::Dimension::dynamic(), ov::Dimension::dynamic()}, + data_types::f16, format::bfyx}; + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 3, 224, 224 } }); + auto weights = engine.allocate_memory({ data_types::f16, format::bfyx, { 64, 3, 7, 7 } }); + + topology topology; + topology.add(data("weights", weights)); + topology.add(input_layout("input", input_layout_dynamic)); + topology.add(convolution("conv", input_info("input"), { "weights" })); + topology.add(reorder("reorder", input_info("conv"), format::any, data_types::f32)); + topology.add(softmax("softmax", input_info("reorder"), 1)); + + 
ExecutionConfig config; + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + config.set_property(ov::intel_gpu::optimize_data(true)); + network network(engine, topology, config); + network.set_input_data("input", input); + + network.execute(); + + auto prog = network.get_program(); + ASSERT_NE(prog, nullptr); + auto& softmax_node = prog->get_node("softmax"); + auto softmax_layout = softmax_node.get_output_layout(); + + ASSERT_EQ(softmax_layout.format.value, format::bfyx); +}