From 46e8aad4bbbb688e8d361211954a936ae8d8bfaf Mon Sep 17 00:00:00 2001
From: Andrew Kwangwoong Park <andrew.park@intel.com>
Date: Sat, 25 Feb 2023 07:26:54 +0900
Subject: [PATCH] [GPU] Fix output format not changing at runtime (#15887)

* [GPU] Fix output format not changing at runtime

Signed-off-by: Andrew Park <andrew.park@intel.com>

* Add remove_redundant_reorders pass TC for ov_gpu_unit_tests

Signed-off-by: Andrew Park <andrew.park@intel.com>

---------

Signed-off-by: Andrew Park <andrew.park@intel.com>
---
 .../remove_redundant_reorders.cpp             | 12 ++++
 .../remove_redundant_reorders_tests.cpp       | 62 +++++++++++++++++++
 2 files changed, 74 insertions(+)
 create mode 100644 src/plugins/intel_gpu/tests/passes/remove_redundant_reorders_tests.cpp
diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp
index 06d2b2852d666c..2bfb2e9bcb2f7d 100644
--- a/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp
+++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp
@@ -169,6 +169,18 @@ void remove_redundant_reorders::run(program& p) {
             !r_node.get_primitive()->has_surface_input();
 
         if (remove_dep) {
+            // For chains like
+            //   b_fs_yx_fsv16 -> reorder(ofmt:bfyx) -> bfyx -> reorder(ofmt:any) -> bfyx
+            // removing the dependency reorder while the current node keeps format::any would propagate the input format as it is:
+            //   b_fs_yx_fsv16 -> reorder(ofmt:any) -> b_fs_yx_fsv16
+            // so the output format of the dependency node must be stored in output_format of the current node:
+            //   b_fs_yx_fsv16 -> reorder(ofmt:bfyx) -> bfyx
+            auto output_layout = r_dep_node.get_output_layout();
+            auto prim = std::const_pointer_cast<reorder>(r_node.get_primitive());
+            if (prim->output_format == format::any)
+                prim->output_format = output_layout.format;
+
+            LOG_NODE_REMOVAL(r_dep_node.id());
             r_dep_node.can_be_optimized(true);
             p.add_optimized_primitive_info(r_dep_node.id());
             p.extract_and_remove(r_dep_node);
diff --git a/src/plugins/intel_gpu/tests/passes/remove_redundant_reorders_tests.cpp b/src/plugins/intel_gpu/tests/passes/remove_redundant_reorders_tests.cpp
new file mode 100644
index 00000000000000..b2c62a5506b302
--- /dev/null
+++ b/src/plugins/intel_gpu/tests/passes/remove_redundant_reorders_tests.cpp
@@ -0,0 +1,62 @@
+// Copyright (C) 2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "test_utils.h"
+
+#include "intel_gpu/runtime/engine.hpp"
+
+#include "intel_gpu/graph/network.hpp"
+#include "intel_gpu/graph/program.hpp"
+#include "data_inst.h"
+#include "convolution_inst.h"
+#include "reorder_inst.h"
+#include "softmax_inst.h"
+
+#include "pass_manager.h"
+#include "to_string_utils.h"
+
+#include "program_wrapper.h"
+
+#include <memory>
+
+using namespace cldnn;
+using namespace ::tests;
+
+TEST(remove_redundant_reorders, remove_dep_dynamic) {
+    // Topology:
+    // convolution -> reorder(format::any, f32) -> softmax, fed by an input layout with dynamic spatial dimensions
+    //
+    // Expectation:
+    // The preferred format of convolution should be selected as b_fs_yx_fsv16 (reorder_inputs)
+    // A new reorder that converts to bfyx should be inserted after convolution (reorder_inputs)
+    // The output format of the dependency reorder should be saved as output_format of the original reorder (remove_redundant_reorders)
+
+    auto& engine = get_test_engine();
+    auto input_layout_dynamic = layout{ov::PartialShape{1, 3, ov::Dimension::dynamic(), ov::Dimension::dynamic()},
+                                       data_types::f16, format::bfyx};
+    auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 3, 224, 224 } });  // NOTE(review): f32 memory vs. f16 input layout above - confirm this mismatch is intended
+    auto weights = engine.allocate_memory({ data_types::f16, format::bfyx, { 64, 3, 7, 7 } });
+
+    topology topology;
+    topology.add(data("weights", weights));
+    topology.add(input_layout("input", input_layout_dynamic));
+    topology.add(convolution("conv", input_info("input"), { "weights" }));
+    topology.add(reorder("reorder", input_info("conv"), format::any, data_types::f32));  // format::any is the case under test
+    topology.add(softmax("softmax", input_info("reorder"), 1));
+
+    ExecutionConfig config;
+    config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
+    config.set_property(ov::intel_gpu::optimize_data(true));
+    network network(engine, topology, config);
+    network.set_input_data("input", input);
+
+    network.execute();
+
+    auto prog = network.get_program();
+    ASSERT_NE(prog, nullptr);
+    auto& softmax_node = prog->get_node("softmax");
+    auto softmax_layout = softmax_node.get_output_layout();
+
+    ASSERT_EQ(softmax_layout.format.value, format::bfyx);  // softmax must see bfyx, not the convolution's blocked format
+}