// Patch summary (the preamble sits before the first `diff --git` header; the hunks themselves are unchanged).
//
// Files touched:
//   * src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp
//   * src/plugins/intel_gpu/src/graph/include/reorder_inst.h
//
// Hunk @@ -146 (remove_redundant_reorders::run):
//   `remove_dep` gains one more required term: `!r_node.get_primitive()->weights_reorder_params`.
//   The new term tests r_node (not r_dep_node), so the dependency reorder is no longer marked
//   for removal when r_node's primitive has weights_reorder_params set.
//
// Hunk @@ -262 (remove_redundant_reorders::run):
//   The `continue` guard drops the clause
//   `weights_reorder_params && weights_reorder_params->should_be_transposed()`.
//   The remaining conditions are `!r_node.is_simple_reorder()`, `no_output_optimization`
//   and `has_surface_input()`.
//
// Hunk @@ -58 (reorder_inst.h, is_simple_reorder):
//   is_simple_reorder() additionally requires `!get_primitive()->weights_reorder_params`.
//   Because the @@ -262 guard already skips any node for which is_simple_reorder() is false,
//   the clause removed there is covered by this stricter predicate: a reorder with
//   weights_reorder_params set now hits `continue` whether or not it should be transposed.
//
// NOTE(review): the whole patch is on a single physical line and template arguments look
// stripped (e.g. `typed_program_node : public typed_program_node_base`,
// `std::shared_ptr get_fuse_params`) - presumably an extraction artifact; confirm against the
// upstream sources before trying to apply this patch.
diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp index 30bfef188e6ca1..362cf5bf63df4c 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp @@ -146,7 +146,8 @@ void remove_redundant_reorders::run(program& p) { bool remove_dep = r_dep_node.is_simple_reorder() && r_dep_node.get_users().size() == 1 && !r_dep_node.is_output() && - !r_dep_node.get_primitive()->has_surface_input(); + !r_dep_node.get_primitive()->has_surface_input() && + !r_node.get_primitive()->weights_reorder_params; // for chains like // fp32 -> reorder -> u8 -> reorder -> fp32 @@ -262,9 +263,7 @@ void remove_redundant_reorders::run(program& p) { if (!r_node.is_simple_reorder() || no_output_optimization || - r_node.get_primitive()->has_surface_input() || - (r_node.get_primitive()->weights_reorder_params && - r_node.get_primitive()->weights_reorder_params->should_be_transposed())) + r_node.get_primitive()->has_surface_input()) continue; auto o_layout = r_node.get_output_layout(); diff --git a/src/plugins/intel_gpu/src/graph/include/reorder_inst.h b/src/plugins/intel_gpu/src/graph/include/reorder_inst.h index 787e8bc75d3e5f..00e48ec8715929 100644 --- a/src/plugins/intel_gpu/src/graph/include/reorder_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/reorder_inst.h @@ -58,7 +58,8 @@ struct typed_program_node : public typed_program_node_base { bool is_simple_reorder() const { return !has_fused_primitives() && !has_mean() && - get_primitive()->subtract_per_feature.empty(); + get_primitive()->subtract_per_feature.empty() && + !get_primitive()->weights_reorder_params; } std::shared_ptr get_fuse_params() const override {