From 4338988cb173ed45a767d9f22ade5d9bf7101385 Mon Sep 17 00:00:00 2001 From: Wangzheee <634486483@qq.com> Date: Wed, 22 Mar 2023 12:11:00 +0000 Subject: [PATCH] add paddle-trt convert op: greater_equal --- .../fluid/inference/api/analysis_predictor.cc | 1 + .../tensorrt/convert/elementwise_op.cc | 27 +++ paddle/fluid/inference/tensorrt/op_teller.cc | 7 +- .../test_trt_convert_compare_and_logical.py | 159 ++++++++++++++++++ 4 files changed, 192 insertions(+), 2 deletions(-) mode change 100755 => 100644 paddle/fluid/inference/tensorrt/convert/elementwise_op.cc diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 06b020f56efec..41d5726c59886 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -2404,6 +2404,7 @@ USE_TRT_CONVERTER(logical_or); USE_TRT_CONVERTER(logical_xor); USE_TRT_CONVERTER(logical_and); USE_TRT_CONVERTER(less_equal); +USE_TRT_CONVERTER(greater_equal); USE_TRT_CONVERTER(transpose); USE_TRT_CONVERTER(transpose2); USE_TRT_CONVERTER(flatten); diff --git a/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc b/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc old mode 100755 new mode 100644 index e3df8337cb8cd..cf1e320b236ab --- a/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc @@ -162,6 +162,26 @@ class ElementwiseTensorOpConverter : public OpConverter { *(equal_layer->getOutput(0)), nvinfer1::ElementWiseOperation::kOR); + RreplenishLayerAndOutput(layer, "elementwise", {output_name}, test_mode); + } else if (op_type_ == "greater_equal") { + auto* greater_layer = + TRT_ENGINE_ADD_LAYER(engine_, + ElementWise, + *X, + *reshape_y_tensor, + nvinfer1::ElementWiseOperation::kGREATER); + auto* equal_layer = + TRT_ENGINE_ADD_LAYER(engine_, + ElementWise, + *X, + *reshape_y_tensor, + nvinfer1::ElementWiseOperation::kEQUAL); + auto* layer = TRT_ENGINE_ADD_LAYER(engine_, + ElementWise, + *(greater_layer->getOutput(0)), + *(equal_layer->getOutput(0)), + nvinfer1::ElementWiseOperation::kOR); + RreplenishLayerAndOutput(layer, "elementwise", {output_name}, test_mode); } else if (op_type_ == "mod") { auto* div_layer = @@ -290,6 +310,11 @@ class ElementwiseTensorLessEqualOpConverter public: ElementwiseTensorLessEqualOpConverter() { op_type_ = "less_equal"; } }; +class ElementwiseTensorGreaterEqualOpConverter + : public ElementwiseTensorOpConverter { + public: + ElementwiseTensorGreaterEqualOpConverter() { op_type_ = "greater_equal"; } +}; class ElementwiseTensorModOpConverter : public ElementwiseTensorOpConverter { public: ElementwiseTensorModOpConverter() { op_type_ = "mod"; } @@ -342,3 +367,5 @@ REGISTER_TRT_OP_CONVERTER(logical_or, ElementwiseTensorLogicalOrOpConverter); REGISTER_TRT_OP_CONVERTER(logical_xor, ElementwiseTensorLogicalXorOpConverter); REGISTER_TRT_OP_CONVERTER(logical_and, ElementwiseTensorLogicalAndOpConverter); REGISTER_TRT_OP_CONVERTER(less_equal, ElementwiseTensorLessEqualOpConverter); +REGISTER_TRT_OP_CONVERTER(greater_equal, + ElementwiseTensorGreaterEqualOpConverter); diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc index f3a1179826607..1d1448b4166af 100644 --- a/paddle/fluid/inference/tensorrt/op_teller.cc +++ b/paddle/fluid/inference/tensorrt/op_teller.cc @@ -1427,7 +1427,8 @@ struct SimpleOpTypeSetTeller : public Teller { if (op_type == "less_than" || op_type == "greater_than" || op_type == 
"logical_or" || op_type == "logical_xor" || - op_type == "logical_and" || op_type == "less_equal") { + op_type == "logical_and" || op_type == "less_equal" || + op_type == "greater_equal") { #if IS_TRT_VERSION_GE(8400) // TRT does not support kEQUAL/kGREATER/kLESS work with implicit batch if (!with_dynamic_shape) { @@ -1448,7 +1449,7 @@ struct SimpleOpTypeSetTeller : public Teller { } } if (op_type == "less_than" || op_type == "greater_than" || - op_type == "less_equal") { + op_type == "less_equal" || op_type == "greater_equal") { if (x_dtype == framework::proto::VarType::BOOL || y_dtype == framework::proto::VarType::BOOL) { VLOG(3) @@ -2767,6 +2768,7 @@ struct SimpleOpTypeSetTeller : public Teller { "logical_xor", "logical_and", "less_equal", + "greater_equal", "dropout", "fill_any_like", "prelu", @@ -2923,6 +2925,7 @@ struct SimpleOpTypeSetTeller : public Teller { "logical_xor", "logical_and", "less_equal", + "greater_equal", "dropout", "fill_any_like", "prelu", diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_compare_and_logical.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_compare_and_logical.py index 50159c222cc8a..79d157e9fe7db 100755 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_compare_and_logical.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_compare_and_logical.py @@ -481,6 +481,165 @@ def test(self): self.run_test() +class TrtConvertGreaterEqualTest(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: + return True + + def sample_program_configs(self): + def generate_input(shape): + return np.random.random(shape).astype(np.float32) + + for shape in [[2, 16], [2, 16, 32], [1, 32, 16, 32]]: + for op_type in ["greater_equal"]: + for axis in [-1]: + self.dims = len(shape) + dics = [ + {"axis": axis}, + {"in_dtype": 5, "out_dtype": 2}, + {"in_dtype": 0, "out_dtype": 5}, + ] + ops_config = [ + { + "op_type": "cast", + "op_inputs": {"X": ["input_data1"]}, + "op_outputs": {"Out": ["cast_output_data1"]}, + "op_attrs": dics[1], + "outputs_dtype": {"cast_output_data1": np.int32}, + }, + { + "op_type": "cast", + "op_inputs": {"X": ["input_data2"]}, + "op_outputs": {"Out": ["cast_output_data2"]}, + "op_attrs": dics[1], + "outputs_dtype": {"cast_output_data2": np.int32}, + }, + { + "op_type": op_type, + "op_inputs": { + "X": ["cast_output_data1"], + "Y": ["cast_output_data2"], + }, + "op_outputs": {"Out": ["cast_output_data0"]}, + "op_attrs": dics[0], + }, + { + "op_type": "cast", + "op_inputs": {"X": ["cast_output_data0"]}, + "op_outputs": {"Out": ["output_data"]}, + "op_attrs": dics[2], + }, + ] + ops = self.generate_op_config(ops_config) + + program_config = ProgramConfig( + ops=ops, + weights={}, + inputs={ + "input_data1": TensorConfig( + data_gen=partial(generate_input, shape) + ), + "input_data2": TensorConfig( + data_gen=partial(generate_input, shape) + ), + }, + outputs=["output_data"], + ) + + yield program_config + + def sample_predictor_configs( + self, program_config + ) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): + if self.dims == 2: + self.dynamic_shape.min_input_shape = { + "input_data1": [2, 16], + "input_data2": [2, 16], + } + self.dynamic_shape.max_input_shape = { + "input_data1": [2, 16], + "input_data2": [2, 16], + } + self.dynamic_shape.opt_input_shape = { + "input_data1": [2, 16], + "input_data2": [2, 16], + } + if self.dims == 3: + self.dynamic_shape.min_input_shape = { + "input_data1": 
[2, 16, 32], + "input_data2": [2, 16, 32], + } + self.dynamic_shape.max_input_shape = { + "input_data1": [2, 16, 32], + "input_data2": [2, 16, 32], + } + self.dynamic_shape.opt_input_shape = { + "input_data1": [2, 16, 32], + "input_data2": [2, 16, 32], + } + if self.dims == 4: + self.dynamic_shape.min_input_shape = { + "input_data1": [1, 32, 16, 32], + "input_data2": [1, 32, 16, 32], + } + self.dynamic_shape.max_input_shape = { + "input_data1": [1, 32, 16, 32], + "input_data2": [1, 32, 16, 32], + } + self.dynamic_shape.opt_input_shape = { + "input_data1": [1, 32, 16, 32], + "input_data2": [1, 32, 16, 32], + } + + def clear_dynamic_shape(): + self.dynamic_shape.max_input_shape = {} + self.dynamic_shape.min_input_shape = {} + self.dynamic_shape.opt_input_shape = {} + + def generate_trt_nodes_num(attrs, dynamic_shape): + ver = paddle_infer.get_trt_compile_version() + if ( + ver[0] * 1000 + ver[1] * 100 + ver[2] * 10 < 8400 + or not dynamic_shape + ): + return 2, 5 + else: + return 1, 3 + + attrs = [ + program_config.ops[i].attrs for i in range(len(program_config.ops)) + ] + + # for static_shape + clear_dynamic_shape() + self.trt_param.precision = paddle_infer.PrecisionType.Float32 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, False + ), 1e-5 + self.trt_param.precision = paddle_infer.PrecisionType.Half + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, False + ), (1e-3, 1e-3) + + # for dynamic_shape + generate_dynamic_shape(attrs) + self.trt_param.precision = paddle_infer.PrecisionType.Float32 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True + ), 1e-5 + self.trt_param.precision = paddle_infer.PrecisionType.Half + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True + ), (1e-3, 1e-3) + + def add_skip_trt_case(self): + pass + + def test(self): + self.add_skip_trt_case() + self.run_test() + + class TrtConvertCompareSkipTest(TrtLayerAutoScanTest): def is_program_valid(self, program_config: ProgramConfig) -> bool: return True
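
Review note: TensorRT's `IElementWiseLayer` exposes `kGREATER` and `kEQUAL` but (as of the TRT 8.x `ElementWiseOperation` enum) no native greater-or-equal operation, which is why the converter above ORs the two comparison results, mirroring the existing `less_equal` branch. A minimal NumPy sketch of the identity the converter relies on (illustrative only, not part of the patch):

```python
import numpy as np

x = np.random.randint(0, 4, size=(2, 16)).astype(np.int32)
y = np.random.randint(0, 4, size=(2, 16)).astype(np.int32)

# greater_equal(x, y) == greater(x, y) OR equal(x, y) -- the same
# decomposition the converter builds with kGREATER, kEQUAL, and kOR.
decomposed = np.logical_or(np.greater(x, y), np.equal(x, y))
assert np.array_equal(decomposed, np.greater_equal(x, y))
```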
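For context on the test graph: `TrtConvertGreaterEqualTest` sandwiches the compare op between `cast` ops (fp32 -> int32 -> compare -> bool -> fp32). The int32 cast presumably keeps the `kEQUAL` half of the comparison deterministic (exact float equality is brittle across backends), and the trailing cast turns the BOOL result, which the op_teller change above notes is the output dtype of TRT compare ops, back into an fp32 tensor for fetching. The dtype codes in `dics` follow Paddle's `VarType` enum: 0 = BOOL, 2 = INT32, 5 = FP32. A rough stand-alone NumPy mirror of the op chain (names and reasoning hedged, not taken from the patch):

```python
import numpy as np

x = np.random.random([2, 16]).astype(np.float32)  # input_data1
y = np.random.random([2, 16]).astype(np.float32)  # input_data2

xi = x.astype(np.int32)        # cast: in_dtype=5 (FP32) -> out_dtype=2 (INT32)
yi = y.astype(np.int32)
ge = np.greater_equal(xi, yi)  # the op under test; BOOL output
out = ge.astype(np.float32)    # cast: in_dtype=0 (BOOL) -> out_dtype=5 (FP32)
```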
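Finally, note that the teller only accepts these compare ops with dynamic shape on TRT >= 8.4: the `IS_TRT_VERSION_GE(8400)` gate above, which the test encodes as `ver[0] * 1000 + ver[1] * 100 + ver[2] * 10 < 8400`. So an inference config that actually exercises the new converter must register dynamic shape info. A sketch, assuming a saved model at `./model.pdmodel` whose graph contains a `greater_equal` op; the paths, tensor names, and shapes below are placeholders, not from the patch:

```python
import paddle.inference as paddle_infer

config = paddle_infer.Config("./model.pdmodel", "./model.pdiparams")
config.enable_use_gpu(256, 0)
# Offload eligible subgraphs (including greater_equal on TRT >= 8.4) to TensorRT.
config.enable_tensorrt_engine(
    workspace_size=1 << 30,
    max_batch_size=1,
    min_subgraph_size=1,
    precision_mode=paddle_infer.PrecisionType.Float32,
    use_static=False,
    use_calib_mode=False,
)
# Dynamic shape is mandatory here: the teller rejects compare ops otherwise.
config.set_trt_dynamic_shape_info(
    {"x": [1, 16], "y": [1, 16]},  # min_input_shape
    {"x": [4, 16], "y": [4, 16]},  # max_input_shape
    {"x": [2, 16], "y": [2, 16]},  # optim_input_shape
)
predictor = paddle_infer.create_predictor(config)
```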