Commit

Merge pull request PaddlePaddle#112 from LokeZhou/groundsam
Groundsam
LokeZhou authored Aug 31, 2023
2 parents 602795b + dd80f7c commit c1c61dd
Showing 3 changed files with 35 additions and 21 deletions.
7 changes: 4 additions & 3 deletions deploy/groundingdino/README.md
@@ -17,13 +17,14 @@ python setup_ms_deformable_attn_op.py install
 ## 2.2 Export model for static inference
 ```bash
 #export grounding dino model
-python export.py
+python export.py \
+ --dino_type GroundingDino/groundingdino-swint-ogc
 
 
 #inference
 python predict.py \
- --text_encoder_type GroundingDino/groundingdino-swint-ogc
- --model_path output_groundingdino \
+ --text_encoder_type GroundingDino/groundingdino-swint-ogc \
+ --model_path output_groundingdino/GroundingDino/groundingdino-swint-ogc \
  --input_image image_you_want_to_detect.jpg \
  -output_dir "dir you want to save the output" \
  -prompt "Detect Cat"
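Note: the `--model_path` argument now carries the `GroundingDino/groundingdino-swint-ogc` suffix because, with this commit, export.py writes the static-graph model into a per-`--dino_type` subdirectory of its output directory; see the export.py change below.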
2 changes: 1 addition & 1 deletion deploy/groundingdino/export.py
@@ -77,7 +77,7 @@ def apply_to_static(model):
     )
     args = parser.parse_args()
 
-    output_dir = args.output_dir
+    output_dir = os.path.join(args.output_dir, args.dino_type)
     # load model
     model = GroundingDinoModel.from_pretrained(args.dino_type)
     model.eval()
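To make the effect of the new line concrete, here is a minimal sketch; the default `--output_dir` value of `output_groundingdino` is an assumption inferred from the README's old `--model_path`. Because the `--dino_type` string itself contains a slash, the join produces the nested directory that the updated predict.py command points at:

```python
import os

# Illustrative values only; "output_groundingdino" as the default --output_dir
# is an assumption based on the README above.
output_dir = "output_groundingdino"
dino_type = "GroundingDino/groundingdino-swint-ogc"

# The exported static model now lands in a per-model subdirectory.
print(os.path.join(output_dir, dino_type))
# -> output_groundingdino/GroundingDino/groundingdino-swint-ogc
```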
47 changes: 30 additions & 17 deletions paddlemix/appflow/openset_det_sam.py
@@ -67,6 +67,20 @@ def _construct_input_spec(self):
             paddle.static.InputSpec(shape=[None, None], name="position_ids", dtype="int64"),
         ]
 
+    def _create_inputs(self, inputs):
+        input_map = {}
+        input_map["x"] = inputs["image_tensor"].numpy()
+        input_map["m"] = np.array(inputs["mask"].numpy(), dtype="int64")
+
+        for key in inputs["tokenized_out"].keys():
+            input_map[key] = np.array(inputs["tokenized_out"][key].numpy(), dtype="int64")
+
+        input_map[key] = np.array(inputs["tokenized_out"][key].numpy(), dtype="int64")
+
+        for name in self.input_names:
+            input_tensor = self.predictor.get_input_handle(name)
+            input_tensor.copy_from_cpu(input_map[name])
+
     def _construct_processor(self, model):
         """
         Construct the tokenizer for the predictor.
@@ -108,21 +122,10 @@ def _run_model(self, inputs):
         """
 
         if self._static_mode:
-
-            inputs["mask"] = paddle.cast(inputs["mask"], dtype="int64")
-            inputs["tokenized_out"]["text_self_attention_masks"] = paddle.cast(
-                inputs["tokenized_out"]["text_self_attention_masks"], dtype="int64"
-            )
-            [pred_boxes, pred_logits] = self.predictor.run(
-                [
-                    inputs["image_tensor"],
-                    inputs["mask"],
-                    inputs["tokenized_out"]["input_ids"],
-                    inputs["tokenized_out"]["attention_mask"],
-                    inputs["tokenized_out"]["text_self_attention_masks"],
-                    inputs["tokenized_out"]["position_ids"],
-                ]
-            )
+            self._create_inputs(inputs)
+            self.predictor.run()
+            pred_boxes = self.output_handle[0].copy_to_cpu()
+            pred_logits = self.output_handle[1].copy_to_cpu()
             result = {"pred_logits": pred_logits, "pred_boxes": pred_boxes}
         else:
             result = self._model(
@@ -232,6 +235,15 @@ def _construct_input_spec(self):
             paddle.static.InputSpec(shape=shape2, dtype="int32"),
         ]
 
+    def _create_inputs(self, inputs):
+        input_map = {}
+        input_map["img"] = inputs["image_seg"].numpy()
+        input_map["prompt"] = np.array(inputs["prompt"].numpy())
+
+        for name in self.input_names:
+            input_tensor = self.predictor.get_input_handle(name)
+            input_tensor.copy_from_cpu(input_map[name])
+
     def _construct_processor(self, model):
         """
         Construct the tokenizer for the predictor.
@@ -286,8 +298,9 @@ def _run_model(self, inputs):
             if self._input_type == "boxs":
                 inputs["prompt"] = inputs["prompt"].reshape([-1, 4])
 
-            result = self.predictor.run([inputs["image_seg"], inputs["prompt"]])
-            result = result[0]
+            self._create_inputs(inputs)
+            self.predictor.run()
+            result = self.output_handle[0].copy_to_cpu()
 
         else:
             result = self._model(img=inputs["image_seg"], prompt=inputs["prompt"])
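Across both tasks, this commit replaces the old `predictor.run([tensor, ...])` call, which took a positional list and returned the outputs, with the handle-based Paddle Inference flow: `_create_inputs` copies numpy arrays into named input handles, `predictor.run()` executes the static graph, and results are read back via `self.output_handle[i].copy_to_cpu()`. A minimal, self-contained sketch of that pattern follows; the model/params file names, the input name, and the dummy shape are placeholders rather than values taken from this repository.

```python
import numpy as np
from paddle.inference import Config, create_predictor

# Sketch of the handle-based Paddle Inference flow used above.
# "inference.pdmodel"/"inference.pdiparams" and the dummy feed are placeholders.
config = Config("inference.pdmodel", "inference.pdiparams")
predictor = create_predictor(config)

# Feed every graph input by name, as _create_inputs does through its input_map.
feeds = {"x": np.zeros([1, 3, 800, 800], dtype="float32")}  # placeholder tensor
for name in predictor.get_input_names():
    predictor.get_input_handle(name).copy_from_cpu(feeds[name])

# run() executes the graph; outputs are fetched through handles afterwards,
# which is why _run_model now reads self.output_handle[i].copy_to_cpu().
predictor.run()
outputs = [predictor.get_output_handle(name).copy_to_cpu()
           for name in predictor.get_output_names()]
```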
