Commit 2678c4a

…nto refine_pre_commit
jerrywgz committed Aug 15, 2023
2 parents 6f6e82a + 36317b2 commit 2678c4a
Showing 19 changed files with 1,372 additions and 468 deletions.
135 changes: 135 additions & 0 deletions paddlemix/datasets/coco_vqa.py
@@ -0,0 +1,135 @@
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import collections
import json
import os

from paddlevlp.utils.env import DATA_HOME
from paddlevlp.utils.log import logger

from .dataset import DatasetBuilder

# from paddle.dataset.common import md5file
# from paddle.utils.download import get_path_from_url

__all__ = ["VQADataset"]


class VQADataset(DatasetBuilder):
"""
    COCO VQA dataset.
"""

URL = "https://bj.bcebos.com/paddlemix/datasets/coco.tar.gz"
META_INFO = collections.namedtuple(
"META_INFO", ("images", "annotations", "images_md5", "annotations_md5"))
MD5 = ""
SPLITS = {
"train": META_INFO(
os.path.join("coco", "images"),
[
os.path.join("coco", "annotations/vqa_train.json"),
os.path.join("coco", "annotations/vqa_val.json")
],
"",
"aa31ac474cf6250ebb81d18348a07ed8", ),
"val": META_INFO(
os.path.join("coco", "images"),
[
os.path.join("coco", "annotations/vqa_val_eval.json"),
os.path.join("coco", "annotations/answer_list.json"),
os.path.join(
"coco",
"annotations/v2_OpenEnded_mscoco_val2014_questions.json"),
os.path.join("coco",
"annotations/v2_mscoco_val2014_annotations.json"),
],
"",
"b273847456ef5580e33713b1f7de52a0", ),
"test": META_INFO(
os.path.join("coco", "images"),
[
os.path.join("coco", "annotation/vqa_test.json"),
os.path.join("coco", "annotation/vqa_test.json"),
],
"",
"3ff34b0ef2db02d01c37399f6a2a6cd1", ),
}

    def _get_data(self, mode, **kwargs):
        logger.info("Default dataset root is {}".format(DATA_HOME))
images, annotations, image_hash, anno_hash = self.SPLITS[mode]
image_fullname = os.path.join(DATA_HOME, images)
if isinstance(annotations, (list, tuple)):
anno_fullname = []
for ann in annotations:
anno_fullname.append(os.path.join(DATA_HOME, ann))
else:
anno_fullname = os.path.join(DATA_HOME, annotations)
return image_fullname, anno_fullname, mode

    def _read(self, filename, *args):
        image_root, anno_path, mode = filename
        annotations = []
        # image_ids maps each image to a contiguous index; the mapping is
        # built here but not attached to the yielded samples below.
        if mode in ("val", "test"):
            with open(anno_path[0], "r") as f:
                annotations = json.load(f)
            image_ids = self._gen_image_id_eval(annotations)
        else:
            for ann_p in anno_path:
                with open(ann_p, "r") as f:
                    annotations.extend(json.load(f))
            image_ids = self._gen_image_id(annotations)
        for ann in annotations:
            image_path = os.path.join(image_root, ann["image"])
            if mode == "train":
                yield_data = {
                    "image": image_path,
                    "text_input": ann["question"],
                    "answers": ann["answers"],
                    "image_ids": ann["image_ids"],
                }
            else:
                yield_data = {
                    "image": image_path,
                    "text_input": ann["question"],
                    "question_id": ann["question_id"],
                    # e.g. "val2014/COCO_val2014_000000039769.jpg" -> "000000039769"
                    "image_id": os.path.splitext(os.path.basename(ann["image"]))[0].split("_")[-1],
                    "image_ids": ann["image_ids"],
                }
            yield yield_data

    def _gen_image_id(self, anno):
        img_ids = {}
        n = 0
        for ann in anno:
            if "image_id" not in ann:
                # Derive an id from the filename,
                # e.g. "COCO_val2014_000000039769.jpg" -> "000000039769".
                img_id = os.path.splitext(ann["image"].split("/")[-1])[0].split("_")[-1]
            else:
                img_id = ann["image_id"]
            if img_id not in img_ids:
                img_ids[img_id] = n
                n += 1
        return img_ids

    def _gen_image_id_eval(self, anno):
        img_ids = {}
        n = 0
        for ann in anno:
            img_id = os.path.splitext(ann["image"].split("/")[-1])[0].split("_")[-1]
            if img_id not in img_ids:
                img_ids[img_id] = n
                n += 1
        return img_ids
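
Below is a minimal usage sketch for this builder. The dataset name "coco_vqa" and the load_dataset entry point are assumptions (the registration is not part of this file), so treat the import path and name as illustrative rather than confirmed API.

# Hedged usage sketch: the "coco_vqa" registration and the paddlemix
# load_dataset entry point are assumed, not shown in this diff.
from paddlemix.datasets import load_dataset

train_ds = load_dataset("coco_vqa", splits="train")
# Each sample is the dict yielded by VQADataset._read; on the train split
# it carries "image", "text_input", "answers" and "image_ids".
for sample in train_ds:
    print(sample["image"], sample["text_input"])
    break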
7 changes: 2 additions & 5 deletions paddlemix/examples/blip2/export.py
@@ -36,8 +36,7 @@ class DataArguments:
"""

input_image: str = field(
default="http://images.cocodataset.org/val2017/000000039769.jpg",
metadata={"help": "The name of input image."},
default="http://images.cocodataset.org/val2017/000000039769.jpg", metadata={"help": "The name of input image."}
) # "http://images.cocodataset.org/val2017/000000039769.jpg"
prompt: str = field(
default=None, metadata={"help": "The prompt of the image to be generated."}
@@ -77,9 +76,7 @@ def main():
dtype = "float32"
if model_args.fp16:
decorated = paddle.amp.decorate(
-            models=[model.visual_encoder, model.language_model],
-            optimizers=None,
-            level="O2",
+            models=[model.visual_encoder, model.language_model], optimizers=None, level="O2"
)
model.visual_encoder, model.language_model = decorated
dtype = "float16"
@@ -53,8 +53,7 @@ class DataArguments:
metadata={"help": "The name of the task to use (via the datasets library)."},
)
prompt: str = field(
default="a photo of ",
metadata={"help": "The prompt of the image to be generated."},
default="a photo of ", metadata={"help": "The prompt of the image to be generated."}
) # "Question: how many cats are there? Answer:"


@@ -89,12 +88,10 @@ class PreTrainingArguments(TrainingArguments):
warmup_steps: int = field(default=2000, metadata={"help": "Number of warmup steps."})
lr_scheduler_name: str = field(default="CosineDecayWithWarmup", metadata={"help": "The scheduler name to use."})
per_device_train_batch_size: int = field(
-        default=128,
-        metadata={"help": "Batch size per GPU core/CPU for training. (default: 8)"},
+        default=128, metadata={"help": "Batch size per GPU core/CPU for training. (default: 8)"}
)
per_device_eval_batch_size: int = field(
-        default=1,
-        metadata={"help": " Batch size per GPU core/CPU for evaluation. (default:8)"},
+        default=1, metadata={"help": " Batch size per GPU core/CPU for evaluation. (default:8)"}
)
warmup_start_lr: float = field(default=1e-6, metadata={"help": " The initial learning rate of blip2."})
output_dir: str = field(default=".", metadata={"help": "The output path"})
@@ -111,8 +108,7 @@ class PreTrainingArguments(TrainingArguments):
)
tensor_parallel_degree: int = field(default=1, metadata={"help": "Set the number of tensor model parallel"})
sharding_parallel_degree: int = field(
-        default=1,
-        metadata={"help": "Set the number of sharding, enable sharding parallel"},
+        default=1, metadata={"help": "Set the number of sharding, enable sharding parallel"}
)
pipeline_parallel_degree: int = field(default=1, metadata={"help": "Enable pipeline parallel"})
fp16_opt_level: str = field(default="O1", metadata={"help": "Mixed Precision Type"})
@@ -122,8 +118,7 @@
)
tensor_parallel_degree: int = field(default=1, metadata={"help": "Set the number of tensor model parallel"})
sharding_parallel_degree: int = field(
-        default=1,
-        metadata={"help": "Set the number of sharding, enable sharding parallel"},
+        default=1, metadata={"help": "Set the number of sharding, enable sharding parallel"}
)
pipeline_parallel_degree: int = field(default=1, metadata={"help": "Enable pipeline parallel"})
model_path: str = field(
