Extend foreground. #199

Merged
merged 6 commits into from Feb 18, 2021
2 changes: 1 addition & 1 deletion setup.cfg
@@ -17,6 +17,6 @@ line_length = 79
multi_line_output = 0
known_standard_library = pkg_resources,setuptools
known_first_party = mmedit
known_third_party =PIL,cv2,lmdb,mmcv,numpy,onnx,onnxruntime,pytest,scipy,titlecase,torch,torchvision
known_third_party =PIL,cv2,lmdb,mmcv,numpy,onnx,onnxruntime,pymatting,pytest,scipy,titlecase,torch,torchvision,tqdm
no_lines_before = STDLIB,LOCALFOLDER
default_section = THIRDPARTY
60 changes: 58 additions & 2 deletions tools/data/matting/comp1k/README.md
@@ -40,9 +40,16 @@ If you only want to composite test data (since compositing training data is time
python tools/data/matting/comp1k/preprocess_comp1k_dataset.py data/adobe_composition-1k data/coco data/VOCdevkit
```

> Currently, only `GCA` supports online composition of training data, but you can modify the data pipeline of other models to perform online composition instead of loading composited images (which we call `merged` in our data pipeline).
If you only want to preprocess the test data, e.g., for FBA, you can skip the training set by adding the `--skip_train` option:

## Check Directory Structure
```shell
# skip preprocessing training set
python tools/data/matting/comp1k/preprocess_comp1k_dataset.py data/adobe_composition-1k data/coco data/VOCdevkit --skip_train
```

> Currently, `GCA` and `FBA` support online composition of training data, but you can modify the data pipeline of other models to perform online composition instead of loading composited images (which we call `merged` in our data pipeline).
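
As a rough illustration, online composition replaces loading the pre-composited `merged` image with transforms that load `fg` and `alpha` and composite them against a random background on the fly. A minimal sketch, assuming the transform names used in the GCA config (`RandomLoadResizeBg`, `MergeFgAndBg`); other models may need different transforms:

```python
train_pipeline = [
    # Offline alternative: dict(type='LoadImageFromFile', key='merged'),
    dict(type='LoadImageFromFile', key='alpha', flag='grayscale'),
    dict(type='LoadImageFromFile', key='fg'),
    dict(type='RandomLoadResizeBg', bg_dir='data/coco/train2017'),  # random bg per iteration
    dict(type='MergeFgAndBg'),  # composite the `merged` image online from fg, alpha and bg
]
```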

## Check Directory Structure for DIM

The result folder structure should look like:

@@ -74,3 +81,52 @@ mmediting
│ ├── VOCdevkit
│ │ ├── VOC2012
```

## Prepare the Dataset for FBA

For FBA, the dynamic dataset augmentation tricks proposed in [Learning-Based Sampling for Natural Image Matting](https://openaccess.thecvf.com/content_CVPR_2019/papers/Tang_Learning-Based_Sampling_for_Natural_Image_Matting_CVPR_2019_paper.pdf) are adopted for the training set.

Prepare the test set as follows:

```shell
# skip preprocessing training set
python tools/data/matting/comp1k/preprocess_comp1k_dataset.py data/adobe_composition-1k data/coco data/VOCdevkit --skip_train
```

Extend the foreground as follows:

```shell
python tools/data/matting/comp1k/extend_fg.py data/adobe_composition-1k data/coco
```
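
Under the hood, `extend_fg.py` uses pymatting's multi-level foreground estimation to propagate foreground colors into regions where the alpha matte is transparent. A minimal per-image sketch of the same operation (file paths are illustrative):

```python
import numpy as np
from PIL import Image
from pymatting import estimate_foreground_ml, load_image

image = load_image('fg/example.png', 'RGB')      # foreground, float in [0, 1]
alpha = load_image('alpha/example.png', 'GRAY')  # alpha matte, float in [0, 1]

# Estimate the extended foreground and save it as an 8-bit PNG.
fg = estimate_foreground_ml(image, alpha, return_background=False)
Image.fromarray(np.uint8(fg * 255)).save('fg_extended/example.png')
```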

## Check Directory Structure for FBA

The final folder structure should look like:

```text
mmediting
├── mmedit
├── tools
├── configs
├── data
│ ├── adobe_composition-1k
│ │ ├── Test_set
│ │ │ ├── Adobe-licensed images
│ │ │ │ ├── alpha
│ │ │ │ ├── fg
│ │ │ │ ├── trimaps
│ │ │ ├── merged (generated by tools/data/matting/comp1k/preprocess_comp1k_dataset.py)
│ │ │ ├── bg (generated by tools/data/matting/comp1k/preprocess_comp1k_dataset.py)
│ │ ├── Training_set
│ │ │ ├── Adobe-licensed images
│ │ │ │ ├── alpha
│ │ │ │ ├── fg
│ │ │ │ ├── fg_extended (generated by tools/data/matting/comp1k/extend_fg.py)
│ │ │ ├── Other
│ │ │ │ ├── alpha
│ │ │ │ ├── fg
│ │ │ │ ├── fg_extended (generated by tools/data/matting/comp1k/extend_fg.py)
│ ├── coco
│ │ ├── train2014 (or train2017)
│ ├── VOCdevkit
│ │ ├── VOC2012
```
130 changes: 130 additions & 0 deletions tools/data/matting/comp1k/extend_fg.py
@@ -0,0 +1,130 @@
import argparse
import os
import os.path as osp
import re
import subprocess
from multiprocessing import Pool

import numpy as np
from PIL import Image
from pymatting import estimate_foreground_ml, load_image
from tqdm import tqdm


def fix_png_file(filename, folder):
    """Fix a png file in the target folder using pngfix.

    pngfix is a tool for repairing broken PNG files that ships with libpng;
    it may need to be installed separately on Linux or macOS.

    Args:
        filename (str): Name of the png file to fix.
        folder (str): Directory containing the file; used as the working
            directory when running pngfix.
    """
subprocess.call(
f'pngfix --quiet --strip=color --prefix=fixed_ "{filename}"',
cwd=f'{folder}',
shell=True)
subprocess.call(
f'mv "fixed_{filename}" "{filename}"', cwd=f'{folder}', shell=True)


def join_first_contain(directories, filename, data_root):
    """Join the first directory that contains the file.

    Args:
        directories (list[str]): Directories to search for the file.
        filename (str): The target filename.
        data_root (str): Root of the data path.

    Returns:
        str: Path of the file relative to ``data_root``.
    """
for directory in directories:
cur_path = osp.join(directory, filename)
if osp.exists(osp.join(data_root, cur_path)):
return cur_path
raise FileNotFoundError(f'Cannot find {filename} in dirs {directories}')


class ExtendFg:
    """Extend the foregrounds of the Adobe Composition-1k training set.

    Implemented as a class so that the bound method ``extend`` can be
    pickled and dispatched by ``multiprocessing.Pool``.
    """

def __init__(self, data_root, fg_dirs, alpha_dirs) -> None:
self.data_root = data_root
self.fg_dirs = fg_dirs
self.alpha_dirs = alpha_dirs

def extend(self, fg_name):
fg_name = fg_name.strip()
alpha_path = join_first_contain(self.alpha_dirs, fg_name,
self.data_root)
fg_path = join_first_contain(self.fg_dirs, fg_name, self.data_root)
alpha_path = osp.join(self.data_root, alpha_path)
fg_path = osp.join(self.data_root, fg_path)
extended_path = re.sub('/fg/', '/fg_extended/', fg_path)
        # extended foregrounds are always saved as png
        extended_path = re.sub(r'\.jpg$', '.png', extended_path)
if not osp.exists(alpha_path):
raise FileNotFoundError(f'{alpha_path} does not exist!')
if not osp.exists(fg_path):
raise FileNotFoundError(f'{fg_path} does not exist!')

image = load_image(fg_path, 'RGB')
alpha = load_image(alpha_path, 'GRAY')
F = estimate_foreground_ml(image, alpha, return_background=False)
fg = Image.fromarray(np.uint8(F * 255))
fg.save(extended_path)
fix_png_file(osp.basename(extended_path), osp.dirname(extended_path))


def parse_args():
parser = argparse.ArgumentParser(
description='Prepare Adobe composition 1k dataset',
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('data_root', help='Adobe composition 1k dataset root')
parser.add_argument('coco_root', help='COCO2014 or COCO2017 dataset root')
parser.add_argument(
        '--nproc', type=int, default=4, help='number of processes')
args = parser.parse_args()
return args


def main():
args = parse_args()
if not osp.exists(args.data_root):
raise FileNotFoundError(f'{args.data_root} does not exist!')
if not osp.exists(args.coco_root):
raise FileNotFoundError(f'{args.coco_root} does not exist!')

data_root = args.data_root

print('preparing training data...')

dir_prefix = 'Training_set'
fname_prefix = 'training'
fg_dirs = [
'Training_set/Adobe-licensed images/fg', 'Training_set/Other/fg'
]
alpha_dirs = [
'Training_set/Adobe-licensed images/alpha', 'Training_set/Other/alpha'
]
extended_dirs = [
'Training_set/Adobe-licensed images/fg_extended',
'Training_set/Other/fg_extended'
]
for p in extended_dirs:
p = osp.join(data_root, p)
os.makedirs(p, exist_ok=True)

fg_names = osp.join(dir_prefix, f'{fname_prefix}_fg_names.txt')
    with open(osp.join(data_root, fg_names)) as f:
        fg_names = f.readlines()
fg_iter = iter(fg_names)
num = len(fg_names)

extend_fg = ExtendFg(data_root, fg_dirs, alpha_dirs)
    with Pool(processes=args.nproc) as p:
        with tqdm(total=num) as pbar:
            # imap_unordered yields as each worker finishes; advance the
            # progress bar once per completed foreground.
            for _ in p.imap_unordered(extend_fg.extend, fg_iter):
                pbar.update()

print('train done')


if __name__ == '__main__':
main()
28 changes: 17 additions & 11 deletions tools/data/matting/comp1k/preprocess_comp1k_dataset.py
@@ -233,6 +233,10 @@ def parse_args():
help='whether to composite training foreground and background offline')
parser.add_argument(
        '--nproc', type=int, default=4, help='number of processes')
parser.add_argument(
'--skip_train',
action='store_true',
        help='whether to skip preprocessing the training data')
args = parser.parse_args()
return args

@@ -247,17 +251,19 @@ def main():
raise FileNotFoundError(f'{args.voc_root} does not exist!')
data_root = args.data_root

print('preparing training data...')
if osp.exists(osp.join(args.coco_root, 'train2017')):
train_source_bg_dir = osp.join(args.coco_root, 'train2017')
elif osp.exists(osp.join(args.coco_root, 'train2014')):
train_source_bg_dir = osp.join(args.coco_root, 'train2014')
else:
raise FileNotFoundError(
f'Could not find train2014 or train2017 under {args.coco_root}')
generate_json(data_root, train_source_bg_dir, args.composite, args.nproc,
'training')
print('train done')
if not args.skip_train:
print('preparing training data...')
if osp.exists(osp.join(args.coco_root, 'train2017')):
train_source_bg_dir = osp.join(args.coco_root, 'train2017')
elif osp.exists(osp.join(args.coco_root, 'train2014')):
train_source_bg_dir = osp.join(args.coco_root, 'train2014')
else:
raise FileNotFoundError(
f'Could not find train2014 or train2017 under {args.coco_root}'
)
generate_json(data_root, train_source_bg_dir, args.composite,
args.nproc, 'training')
print('train done')

fg_dir = 'Test_set/Adobe-licensed images/fg'
alpha_dir = 'Test_set/Adobe-licensed images/alpha'