diff --git a/setup.cfg b/setup.cfg
index 5587b04f12..6aec562119 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -17,6 +17,6 @@ line_length = 79
 multi_line_output = 0
 known_standard_library = pkg_resources,setuptools
 known_first_party = mmedit
-known_third_party =PIL,cv2,lmdb,mmcv,numpy,onnx,onnxruntime,pytest,scipy,titlecase,torch,torchvision
+known_third_party =PIL,cv2,lmdb,mmcv,numpy,onnx,onnxruntime,pymatting,pytest,scipy,titlecase,torch,torchvision,tqdm
 no_lines_before = STDLIB,LOCALFOLDER
 default_section = THIRDPARTY
diff --git a/tools/data/matting/comp1k/README.md b/tools/data/matting/comp1k/README.md
index 7d83d5e092..a4ea68b418 100644
--- a/tools/data/matting/comp1k/README.md
+++ b/tools/data/matting/comp1k/README.md
@@ -40,9 +40,16 @@ If you only want to composite test data (since compositing training data is time
 python tools/data/matting/comp1k/preprocess_comp1k_dataset.py data/adobe_composition-1k data/coco data/VOCdevkit
 ```
 
-> Currently, only `GCA` supports online composition of training data. But you can modify the data pipeline of other models to perform online composition instead of loading composited images (we called it `merged` in our data pipeline).
+If you only want to preprocess the test data, e.g. for FBA, you can skip the training set by adding the `--skip_train` option:
 
-## Check Directory Structure
+```shell
+# skip preprocessing the training set
+python tools/data/matting/comp1k/preprocess_comp1k_dataset.py data/adobe_composition-1k data/coco data/VOCdevkit --skip_train
+```
+
+> Currently, `GCA` and `FBA` support online composition of training data, but you can modify the data pipeline of other models to perform online composition instead of loading composited images (we call it `merged` in our data pipeline).
+
+## Check Directory Structure for DIM
 
 The result folder structure should look like:
 
@@ -74,3 +81,52 @@ mmediting
 │   ├── VOCdevkit
 │   │   ├── VOC2012
 ```
+## Prepare the dataset for FBA
+
+For FBA, we adopt the dynamic dataset augmentation tricks proposed in [Learning-Based Sampling for Natural Image Matting](https://openaccess.thecvf.com/content_CVPR_2019/papers/Tang_Learning-Based_Sampling_for_Natural_Image_Matting_CVPR_2019_paper.pdf) for the training set.
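+
+These augmentation tricks composite each training image on the fly rather than loading a pre-composited `merged` image. Conceptually, the composition step is plain alpha blending, sketched below with NumPy (for illustration only; the real logic lives in the data pipeline):
+
+```python
+import numpy as np
+
+
+def composite(fg, bg, alpha):
+    """Blend a foreground onto a background with an alpha matte.
+
+    Args:
+        fg, bg: float images in [0, 1] with shape (H, W, 3).
+        alpha: float matte in [0, 1] with shape (H, W, 1).
+    """
+    return alpha * fg + (1 - alpha) * bg
+```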
+
+Prepare the test set as follows:
+
+```shell
+# skip preprocessing the training set
+python tools/data/matting/comp1k/preprocess_comp1k_dataset.py data/adobe_composition-1k data/coco data/VOCdevkit --skip_train
+```
+
+Extend the foreground as follows:
+
+```shell
+python tools/data/matting/comp1k/extend_fg.py data/adobe_composition-1k data/coco
+```
+
+## Check Directory Structure for FBA
+
+The final folder structure should look like:
+
+```text
+mmediting
+├── mmedit
+├── tools
+├── configs
+├── data
+│   ├── adobe_composition-1k
+│   │   ├── Test_set
+│   │   │   ├── Adobe-licensed images
+│   │   │   │   ├── alpha
+│   │   │   │   ├── fg
+│   │   │   │   ├── trimaps
+│   │   │   ├── merged (generated by tools/data/matting/comp1k/preprocess_comp1k_dataset.py)
+│   │   │   ├── bg (generated by tools/data/matting/comp1k/preprocess_comp1k_dataset.py)
+│   │   ├── Training_set
+│   │   │   ├── Adobe-licensed images
+│   │   │   │   ├── alpha
+│   │   │   │   ├── fg
+│   │   │   │   ├── fg_extended (generated by tools/data/matting/comp1k/extend_fg.py)
+│   │   │   ├── Other
+│   │   │   │   ├── alpha
+│   │   │   │   ├── fg
+│   │   │   │   ├── fg_extended (generated by tools/data/matting/comp1k/extend_fg.py)
+│   ├── coco
+│   │   ├── train2014 (or train2017)
+│   ├── VOCdevkit
+│   │   ├── VOC2012
+```
diff --git a/tools/data/matting/comp1k/extend_fg.py b/tools/data/matting/comp1k/extend_fg.py
new file mode 100644
index 0000000000..d3627904cb
--- /dev/null
+++ b/tools/data/matting/comp1k/extend_fg.py
@@ -0,0 +1,130 @@
+import argparse
+import os
+import os.path as osp
+import re
+import subprocess
+from multiprocessing import Pool
+
+import numpy as np
+from PIL import Image
+from pymatting import estimate_foreground_ml, load_image
+from tqdm import tqdm
+
+
+def fix_png_file(filename, folder):
+    """Fix a png file in place using pngfix.
+
+    pngfix is a tool to fix PNG files. It is typically available on Linux
+    and macOS.
+
+    Args:
+        filename (str): Name of the png file to fix.
+        folder (str): Directory containing the file; pngfix runs with this
+            as its working directory.
+    """
+    subprocess.call(
+        f'pngfix --quiet --strip=color --prefix=fixed_ "{filename}"',
+        cwd=f'{folder}',
+        shell=True)
+    subprocess.call(
+        f'mv "fixed_{filename}" "{filename}"', cwd=f'{folder}', shell=True)
+
+
+def join_first_contain(directories, filename, data_root):
+    """Join the first directory that contains the file.
+
+    Args:
+        directories (list[str]): Directories to search for the file.
+        filename (str): The target filename.
+        data_root (str): Root of the data path.
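+
+    Returns:
+        str: Path of ``filename`` relative to ``data_root``, i.e.
+            ``filename`` joined onto the first directory in ``directories``
+            that contains it.
+
+    Raises:
+        FileNotFoundError: If no directory in ``directories`` contains
+            ``filename``.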
+ """ + for directory in directories: + cur_path = osp.join(directory, filename) + if osp.exists(osp.join(data_root, cur_path)): + return cur_path + raise FileNotFoundError(f'Cannot find {filename} in dirs {directories}') + + +class ExtendFg: + + def __init__(self, data_root, fg_dirs, alpha_dirs) -> None: + self.data_root = data_root + self.fg_dirs = fg_dirs + self.alpha_dirs = alpha_dirs + + def extend(self, fg_name): + fg_name = fg_name.strip() + alpha_path = join_first_contain(self.alpha_dirs, fg_name, + self.data_root) + fg_path = join_first_contain(self.fg_dirs, fg_name, self.data_root) + alpha_path = osp.join(self.data_root, alpha_path) + fg_path = osp.join(self.data_root, fg_path) + extended_path = re.sub('/fg/', '/fg_extended/', fg_path) + extended_path = extended_path.replace('jpg', 'png') + if not osp.exists(alpha_path): + raise FileNotFoundError(f'{alpha_path} does not exist!') + if not osp.exists(fg_path): + raise FileNotFoundError(f'{fg_path} does not exist!') + + image = load_image(fg_path, 'RGB') + alpha = load_image(alpha_path, 'GRAY') + F = estimate_foreground_ml(image, alpha, return_background=False) + fg = Image.fromarray(np.uint8(F * 255)) + fg.save(extended_path) + fix_png_file(osp.basename(extended_path), osp.dirname(extended_path)) + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Prepare Adobe composition 1k dataset', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument('data_root', help='Adobe composition 1k dataset root') + parser.add_argument('coco_root', help='COCO2014 or COCO2017 dataset root') + parser.add_argument( + '--nproc', type=int, default=4, help='number of processer') + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + if not osp.exists(args.data_root): + raise FileNotFoundError(f'{args.data_root} does not exist!') + if not osp.exists(args.coco_root): + raise FileNotFoundError(f'{args.coco_root} does not exist!') + + data_root = args.data_root + + print('preparing training data...') + + dir_prefix = 'Training_set' + fname_prefix = 'training' + fg_dirs = [ + 'Training_set/Adobe-licensed images/fg', 'Training_set/Other/fg' + ] + alpha_dirs = [ + 'Training_set/Adobe-licensed images/alpha', 'Training_set/Other/alpha' + ] + extended_dirs = [ + 'Training_set/Adobe-licensed images/fg_extended', + 'Training_set/Other/fg_extended' + ] + for p in extended_dirs: + p = osp.join(data_root, p) + os.makedirs(p, exist_ok=True) + + fg_names = osp.join(dir_prefix, f'{fname_prefix}_fg_names.txt') + fg_names = open(osp.join(data_root, fg_names)).readlines() + fg_iter = iter(fg_names) + num = len(fg_names) + + extend_fg = ExtendFg(data_root, fg_dirs, alpha_dirs) + with Pool(processes=args.nproc) as p: + with tqdm(total=num) as pbar: + for i, _ in tqdm( + enumerate(p.imap_unordered(extend_fg.extend, fg_iter))): + pbar.update() + + print('train done') + + +if __name__ == '__main__': + main() diff --git a/tools/data/matting/comp1k/preprocess_comp1k_dataset.py b/tools/data/matting/comp1k/preprocess_comp1k_dataset.py index 3946f9c93c..625dcc85b6 100644 --- a/tools/data/matting/comp1k/preprocess_comp1k_dataset.py +++ b/tools/data/matting/comp1k/preprocess_comp1k_dataset.py @@ -233,6 +233,10 @@ def parse_args(): help='whether to composite training foreground and background offline') parser.add_argument( '--nproc', type=int, default=4, help='number of processer') + parser.add_argument( + '--skip_train', + action='store_true', + help='whether to skip the training data') args = parser.parse_args() 
     return args
 
@@ -247,17 +251,19 @@ def main():
         raise FileNotFoundError(f'{args.voc_root} does not exist!')
     data_root = args.data_root
 
-    print('preparing training data...')
-    if osp.exists(osp.join(args.coco_root, 'train2017')):
-        train_source_bg_dir = osp.join(args.coco_root, 'train2017')
-    elif osp.exists(osp.join(args.coco_root, 'train2014')):
-        train_source_bg_dir = osp.join(args.coco_root, 'train2014')
-    else:
-        raise FileNotFoundError(
-            f'Could not find train2014 or train2017 under {args.coco_root}')
-    generate_json(data_root, train_source_bg_dir, args.composite, args.nproc,
-                  'training')
-    print('train done')
+    if not args.skip_train:
+        print('preparing training data...')
+        if osp.exists(osp.join(args.coco_root, 'train2017')):
+            train_source_bg_dir = osp.join(args.coco_root, 'train2017')
+        elif osp.exists(osp.join(args.coco_root, 'train2014')):
+            train_source_bg_dir = osp.join(args.coco_root, 'train2014')
+        else:
+            raise FileNotFoundError(
+                f'Could not find train2014 or train2017 under {args.coco_root}'
+            )
+        generate_json(data_root, train_source_bg_dir, args.composite,
+                      args.nproc, 'training')
+        print('train done')
 
     fg_dir = 'Test_set/Adobe-licensed images/fg'
     alpha_dir = 'Test_set/Adobe-licensed images/alpha'