Extend foreground. #199

Merged
merged 6 commits into from Feb 18, 2021
2 changes: 1 addition & 1 deletion setup.cfg
@@ -17,6 +17,6 @@ line_length = 79
multi_line_output = 0
known_standard_library = pkg_resources,setuptools
known_first_party = mmedit
known_third_party =PIL,cv2,lmdb,mmcv,numpy,onnx,onnxruntime,pytest,scipy,titlecase,torch,torchvision
known_third_party =PIL,cv2,lmdb,mmcv,numpy,onnx,onnxruntime,pymatting,pytest,scipy,titlecase,torch,torchvision,tqdm
no_lines_before = STDLIB,LOCALFOLDER
default_section = THIRDPARTY
60 changes: 58 additions & 2 deletions tools/data/matting/comp1k/README.md
@@ -40,9 +40,16 @@ If you only want to composite test data (since compositing training data is time
python tools/data/matting/comp1k/preprocess_comp1k_dataset.py data/adobe_composition-1k data/coco data/VOCdevkit
```

> Currently, only `GCA` supports online composition of training data, but you can modify the data pipeline of other models to perform online composition instead of loading composited images (which we call `merged` in our data pipeline).
If you only want to preprocess the test data, e.g., for FBA, you can skip the training set by adding the `--skip_train` option:

## Check Directory Structure
```shell
# skip preprocessing training set
python tools/data/matting/comp1k/preprocess_comp1k_dataset.py data/adobe_composition-1k data/coco data/VOCdevkit --skip_train
```

> Currently, `GCA` and `FBA` support online composition of training data, but you can modify the data pipeline of other models to perform online composition instead of loading composited images (which we call `merged` in our data pipeline).
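
As a rough illustration, online composition replaces loading the pre-composited `merged` image with transforms that load `fg` and `alpha` and composite them against a random background on the fly. A minimal sketch, assuming the transform names used in the GCA config (`RandomLoadResizeBg`, `MergeFgAndBg`); other models may need different transforms:

```python
train_pipeline = [
    # Offline alternative: dict(type='LoadImageFromFile', key='merged'),
    dict(type='LoadImageFromFile', key='alpha', flag='grayscale'),
    dict(type='LoadImageFromFile', key='fg'),
    dict(type='RandomLoadResizeBg', bg_dir='data/coco/train2017'),  # random bg per iteration
    dict(type='MergeFgAndBg'),  # composite the `merged` image online from fg, alpha and bg
]
```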

## Check Directory Structure for DIM

The result folder structure should look like:

@@ -74,3 +81,52 @@ mmediting
│ ├── VOCdevkit
│ │ ├── VOC2012
```

## Prepare the Dataset for FBA

For FBA, the dynamic dataset augmentation tricks proposed in [Learning-Based Sampling for Natural Image Matting](https://openaccess.thecvf.com/content_CVPR_2019/papers/Tang_Learning-Based_Sampling_for_Natural_Image_Matting_CVPR_2019_paper.pdf) are adopted for the training set.

Prepare the test set as follows:

```shell
# skip preprocessing training set
python tools/data/matting/comp1k/preprocess_comp1k_dataset.py data/adobe_composition-1k data/coco data/VOCdevkit --skip_train
```

Extend the foreground as follows:

```shell
python tools/data/matting/comp1k/extend_fg.py data/adobe_composition-1k data/coco
```
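
Under the hood, `extend_fg.py` uses pymatting's multi-level foreground estimation to propagate foreground colors into regions where the alpha matte is transparent. A minimal per-image sketch of the same operation (file paths are illustrative):

```python
import numpy as np
from PIL import Image
from pymatting import estimate_foreground_ml, load_image

image = load_image('fg/example.png', 'RGB')      # foreground, float in [0, 1]
alpha = load_image('alpha/example.png', 'GRAY')  # alpha matte, float in [0, 1]

# Estimate the extended foreground and save it as an 8-bit PNG.
fg = estimate_foreground_ml(image, alpha, return_background=False)
Image.fromarray(np.uint8(fg * 255)).save('fg_extended/example.png')
```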

## Check Directory Structure for FBA

The final folder structure should look like:

```text
mmediting
├── mmedit
├── tools
├── configs
├── data
│ ├── adobe_composition-1k
│ │ ├── Test_set
│ │ │ ├── Adobe-licensed images
│ │ │ │ ├── alpha
│ │ │ │ ├── fg
│ │ │ │ ├── trimaps
│ │ │ ├── merged (generated by tools/data/matting/comp1k/preprocess_comp1k_dataset.py)
│ │ │ ├── bg (generated by tools/data/matting/comp1k/preprocess_comp1k_dataset.py)
│ │ ├── Training_set
│ │ │ ├── Adobe-licensed images
│ │ │ │ ├── alpha
│ │ │ │ ├── fg
│ │ │ │ ├── fg_extended (generated by tools/data/matting/comp1k/extend_fg.py)
│ │ │ ├── Other
│ │ │ │ ├── alpha
│ │ │ │ ├── fg
│ │ │ │ ├── fg_extended (generated by tools/data/matting/comp1k/extend_fg.py)
│ ├── coco
│ │ ├── train2014 (or train2017)
│ ├── VOCdevkit
│ │ ├── VOC2012
```
130 changes: 130 additions & 0 deletions tools/data/matting/comp1k/extend_fg.py
@@ -0,0 +1,130 @@
import argparse
import os
import os.path as osp
import re
import subprocess
from multiprocessing import Pool

import numpy as np
from PIL import Image
from pymatting import estimate_foreground_ml, load_image
from tqdm import tqdm


def fix_png_file(filename, folder):
    """Fix a png file in the target folder using pngfix.

    pngfix is a tool for repairing broken PNG files that ships with libpng;
    it may need to be installed separately on Linux or macOS.

    Args:
        filename (str): Name of the png file to fix.
        folder (str): Directory containing the file; used as the working
            directory when running pngfix.
    """
subprocess.call(
f'pngfix --quiet --strip=color --prefix=fixed_ "{filename}"',
cwd=f'{folder}',
shell=True)
subprocess.call(
f'mv "fixed_{filename}" "{filename}"', cwd=f'{folder}', shell=True)


def join_first_contain(directories, filename, data_root):
    """Join the first directory that contains the file.

    Args:
        directories (list[str]): Directories to search for the file.
        filename (str): The target filename.
        data_root (str): Root of the data path.

    Returns:
        str: Path of the file relative to ``data_root``.
    """
for directory in directories:
cur_path = osp.join(directory, filename)
if osp.exists(osp.join(data_root, cur_path)):
return cur_path
raise FileNotFoundError(f'Cannot find {filename} in dirs {directories}')


class ExtendFg:
    """Extend the foregrounds of the Adobe Composition-1k training set.

    Implemented as a class so that the bound method ``extend`` can be
    pickled and dispatched by ``multiprocessing.Pool``.
    """

def __init__(self, data_root, fg_dirs, alpha_dirs) -> None:
self.data_root = data_root
self.fg_dirs = fg_dirs
self.alpha_dirs = alpha_dirs

def extend(self, fg_name):
fg_name = fg_name.strip()
alpha_path = join_first_contain(self.alpha_dirs, fg_name,
self.data_root)
fg_path = join_first_contain(self.fg_dirs, fg_name, self.data_root)
alpha_path = osp.join(self.data_root, alpha_path)
fg_path = osp.join(self.data_root, fg_path)
extended_path = re.sub('/fg/', '/fg_extended/', fg_path)
        # extended foregrounds are always saved as png
        extended_path = re.sub(r'\.jpg$', '.png', extended_path)
if not osp.exists(alpha_path):
raise FileNotFoundError(f'{alpha_path} does not exist!')
if not osp.exists(fg_path):
raise FileNotFoundError(f'{fg_path} does not exist!')

image = load_image(fg_path, 'RGB')
alpha = load_image(alpha_path, 'GRAY')
F = estimate_foreground_ml(image, alpha, return_background=False)
fg = Image.fromarray(np.uint8(F * 255))
fg.save(extended_path)
fix_png_file(osp.basename(extended_path), osp.dirname(extended_path))


def parse_args():
parser = argparse.ArgumentParser(
description='Prepare Adobe composition 1k dataset',
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('data_root', help='Adobe composition 1k dataset root')
parser.add_argument('coco_root', help='COCO2014 or COCO2017 dataset root')
parser.add_argument(
        '--nproc', type=int, default=4, help='number of processes')
args = parser.parse_args()
return args


def main():
args = parse_args()
if not osp.exists(args.data_root):
raise FileNotFoundError(f'{args.data_root} does not exist!')
if not osp.exists(args.coco_root):
raise FileNotFoundError(f'{args.coco_root} does not exist!')

data_root = args.data_root

print('preparing training data...')

dir_prefix = 'Training_set'
fname_prefix = 'training'
fg_dirs = [
'Training_set/Adobe-licensed images/fg', 'Training_set/Other/fg'
]
alpha_dirs = [
'Training_set/Adobe-licensed images/alpha', 'Training_set/Other/alpha'
]
extended_dirs = [
'Training_set/Adobe-licensed images/fg_extended',
'Training_set/Other/fg_extended'
]
for p in extended_dirs:
p = osp.join(data_root, p)
os.makedirs(p, exist_ok=True)

fg_names = osp.join(dir_prefix, f'{fname_prefix}_fg_names.txt')
    with open(osp.join(data_root, fg_names)) as f:
        fg_names = f.readlines()
fg_iter = iter(fg_names)
num = len(fg_names)

extend_fg = ExtendFg(data_root, fg_dirs, alpha_dirs)
    with Pool(processes=args.nproc) as p:
        with tqdm(total=num) as pbar:
            # imap_unordered yields as each worker finishes; advance the
            # progress bar once per completed foreground.
            for _ in p.imap_unordered(extend_fg.extend, fg_iter):
                pbar.update()

print('train done')


if __name__ == '__main__':
main()
28 changes: 17 additions & 11 deletions tools/data/matting/comp1k/preprocess_comp1k_dataset.py
@@ -233,6 +233,10 @@ def parse_args():
help='whether to composite training foreground and background offline')
parser.add_argument(
        '--nproc', type=int, default=4, help='number of processes')
parser.add_argument(
'--skip_train',
action='store_true',
        help='whether to skip preprocessing the training data')
args = parser.parse_args()
return args

@@ -247,17 +251,19 @@ def main():
raise FileNotFoundError(f'{args.voc_root} does not exist!')
data_root = args.data_root

print('preparing training data...')
if osp.exists(osp.join(args.coco_root, 'train2017')):
train_source_bg_dir = osp.join(args.coco_root, 'train2017')
elif osp.exists(osp.join(args.coco_root, 'train2014')):
train_source_bg_dir = osp.join(args.coco_root, 'train2014')
else:
raise FileNotFoundError(
f'Could not find train2014 or train2017 under {args.coco_root}')
generate_json(data_root, train_source_bg_dir, args.composite, args.nproc,
'training')
print('train done')
if not args.skip_train:
print('preparing training data...')
if osp.exists(osp.join(args.coco_root, 'train2017')):
train_source_bg_dir = osp.join(args.coco_root, 'train2017')
elif osp.exists(osp.join(args.coco_root, 'train2014')):
train_source_bg_dir = osp.join(args.coco_root, 'train2014')
else:
raise FileNotFoundError(
f'Could not find train2014 or train2017 under {args.coco_root}'
)
generate_json(data_root, train_source_bg_dir, args.composite,
args.nproc, 'training')
print('train done')

fg_dir = 'Test_set/Adobe-licensed images/fg'
alpha_dir = 'Test_set/Adobe-licensed images/alpha'