[Feature] Add script to crop REDS images into sub-images for faster IO (
ckkelvinchan authored Dec 26, 2021
1 parent 77c1d6b commit dea1e01
Showing 3 changed files with 274 additions and 0 deletions.
42 changes: 42 additions & 0 deletions tools/data/super-resolution/reds/README.md
@@ -51,3 +51,45 @@ If you want to use LMDB datasets for faster IO speed, you can make LMDB files by
```shell
python tools/data/super-resolution/reds/preprocess_reds_dataset.py --root-path ./data/REDS --make-lmdb
```

## Crop to sub-images

MMEditing also supports cropping REDS images into sub-images for faster IO. We provide a script for this:

```shell
python tools/data/super-resolution/reds/crop_sub_images.py --data-root ./data/REDS --scales 4
```
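
If LR folders for other scales are available, several scales can be passed at once. A hypothetical invocation, assuming `X2` and `X3` folders also exist under `train_sharp_bicubic`:

```shell
python tools/data/super-resolution/reds/crop_sub_images.py --data-root ./data/REDS --scales 2 3 4
```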

The generated data is stored under `REDS`, and the directory structure is as follows, where `_sub` indicates the sub-images.

```text
mmediting
├── mmedit
├── tools
├── configs
├── data
│ ├── REDS
│ │ ├── train_sharp
│ │ │ ├── 000
│ │ │ ├── 001
│ │ │ ├── ...
│ │ ├── train_sharp_sub
│ │ │ ├── 000_s001
│ │ │ ├── 000_s002
│ │ │ ├── ...
│ │ │ ├── 001_s001
│ │ │ ├── ...
│ │ ├── train_sharp_bicubic
│ │ │ ├── X4
│ │ │ │ ├── 000
│ │ │ │ ├── 001
│ │ │ │ ├── ...
│ │ │ ├── X4_sub
│ │ │ │ ├── 000_s001
│ │ │ │ ├── 000_s002
│ │ │ │ ├── ...
│ │ │ │ ├── 001_s001
│ │ │ │ ├── ...
```

Note that by default `preprocess_reds_dataset.py` does not generate LMDB files or annotation files for the cropped dataset. You may need to modify the scripts slightly to support these operations.
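
As a starting point, a minimal sketch for building an annotation file for the cropped HR folder might look like the following (the file name `meta_info_REDS_sub.txt` and the one-path-per-line format are assumptions; adapt them to whatever meta file your dataset configuration expects):

```python
import os.path as osp

import mmcv

data_root = './data/REDS'  # assumed dataset root
sub_folder = osp.join(data_root, 'train_sharp_sub')

# Write one relative sub-image path per line, e.g. '000_s001/00000000.png'.
with open(osp.join(data_root, 'meta_info_REDS_sub.txt'), 'w') as f:
    for path in sorted(mmcv.scandir(sub_folder, suffix='.png',
                                    recursive=True)):
        f.write(f'{path}\n')
```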
42 changes: 42 additions & 0 deletions tools/data/super-resolution/reds/README_zh-CN.md
@@ -52,3 +52,45 @@ mmediting
```shell
python tools/data/super-resolution/reds/preprocess_reds_dataset.py --root-path ./data/REDS --make-lmdb
```

## Crop to sub-images

MMEditing supports cropping REDS images into sub-images for faster IO. We provide a script for this:

```shell
python tools/data/super-resolution/reds/crop_sub_images.py --data-root ./data/REDS --scales 4
```

The generated data is stored under `REDS`, and the directory structure is as follows, where `_sub` indicates the sub-images.

```text
mmediting
├── mmedit
├── tools
├── configs
├── data
│ ├── REDS
│ │ ├── train_sharp
│ │ │ ├── 000
│ │ │ ├── 001
│ │ │ ├── ...
│ │ ├── train_sharp_sub
│ │ │ ├── 000_s001
│ │ │ ├── 000_s002
│ │ │ ├── ...
│ │ │ ├── 001_s001
│ │ │ ├── ...
│ │ ├── train_sharp_bicubic
│ │ │ ├── X4
│ │ │ │ ├── 000
│ │ │ │ ├── 001
│ │ │ │ ├── ...
│ │ │ ├── X4_sub
│ │ │ │ ├── 000_s001
│ │ │ │ ├── 000_s002
│ │ │ │ ├── ...
│ │ │ │ ├── 001_s001
│ │ │ │ ├── ...
```

Note that by default `preprocess_reds_dataset.py` does not generate LMDB files or annotation files for the cropped dataset. You may need to modify the scripts slightly to support these operations.
190 changes: 190 additions & 0 deletions tools/data/super-resolution/reds/crop_sub_images.py
@@ -0,0 +1,190 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import os
import os.path as osp
import sys
from multiprocessing import Pool

import cv2
import mmcv
import numpy as np


def worker(path, opt):
"""Worker for each process.
Args:
path (str): Image path.
opt (dict): Configuration dict. It contains:
crop_size (int): Crop size.
step (int): Step for overlapped sliding window.
thresh_size (int): Threshold size. Patches whose size is smaller
than thresh_size will be dropped.
save_folder (str): Path to save folder.
compression_level (int): for cv2.IMWRITE_PNG_COMPRESSION.
Returns:
process_info (str): Process information displayed in progress bar.
"""
crop_size = opt['crop_size']
step = opt['step']
thresh_size = opt['thresh_size']
    sequence = osp.basename(osp.dirname(path))
    img_name, extension = osp.splitext(osp.basename(path))

img = mmcv.imread(path, flag='unchanged')

if img.ndim == 2 or img.ndim == 3:
h, w = img.shape[:2]
else:
raise ValueError(f'Image ndim should be 2 or 3, but got {img.ndim}')

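    # Top-left coordinates of a sliding window over the image. If the
    # leftover border is larger than thresh_size, append one extra window
    # aligned to the image border so that region is not dropped.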
h_space = np.arange(0, h - crop_size + 1, step)
if h - (h_space[-1] + crop_size) > thresh_size:
h_space = np.append(h_space, h - crop_size)
w_space = np.arange(0, w - crop_size + 1, step)
if w - (w_space[-1] + crop_size) > thresh_size:
w_space = np.append(w_space, w - crop_size)

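    # Save each crop into '<sequence>_s<index>' so that frames from the same
    # spatial window share one sub-folder across the whole sequence.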
index = 0
for x in h_space:
for y in w_space:
index += 1
cropped_img = img[x:x + crop_size, y:y + crop_size, ...]
sub_folder = osp.join(opt['save_folder'],
f'{sequence}_s{index:03d}')
mmcv.mkdir_or_exist(sub_folder)
cv2.imwrite(
osp.join(sub_folder, f'{img_name}{extension}'), cropped_img,
[cv2.IMWRITE_PNG_COMPRESSION, opt['compression_level']])
process_info = f'Processing {img_name} ...'
return process_info


def extract_subimages(opt):
"""Crop images to subimages.
Args:
opt (dict): Configuration dict. It contains:
input_folder (str): Path to the input folder.
save_folder (str): Path to save folder.
n_thread (int): Thread number.
"""
input_folder = opt['input_folder']
save_folder = opt['save_folder']
if not osp.exists(save_folder):
os.makedirs(save_folder)
print(f'mkdir {save_folder} ...')
else:
print(f'Folder {save_folder} already exists. Exit.')
sys.exit(1)

img_list = list(mmcv.scandir(input_folder, recursive=True))

img_list = [osp.join(input_folder, v) for v in img_list]
prog_bar = mmcv.ProgressBar(len(img_list))
pool = Pool(opt['n_thread'])
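    # Note: apply_async only triggers the callback for successful tasks;
    # exceptions raised inside `worker` are silently swallowed here.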
for path in img_list:
pool.apply_async(
worker, args=(path, opt), callback=lambda arg: prog_bar.update())
pool.close()
pool.join()
print('All processes done.')


def main_extract_subimages(args):
"""A multi-thread tool to crop large images to sub-images for faster IO.
It is used for REDS dataset.
opt (dict): Configuration dict. It contains:
n_thread (int): Thread number.
compression_level (int): CV_IMWRITE_PNG_COMPRESSION from 0 to 9.
A higher value means a smaller size and longer compression time.
Use 0 for faster CPU decompression. Default: 3, same in cv2.
scales (list[int]): The downsampling factors corresponding to the
LR folders you want to process.
Default: [].
input_folder (str): Path to the input folder.
save_folder (str): Path to save folder.
crop_size (int): Crop size.
step (int): Step for overlapped sliding window.
thresh_size (int): Threshold size. Patches whose size is lower
than thresh_size will be dropped.
Usage:
For each folder, run this script.
For example, if scales = [4], there are two folders to be processed:
train_sharp
train_sharp_bicubic/X4
After process, each sub_folder should have the same number of
subimages. You can also specify scales by modifying the argument
'scales'. Remember to modify opt configurations according to your
settings.
"""

opt = {}
opt['n_thread'] = args.n_thread
opt['compression_level'] = args.compression_level

# HR images
opt['input_folder'] = osp.join(args.data_root, 'train_sharp')
opt['save_folder'] = osp.join(args.data_root, 'train_sharp_sub')
opt['crop_size'] = args.crop_size
opt['step'] = args.step
opt['thresh_size'] = args.thresh_size
extract_subimages(opt)

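    # LR images: divide crop size, step and threshold by the scale so that
    # LR sub-images stay spatially aligned with their HR counterparts.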
for scale in args.scales:
opt['input_folder'] = osp.join(args.data_root,
f'train_sharp_bicubic/X{scale}')
opt['save_folder'] = osp.join(args.data_root,
f'train_sharp_bicubic/X{scale}_sub')
opt['crop_size'] = args.crop_size // scale
opt['step'] = args.step // scale
opt['thresh_size'] = args.thresh_size // scale
extract_subimages(opt)


def parse_args():
    parser = argparse.ArgumentParser(
        description='Preprocess REDS datasets',
        epilog='You can first download REDS datasets using the script from: '
        'https://gist.github.com/SeungjunNah/b10d369b92840cb8dd2118dd4f41d643')
parser.add_argument('--data-root', type=str, help='root path for REDS')
parser.add_argument(
'--scales', nargs='*', default=[], help='scale factor list')
    parser.add_argument(
        '--crop-size',
        nargs='?',
        default=480,
        type=int,
        help='cropped size for HR images')
    parser.add_argument(
        '--step', nargs='?', default=240, type=int,
        help='step size for HR images')
    parser.add_argument(
        '--thresh-size',
        nargs='?',
        default=0,
        type=int,
        help='threshold size for HR images')
    parser.add_argument(
        '--compression-level',
        nargs='?',
        default=3,
        type=int,
        help='compression level when saving png images')
    parser.add_argument(
        '--n-thread',
        nargs='?',
        default=20,
        type=int,
        help='number of processes when using multiprocessing')

args = parser.parse_args()
return args


if __name__ == '__main__':
args = parse_args()

# extract subimages
args.scales = [int(v) for v in args.scales]
main_extract_subimages(args)
