Skip to content

Commit

Permalink
Add script to merge annotations
Browse files Browse the repository at this point in the history
  • Loading branch information
Dmitry Sidnev committed Feb 14, 2019
1 parent cc55c30 commit 0bfe968
Show file tree
Hide file tree
Showing 2 changed files with 214 additions and 0 deletions.
20 changes: 20 additions & 0 deletions utils/coco/converter.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,23 @@ label1 label2
label3
```

## Merge several annotations in COCO representation into one

Run the script `merge_annotations.py`

```bash
python merge_annotations.py --input-dir /path/to/directory/with/datasets --output /path/to/result/annotation.json --images-map /path/to/file/with/matched/datasets/and/images.txt --draw /path/to/directory/where/save/images
```

Please run `python merge_annotations.py --help` for more details.

Example of a file for `--images-map`:

```bash
{
"dataset1_part1.json": "images/dataset1/part1",
"dataset1_part2.json": "images/dataset1/part2",
"dataset2_part1.json": "images/dataset2/part1",
"dataset2_part2.json": "images/dataset2/part2"
}
```
194 changes: 194 additions & 0 deletions utils/coco/merge_annotations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
import argparse
import cv2
import glog
import json
import numpy as np
import os

from difflib import SequenceMatcher
from tqdm import tqdm

from pycocotools import coco as coco_loader


def parse_args():
"""Parse arguments of command line"""
parser = argparse.ArgumentParser(
description='Merge annotations in COCO representation into one'
)
parser.add_argument(
'--input-dir', required=True,
help='directory with input annotations in *.json format'
)
parser.add_argument(
'--output', required=True,
help='output annotation file'
)
parser.add_argument(
'--images-map', required=True,
help='file with map of datasets and its images path (json format)'
)
parser.add_argument(
'--draw', default=None,
help='directory to save images with its segments. By default is disabled'
)
return parser.parse_args()


def draw_bboxes_and_masks(img, annotations, input_dir):
""" Draw bounding boxes and contours of masks on image and save it.
:param img: file name of image (is getting from the same field in annotation)
:param annotations: list of bonding boxes and segments on the image
:param input_dir: base directory to save images
"""
input_file = os.path.join(input_dir, img['file_name'])
save_path = os.path.join(os.path.dirname(input_file), 'draw')
if not os.path.exists(save_path):
os.makedirs(save_path)
output_file = os.path.join(save_path, os.path.basename(input_file))

img = cv2.imread(input_file)

yellow = (0, 255, 255)
red = (0, 0, 255)

for ann in annotations:
cat_id = str(ann['category_id'])
bbox = [int(ann['bbox'][0]), int(ann['bbox'][1]),
int(ann['bbox'][0] + ann['bbox'][2]), int(ann['bbox'][1] + ann['bbox'][3])]
masks = ann['segmentation']

for mask in masks:
i = 0
points = []
while i < len(mask):
x = int(mask[i])
y = int(mask[i + 1])
points.append([x, y])
i += 2
img = cv2.polylines(img, np.int32([points]), True, yellow, 1)

img = cv2.rectangle(img, (bbox[0], bbox[1]), (bbox[2], bbox[3]), red, 1)
x = bbox[0] + (bbox[2] - bbox[0]) // 4
y = bbox[1] + (bbox[3] - bbox[1]) // 2
cv2.putText(img, cat_id, (x, y), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, red, 1)
cv2.imwrite(output_file, img)


def is_json_file(filename):
""" Check if file has a *.json type (just check an extension)
:param filename: name of file
:return: True if file has a *.json type
"""
return True if filename.lower().endswith('.json') else False


def get_anno_list(directory):
""" Get list of files in directory
:param directory: directory to parse
:return: list of files in the directory in format [name1.ext, name2.ext, ...]
"""
files = []
for file in os.listdir(directory):
if is_json_file(file):
files.append(file)
return files


def common_path_images(images_map):
""" Define which part of paths to images is common for all of them
:param images_map: dictionary of matched datasets and its images paths. Format:
{
'dataset1.json': '/path/to/images/for/dataset1',
'dataset2.json': '/path/to/images/for/dataset2',
...
}
:return: string with a common part of the images paths
"""
common_path = ''
for _, path in images_map.items():
if common_path == '':
common_path = path
else:
match = SequenceMatcher(None, common_path, path).find_longest_match(0, len(common_path), 0, len(path))
common_path = common_path[match.a: match.a + match.size]
return common_path


def merge_annotations(directory, anno_list, images_map):
""" Merge several annotations in COCO representation into one
:param directory: base directory where is saved all datasets which is needed to merge
:param anno_list: list of annotations to merge. [dataset1.json, dataset2.json, ...]
:param images_map: dictionary of matched datasets and its images paths
:return: merged annotation
"""
merged_anno = None
first_step = True
reference_classes = None
common_path = common_path_images(images_map)
for anno_file in tqdm(anno_list, 'Parsing annotations...'):
if anno_file not in images_map:
glog.warning('Dataset <{}> is absent in images-map file and will be ignored!'.format(anno_file))
continue
img_prefix = images_map[anno_file].replace(common_path, '')
with open(os.path.join(directory, anno_file)) as f:
data = json.load(f)
for img in data['images']:
img['file_name'] = os.path.join(img_prefix, img['file_name'])
if first_step:
merged_anno = data
reference_classes = data['categories']
first_step = False
else:
classes = data['categories']
if classes != reference_classes:
glog.warning('Categories field in dataset <{}> has another classes and will be ignored!'
.format(anno_file))
continue
add_img_id = len(merged_anno['images'])
add_obj_id = len(merged_anno['annotations'])
for img in data['images']:
img['id'] += add_img_id
for ann in data['annotations']:
ann['id'] += add_obj_id
ann['image_id'] += add_img_id
merged_anno['images'].extend(data['images'])
merged_anno['annotations'].extend(data['annotations'])
return merged_anno


def main():
args = parse_args()
anno_list = get_anno_list(args.input_dir)
with open(args.images_map) as f:
images_map = json.load(f)

result_annotation = merge_annotations(args.input_dir, anno_list, images_map)

# Save created annotation
glog.info('Saving annotation...')
with open(args.output, 'w') as outfile:
json.dump(result_annotation, outfile)
glog.info('Annotation was saved in <{}> successfully'.format(args.output))

# Try to load created annotation via cocoapi
try:
glog.info('Trying to load annotation <{}> via cocoapi...'.format(args.output))
coco_loader.COCO(args.output)
except:
raise
else:
glog.info('Annotation in COCO representation <{}> created from \n<{}> \nsuccessfully!'
.format(args.output, anno_list))

if args.draw:
for img in tqdm(result_annotation['images'], 'Drawing and saving images...'):
ann_for_img = []
for ann in result_annotation['annotations']:
if ann['image_id'] == img['id']:
ann_for_img.append(ann)
draw_bboxes_and_masks(img, ann_for_img, args.draw)


if __name__ == "__main__":
main()

0 comments on commit 0bfe968

Please sign in to comment.