Skip to content

Commit

Permalink
Refactor image saving in formats, fix vggface2 and widerface (cvat-ai…
Browse files Browse the repository at this point in the history
…#154)

- Fixed image saving in VggFace2 and Widerface. Formats should not convert extensions, unless requested
- Refactored image saving in formats
  • Loading branch information
Maxim Zhiltsov authored Mar 10, 2021
1 parent d6902a8 commit 944e9f9
Show file tree
Hide file tree
Showing 10 changed files with 97 additions and 88 deletions.
16 changes: 12 additions & 4 deletions datumaro/components/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,15 +57,23 @@ def _find_image_ext(self, item):

return self._image_ext or src_ext or self._default_image_ext

def _make_image_filename(self, item):
return item.id + self._find_image_ext(item)
def _make_image_filename(self, item, *, name=None, subdir=None):
    """
    Builds a relative file name for the item image.

    Parameters:
    - item - the dataset item; its 'id' is used as the file stem
        when 'name' is not given (or is empty)
    - name - optional file stem to use instead of the item id
    - subdir - optional directory to prepend to the file name

    Returns the stem with the extension chosen by
    'self._find_image_ext(item)', joined with 'subdir' (if any).
    """
    stem = name if name else item.id
    parent = subdir if subdir else ''
    filename = stem + self._find_image_ext(item)
    return osp.join(parent, filename)

def _save_image(self, item, path=None, *,
name=None, subdir=None, basedir=None):
assert not ((subdir or name or basedir) and path), \
"Can't use both subdir or name or basedir and path arguments"

def _save_image(self, item, path=None):
if not item.image.has_data:
log.warning("Item '%s' has no image", item.id)
return

path = path or self._make_image_filename(item)
basedir = basedir or self._save_dir
path = path or osp.join(basedir,
self._make_image_filename(item, name=name, subdir=subdir))
path = osp.abspath(path)

src_ext = item.image.ext.lower()
Expand Down
79 changes: 41 additions & 38 deletions datumaro/plugins/camvid_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,20 @@
#
# SPDX-License-Identifier: MIT

import logging as log
import os
import os.path as osp
from collections import OrderedDict
from enum import Enum
from glob import glob

import numpy as np

from datumaro.components.converter import Converter
from datumaro.components.extractor import (AnnotationType, CompiledMask,
DatasetItem, Importer, LabelCategories, Mask,
MaskCategories, SourceExtractor)
from datumaro.util import find, str_to_bool
from datumaro.util.annotation_util import make_label_id_mapping
from datumaro.util.image import save_image
from datumaro.util.mask_tools import lazy_mask, paint_mask, generate_colormap

Expand Down Expand Up @@ -57,7 +59,8 @@
class CamvidPath:
LABELMAP_FILE = 'label_colors.txt'
SEGM_DIR = "annot"
IMAGE_EXT = '.png'
IMAGE_EXT = '.jpg'
MASK_EXT = '.png'


def parse_label_map(path):
Expand Down Expand Up @@ -198,7 +201,7 @@ def find_sources(cls, path):
LabelmapType = Enum('LabelmapType', ['camvid', 'source'])

class CamvidConverter(Converter):
DEFAULT_IMAGE_EXT = '.png'
DEFAULT_IMAGE_EXT = CamvidPath.IMAGE_EXT

@classmethod
def build_cmdline_parser(cls, **kwargs):
Expand All @@ -221,12 +224,15 @@ def __init__(self, extractor, save_dir,
self._load_categories(label_map)

def apply(self):
subset_dir = self._save_dir
os.makedirs(subset_dir, exist_ok=True)
os.makedirs(self._save_dir, exist_ok=True)

for subset_name, subset in self._extractor.subsets().items():
segm_list = {}
for item in subset:
image_path = self._make_image_filename(item, subdir=subset_name)
if self._save_images:
self._save_image(item, osp.join(self._save_dir, image_path))

masks = [a for a in item.annotations
if a.type == AnnotationType.mask]

Expand All @@ -235,17 +241,13 @@ def apply(self):
instance_labels=[self._label_id_mapping(m.label)
for m in masks])

self.save_segm(osp.join(subset_dir,
subset_name + CamvidPath.SEGM_DIR,
item.id + CamvidPath.IMAGE_EXT),
mask_path = osp.join(subset_name + CamvidPath.SEGM_DIR,
item.id + CamvidPath.MASK_EXT)
self.save_segm(osp.join(self._save_dir, mask_path),
compiled_mask.class_mask)
segm_list[item.id] = True
segm_list[item.id] = (image_path, mask_path)
else:
segm_list[item.id] = False

if self._save_images:
self._save_image(item, osp.join(subset_dir, subset_name,
item.id + CamvidPath.IMAGE_EXT))
segm_list[item.id] = (image_path, '')

self.save_segm_lists(subset_name, segm_list)
self.save_label_map()
Expand All @@ -263,14 +265,11 @@ def save_segm_lists(self, subset_name, segm_list):

ann_file = osp.join(self._save_dir, subset_name + '.txt')
with open(ann_file, 'w') as f:
for item in segm_list:
if segm_list[item]:
path_mask = '/%s/%s' % (subset_name + CamvidPath.SEGM_DIR,
item + CamvidPath.IMAGE_EXT)
else:
path_mask = ''
f.write('/%s/%s %s\n' % (subset_name,
item + CamvidPath.IMAGE_EXT, path_mask))
for (image_path, mask_path) in segm_list.values():
f.write('/%s %s\n' % (
image_path.replace('\\', '/'),
mask_path.replace('\\', '/'))
)

def save_label_map(self):
path = osp.join(self._save_dir, CamvidPath.LABELMAP_FILE)
Expand Down Expand Up @@ -320,20 +319,24 @@ def _load_categories(self, label_map_source):
self._label_id_mapping = self._make_label_id_map()

def _make_label_id_map(self):
source_labels = {
id: label.name for id, label in
enumerate(self._extractor.categories().get(
AnnotationType.label, LabelCategories()).items)
}
target_labels = {
label.name: id for id, label in
enumerate(self._categories[AnnotationType.label].items)
}
id_mapping = {
src_id: target_labels.get(src_label, 0)
for src_id, src_label in source_labels.items()
}

def map_id(src_id):
return id_mapping.get(src_id, 0)
map_id, id_mapping, src_labels, dst_labels = make_label_id_mapping(
self._extractor.categories().get(AnnotationType.label),
self._categories[AnnotationType.label])

void_labels = [src_label for src_id, src_label in src_labels.items()
if src_label not in dst_labels]
if void_labels:
log.warning("The following labels are remapped to background: %s" %
', '.join(void_labels))
log.debug("Saving segmentations with the following label mapping: \n%s" %
'\n'.join(["#%s '%s' -> #%s '%s'" %
(
src_id, src_label, id_mapping[src_id],
self._categories[AnnotationType.label] \
.items[id_mapping[src_id]].name
)
for src_id, src_label in src_labels.items()
])
)

return map_id
3 changes: 1 addition & 2 deletions datumaro/plugins/image_dir.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ def apply(self):

for item in self._extractor:
if item.has_image:
self._save_image(item,
osp.join(self._save_dir, self._make_image_filename(item)))
self._save_image(item)
else:
log.debug("Item '%s' has no image info", item.id)
4 changes: 1 addition & 3 deletions datumaro/plugins/imagenet_txt_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,7 @@ def apply(self):
if p.type == AnnotationType.label]

if self._save_images and item.has_image:
self._save_image(item,
osp.join(self._save_dir, ImagenetTxtPath.IMAGE_DIR,
self._make_image_filename(item)))
self._save_image(item, subdir=ImagenetTxtPath.IMAGE_DIR)

with open(annotation_file, 'w', encoding='utf-8') as f:
f.writelines(['%s %s\n' % (item_id, ' '.join(labels[item_id]))
Expand Down
9 changes: 4 additions & 5 deletions datumaro/plugins/mot_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,9 +214,8 @@ class MotSeqGtConverter(Converter):
def apply(self):
extractor = self._extractor

images_dir = osp.join(self._save_dir, MotPath.IMAGE_DIR)
os.makedirs(images_dir, exist_ok=True)
self._images_dir = images_dir
image_dir = osp.join(self._save_dir, MotPath.IMAGE_DIR)
os.makedirs(image_dir, exist_ok=True)

anno_dir = osp.join(self._save_dir, 'gt')
os.makedirs(anno_dir, exist_ok=True)
Expand Down Expand Up @@ -259,8 +258,8 @@ def apply(self):

if self._save_images:
if item.has_image and item.image.has_data:
self._save_image(item, osp.join(self._images_dir,
'%06d%s' % (frame_id, self._find_image_ext(item))))
self._save_image(item, subdir=image_dir,
name='%06d' % frame_id)
else:
log.debug("Item '%s' has no image", item.id)

Expand Down
5 changes: 2 additions & 3 deletions datumaro/plugins/mots_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ class MotsPngConverter(Converter):
def apply(self):
for subset_name, subset in self._extractor.subsets().items():
subset_dir = osp.join(self._save_dir, subset_name)
images_dir = osp.join(subset_dir, MotsPath.IMAGE_DIR)
image_dir = osp.join(subset_dir, MotsPath.IMAGE_DIR)
anno_dir = osp.join(subset_dir, MotsPath.MASKS_DIR)
os.makedirs(anno_dir, exist_ok=True)

Expand All @@ -120,8 +120,7 @@ def apply(self):

if self._save_images:
if item.has_image and item.image.has_data:
self._save_image(item,
osp.join(images_dir, self._make_image_filename(item)))
self._save_image(item, subdir=image_dir)
else:
log.debug("Item '%s' has no image", item.id)

Expand Down
13 changes: 5 additions & 8 deletions datumaro/plugins/vgg_face2_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ def find_sources(cls, path):
not osp.basename(p).startswith(VggFace2Path.BBOXES_FILE))

class VggFace2Converter(Converter):
DEFAULT_IMAGE_EXT = '.jpg'
DEFAULT_IMAGE_EXT = VggFace2Path.IMAGE_EXT

def apply(self):
save_dir = self._save_dir
Expand All @@ -148,7 +148,6 @@ def apply(self):
label_categories = self._extractor.categories()[AnnotationType.label]

for subset_name, subset in self._extractor.subsets().items():
subset_dir = osp.join(save_dir, subset_name)
bboxes_table = []
landmarks_table = []
for item in subset:
Expand All @@ -157,13 +156,11 @@ def apply(self):
if getattr(p, 'label') != None)
if labels:
for label in labels:
self._save_image(item, osp.join(subset_dir,
label_categories[label].name + '/' \
+ item.id + VggFace2Path.IMAGE_EXT))
self._save_image(item, subdir=osp.join(subset_name,
label_categories[label].name))
else:
self._save_image(item, osp.join(subset_dir,
VggFace2Path.IMAGES_DIR_NO_LABEL,
item.id + VggFace2Path.IMAGE_EXT))
self._save_image(item, subdir=osp.join(subset_name,
VggFace2Path.IMAGES_DIR_NO_LABEL))

landmarks = [a for a in item.annotations
if a.type == AnnotationType.points]
Expand Down
25 changes: 7 additions & 18 deletions datumaro/plugins/voc_format/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from datumaro.components.extractor import (AnnotationType,
CompiledMask, DatasetItem, LabelCategories)
from datumaro.util import find, str_to_bool
from datumaro.util.annotation_util import make_label_id_mapping
from datumaro.util.image import save_image
from datumaro.util.mask_tools import paint_mask, remap_mask

Expand Down Expand Up @@ -562,22 +563,12 @@ def _get_actions(self, label):
return label_desc[2]

def _make_label_id_map(self):
source_labels = {
id: label.name for id, label in
enumerate(self._extractor.categories().get(
AnnotationType.label, LabelCategories()).items)
}
target_labels = {
label.name: id for id, label in
enumerate(self._categories[AnnotationType.label].items)
}
id_mapping = {
src_id: target_labels.get(src_label, 0)
for src_id, src_label in source_labels.items()
}
map_id, id_mapping, src_labels, dst_labels = make_label_id_mapping(
self._extractor.categories().get(AnnotationType.label),
self._categories[AnnotationType.label])

void_labels = [src_label for src_id, src_label in source_labels.items()
if src_label not in target_labels]
void_labels = [src_label for src_id, src_label in src_labels.items()
if src_label not in dst_labels]
if void_labels:
log.warning("The following labels are remapped to background: %s" %
', '.join(void_labels))
Expand All @@ -588,12 +579,10 @@ def _make_label_id_map(self):
self._categories[AnnotationType.label] \
.items[id_mapping[src_id]].name
)
for src_id, src_label in source_labels.items()
for src_id, src_label in src_labels.items()
])
)

def map_id(src_id):
return id_mapping.get(src_id, 0)
return map_id

def _remap_mask(self, mask):
Expand Down
12 changes: 6 additions & 6 deletions datumaro/plugins/widerface_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ def find_sources(cls, path):
dirname=WiderFacePath.ANNOTATIONS_DIR)

class WiderFaceConverter(Converter):
DEFAULT_IMAGE_EXT = '.jpg'
DEFAULT_IMAGE_EXT = WiderFacePath.IMAGE_EXT

def apply(self):
save_dir = self._save_dir
Expand All @@ -143,12 +143,12 @@ def apply(self):
labels = [a.label for a in item.annotations
if a.type == AnnotationType.label]
if labels:
image_path = '%s--%s/%s' % (
labels[0], label_categories[labels[0]].name,
item.id + WiderFacePath.IMAGE_EXT)
image_path = self._make_image_filename(item,
subdir='%s--%s' % (
labels[0], label_categories[labels[0]].name))
else:
image_path = '%s/%s' % (WiderFacePath.IMAGES_DIR_NO_LABEL,
item.id + WiderFacePath.IMAGE_EXT)
image_path = self._make_image_filename(item,
subdir=WiderFacePath.IMAGES_DIR_NO_LABEL)
wider_annotation += image_path + '\n'
if item.has_image and self._save_images:
self._save_image(item, osp.join(save_dir, subset_dir,
Expand Down
19 changes: 18 additions & 1 deletion datumaro/util/annotation_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@

import numpy as np

from datumaro.components.extractor import _Shape, Mask, AnnotationType, RleMask
from datumaro.components.extractor import (LabelCategories, _Shape, Mask,
AnnotationType, RleMask)
from datumaro.util.mask_tools import mask_to_rle


Expand Down Expand Up @@ -210,3 +211,19 @@ def smooth_line(points, segments):
new_points[new_segment] = prev_p * (1 - r) + next_p * r

return new_points, step

def make_label_id_mapping(
        src_labels: LabelCategories, dst_labels: LabelCategories, fallback=0):
    """
    Builds a mapping from source label ids to destination label ids,
    matching labels by name.

    Label ids are the positions of the entries when the categories are
    iterated, so both arguments must be iterable over entries that have
    a 'name' attribute.

    Parameters:
    - src_labels - source label categories; None or an empty (falsy)
        value means "no labels"
    - dst_labels - destination label categories; None or an empty (falsy)
        value means "no labels"
    - fallback - the destination id used for source labels that have
        no destination label with the same name, and for source ids
        that are not known at all

    Returns a tuple (map_id, id_mapping, source_labels, target_labels):
    - map_id - a function: source id -> destination id (or fallback)
    - id_mapping - a dict: source id -> destination id (or fallback)
    - source_labels - a dict: source id -> label name
    - target_labels - a dict: label name -> destination id
    """

    def _names(categories):
        # None and empty categories are both treated as "no labels".
        # This is spelled out instead of 'categories or <empty list>',
        # which mixes a categories object with a plain list.
        if not categories:
            return []
        return [label.name for label in categories]

    source_labels = dict(enumerate(_names(src_labels)))
    # If a name occurs more than once, the last occurrence wins
    target_labels = {
        label_name: label_id
        for label_id, label_name in enumerate(_names(dst_labels))
    }
    id_mapping = {
        src_id: target_labels.get(src_name, fallback)
        for src_id, src_name in source_labels.items()
    }

    def map_id(src_id):
        return id_mapping.get(src_id, fallback)

    return map_id, id_mapping, source_labels, target_labels

0 comments on commit 944e9f9

Please sign in to comment.