Skip to content

Commit

Permalink
Fix inplace saving in CamVid and Cityscapes (cvat-ai#367)
Browse files Browse the repository at this point in the history
* Fix patching in YOLO

* Improve mask categories generation

* Fix patching in CamVid

* Add item check in DatasetPatch

* Fix Dataset.is_bound return value type

* Add patching in cityscapes

* Fix background color in colormap generation in Cityscapes

* Update cityscapes format

* Update cityscapes docs

* Fix cvat-ai#325

* Update changelog
  • Loading branch information
Maxim Zhiltsov authored Jul 21, 2021
1 parent 6888e42 commit 3a6af62
Show file tree
Hide file tree
Showing 11 changed files with 457 additions and 241 deletions.
10 changes: 3 additions & 7 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- TBD

### Fixed
- Patching of datasets in Datumaro, CVAT, COCO, CIFAR and Open Images formats
(<https://github.com/openvinotoolkit/datumaro/pull/365>,
<https://github.com/openvinotoolkit/datumaro/pull/347>,
<https://github.com/openvinotoolkit/datumaro/pull/346>,
<https://github.com/openvinotoolkit/datumaro/pull/363>)
- Patching of datasets in formats (<https://github.com/openvinotoolkit/datumaro/issues/348>)
- Unsafe unpickling in CIFAR import (<https://github.com/openvinotoolkit/datumaro/pull/362>)
- Improved Cityscapes export performance (<https://github.com/openvinotoolkit/datumaro/pull/367>)
- Incorrect format of `*_labelIds.png` in Cityscapes export (<https://github.com/openvinotoolkit/datumaro/issues/325>, <https://github.com/openvinotoolkit/datumaro/issues/342>)

### Security
- TBD
Expand Down
5 changes: 4 additions & 1 deletion datumaro/components/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,9 @@ def updated_subsets(self) -> Dict[str, ItemStatus]:
for s in self.data.subsets()}
return self._updated_subsets

def __contains__(self, x: Union[DatasetItem, Tuple[str, str]]) -> bool:
    """
    Checks whether the given item is present in the patch data.

    The membership test is delegated to self.data. Besides a DatasetItem,
    a pair of strings is accepted (presumably (id, subset) - TODO confirm
    against the storage class).
    """
    is_present = x in self.data
    return is_present

def as_dataset(self, parent: IDataset) -> IDataset:
    """
    Builds a DatasetPatchWrapper from this patch and the parent dataset,
    and returns it.
    """
    wrapper_type = __class__.DatasetPatchWrapper
    return wrapper_type(self, parent)

Expand Down Expand Up @@ -758,7 +761,7 @@ def is_eager(self) -> bool:

@property
def is_bound(self) -> bool:
    """
    Tells whether both the source path and the format of the dataset
    are set (i.e. are truthy).

    The result is always a real bool - the raw path or format value is
    never leaked to the caller.
    """
    return bool(self._source_path) and bool(self._format)

def bind(self, path: str, format: str = None):
self._source_path = path
Expand Down
18 changes: 14 additions & 4 deletions datumaro/components/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,9 +131,19 @@ class Label(Annotation):
@attrs(eq=False)
class MaskCategories(Categories):
@classmethod
def make_default(cls, size=256):
def generate(cls, size=255, include_background=True):
"""
Generates a color map with the specified size.
If include_background is True, the result will include the item
"0: (0, 0, 0)", which is typically used as a background color.
"""
from datumaro.util.mask_tools import generate_colormap
return cls(generate_colormap(size))
colormap = generate_colormap(size + (not include_background))
if not include_background:
colormap.pop(0)
colormap = { k - 1: v for k, v in colormap.items() }
return cls(colormap)

colormap = attrib(factory=dict, validator=default_if_none(dict))
_inverse_colormap = attrib(default=None,
Expand Down Expand Up @@ -238,12 +248,12 @@ def from_instance_masks(instance_masks,
instance_ids=None, instance_labels=None, dtype=None):
from datumaro.util.mask_tools import make_index_mask

if instance_ids is not None:
if instance_ids:
assert len(instance_ids) == len(instance_masks)
else:
instance_ids = [None] * len(instance_masks)

if instance_labels is not None:
if instance_labels:
assert len(instance_labels) == len(instance_masks)
else:
instance_labels = [None] * len(instance_masks)
Expand Down
50 changes: 45 additions & 5 deletions datumaro/plugins/camvid_format.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

# Copyright (C) 2020-2021 Intel Corporation
#
# SPDX-License-Identifier: MIT
Expand All @@ -12,6 +11,7 @@
import numpy as np

from datumaro.components.converter import Converter
from datumaro.components.dataset import ItemStatus
from datumaro.components.extractor import (
AnnotationType, CompiledMask, DatasetItem, Importer, LabelCategories, Mask,
MaskCategories, SourceExtractor,
Expand Down Expand Up @@ -118,7 +118,7 @@ def make_camvid_categories(label_map=None):

categories = {}
label_categories = LabelCategories()
for label, desc in label_map.items():
for label in label_map:
label_categories.add(label)
categories[AnnotationType.label] = label_categories

Expand All @@ -128,7 +128,7 @@ def make_camvid_categories(label_map=None):
else: # only copy defined colors
label_id = lambda label: label_categories.find(label)[0]
colormap = { label_id(name): (desc[0], desc[1], desc[2])
for name, desc in label_map.items() }
for name, desc in label_map.items() if desc }
mask_categories = MaskCategories(colormap)
mask_categories.inverse_colormap # pylint: disable=pointless-statement
categories[AnnotationType.mask] = mask_categories
Expand Down Expand Up @@ -290,10 +290,14 @@ def save_segm(self, path, mask, colormap=None):
save_image(path, mask, create_dir=True)

def save_segm_lists(self, subset_name, segm_list):
ann_file = osp.join(self._save_dir, subset_name + '.txt')

if not segm_list:
if self._patch and subset_name in self._patch.updated_subsets:
if osp.isfile(ann_file):
os.remove(ann_file)
return

ann_file = osp.join(self._save_dir, subset_name + '.txt')
with open(ann_file, 'w', encoding='utf-8') as f:
for (image_path, mask_path) in segm_list.values():
image_path = '/' + image_path.replace('\\', '/')
Expand All @@ -305,7 +309,7 @@ def save_segm_lists(self, subset_name, segm_list):

def save_label_map(self):
    """
    Writes the label map file into the save directory.

    If the label map has more entries than the dataset has labels,
    the 'background' entry is removed from the label map before writing.
    """
    path = osp.join(self._save_dir, CamvidPath.LABELMAP_FILE)
    # The length is taken from the label categories object itself,
    # without reaching into its private `_indices` member
    labels = self._extractor.categories()[AnnotationType.label]
    if len(self._label_map) > len(labels):
        self._label_map.pop('background')
    write_label_map(path, self._label_map)
Expand Down Expand Up @@ -372,3 +376,39 @@ def _make_label_id_map(self):
)

return map_id

@classmethod
def patch(cls, dataset, patch, save_dir, **kwargs):
    """
    Applies a dataset patch to the dataset files stored in save_dir.

    The work is done in three steps:
    - every updated subset is exported again with the patch attached
    - for updated items that are removed or have no image, the image
      and mask files are deleted, if they exist
    - empty mask and image directories of the updated subsets are removed
    """
    # Step 1: re-export the subsets touched by the patch
    for subset_name in patch.updated_subsets:
        subset_conv = cls(dataset.get_subset(subset_name),
            save_dir=save_dir, **kwargs)
        subset_conv._patch = patch
        subset_conv.apply()

    # Step 2: drop files of the items that must not have them anymore.
    # This converter instance is only used to produce image file names
    conv = cls(dataset, save_dir=save_dir, **kwargs)
    for (item_id, subset_name), status in patch.updated_items.items():
        is_removed = status == ItemStatus.removed
        if is_removed:
            # a stub item with the same id and subset is built instead
            # of querying the patch data
            item = DatasetItem(item_id, subset=subset_name)
        else:
            item = patch.data.get(item_id, subset_name)

        # Present items with an image keep their files
        if not is_removed and item.has_image:
            continue

        image_path = osp.join(save_dir,
            conv._make_image_filename(item, subdir=subset_name))
        if osp.isfile(image_path):
            os.unlink(image_path)

        mask_path = osp.join(save_dir, subset_name + CamvidPath.SEGM_DIR,
            item.id + CamvidPath.MASK_EXT)
        if osp.isfile(mask_path):
            os.remove(mask_path)

    # Step 3: remove the directories that became empty,
    # the mask directory first, then the image directory
    for subset_name in patch.updated_subsets:
        candidate_dirs = (
            osp.join(save_dir, subset_name + CamvidPath.SEGM_DIR),
            osp.join(save_dir, subset_name),
        )
        for dir_path in candidate_dirs:
            if osp.isdir(dir_path) and not os.listdir(dir_path):
                os.rmdir(dir_path)
Loading

0 comments on commit 3a6af62

Please sign in to comment.