Skip to content

Commit

Permalink
Dataset meta file for some formats (#569)
Browse files Browse the repository at this point in the history
* add meta file support in CelebA, CIFAR, MNIST, LFW, YOLO, COCO, SYNTHIA, ADE20K

* update documentation

* update tests

* update changelog
  • Loading branch information
yasakova-anastasia authored Nov 30, 2021
1 parent ba452e5 commit 82cdb62
Show file tree
Hide file tree
Showing 115 changed files with 704 additions and 32 deletions.
5 changes: 3 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
(<https://github.com/openvinotoolkit/datumaro/pull/547>)
- Import for SYNTHIA dataset format
(<https://github.com/openvinotoolkit/datumaro/pull/532>)
- Support for Accuracy Checker dataset meta files in formats
(<https://github.com/openvinotoolkit/datumaro/pull/553>)
- Support of `score` attribute in KITTI detection
(<https://github.com/openvinotoolkit/datumaro/pull/571>)
- Support for Accuracy Checker dataset meta files in formats
(<https://github.com/openvinotoolkit/datumaro/pull/553>,
<https://github.com/openvinotoolkit/datumaro/pull/569>)

### Changed
- The following formats can now be detected unambiguously:
Expand Down
9 changes: 8 additions & 1 deletion datumaro/plugins/ade20k2017_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from datumaro.util.image import (
IMAGE_EXTENSIONS, find_images, lazy_image, load_image,
)
from datumaro.util.meta_file_util import has_meta_file, parse_meta_file


class Ade20k2017Path:
Expand All @@ -31,7 +32,9 @@ def __init__(self, path):
if not osp.isdir(path):
raise FileNotFoundError("Can't read dataset directory '%s'" % path)

subsets = os.listdir(path)
# exclude dataset meta file
subsets = [subset for subset in os.listdir(path)
if osp.splitext(subset)[-1] != '.json']
if len(subsets) < 1:
raise FileNotFoundError("Can't read subsets in directory '%s'" % path)

Expand All @@ -41,6 +44,10 @@ def __init__(self, path):
self._items = []
self._categories = {}

if has_meta_file(self._path):
self._categories = { AnnotationType.label: LabelCategories().
from_iterable(list(parse_meta_file(self._path).keys())) }

for subset in self._subsets:
self._load_items(subset)

Expand Down
9 changes: 8 additions & 1 deletion datumaro/plugins/ade20k2020_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from datumaro.util.image import (
IMAGE_EXTENSIONS, find_images, lazy_image, load_image,
)
from datumaro.util.meta_file_util import has_meta_file, parse_meta_file


class Ade20k2020Path:
Expand All @@ -33,7 +34,9 @@ def __init__(self, path):
if not osp.isdir(path):
raise FileNotFoundError("Can't read dataset directory '%s'" % path)

subsets = os.listdir(path)
# exclude dataset meta file
subsets = [subset for subset in os.listdir(path)
if osp.splitext(subset)[-1] != '.json']
if len(subsets) < 1:
raise FileNotFoundError("Can't read subsets in directory '%s'" % path)

Expand All @@ -43,6 +46,10 @@ def __init__(self, path):
self._items = []
self._categories = {}

if has_meta_file(self._path):
self._categories = { AnnotationType.label: LabelCategories().
from_iterable(list(parse_meta_file(self._path).keys())) }

for subset in self._subsets:
self._load_items(subset)

Expand Down
5 changes: 5 additions & 0 deletions datumaro/plugins/align_celeba_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from datumaro.components.errors import DatasetImportError
from datumaro.components.extractor import DatasetItem, Importer, SourceExtractor
from datumaro.util.image import find_images
from datumaro.util.meta_file_util import has_meta_file, parse_meta_file


class AlignCelebaPath:
Expand All @@ -32,6 +33,10 @@ def __init__(self, path):
self._anno_dir = osp.dirname(path)

self._categories = { AnnotationType.label: LabelCategories() }
if has_meta_file(path):
self._categories = { AnnotationType.label: LabelCategories().
from_iterable(list(parse_meta_file(path).keys())) }

self._items = list(self._load_items(path).values())

def _load_items(self, root_dir):
Expand Down
5 changes: 5 additions & 0 deletions datumaro/plugins/celeba_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from datumaro.components.errors import DatasetImportError
from datumaro.components.extractor import DatasetItem, Importer, SourceExtractor
from datumaro.util.image import find_images
from datumaro.util.meta_file_util import has_meta_file, parse_meta_file


class CelebaPath:
Expand All @@ -30,6 +31,10 @@ def __init__(self, path):
super().__init__()

self._categories = { AnnotationType.label: LabelCategories() }
if has_meta_file(path):
self._categories = { AnnotationType.label: LabelCategories().
from_iterable(list(parse_meta_file(path).keys())) }

self._items = list(self._load_items(path).values())

def _load_items(self, root_dir):
Expand Down
8 changes: 8 additions & 0 deletions datumaro/plugins/cifar_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from datumaro.components.dataset import ItemStatus
from datumaro.components.extractor import DatasetItem, Importer, SourceExtractor
from datumaro.util import cast
from datumaro.util.meta_file_util import has_meta_file, parse_meta_file


class RestrictedUnpickler(pickle.Unpickler):
Expand Down Expand Up @@ -65,6 +66,10 @@ def __init__(self, path, subset=None):
self._items = list(self._load_items(path).values())

def _load_categories(self, path):
if has_meta_file(path):
return { AnnotationType.label: LabelCategories().
from_iterable(list(parse_meta_file(path).keys())) }

label_cat = LabelCategories()

meta_file = osp.join(path, CifarPath.META_10_FILE)
Expand Down Expand Up @@ -175,6 +180,9 @@ class CifarConverter(Converter):
def apply(self):
os.makedirs(self._save_dir, exist_ok=True)

if self._save_dataset_meta:
self._save_meta_file(self._save_dir)

label_categories = self._extractor.categories()[AnnotationType.label]
label_names = []
coarse_label_names = []
Expand Down
13 changes: 10 additions & 3 deletions datumaro/plugins/kitti_format/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from datumaro.util.annotation_util import make_label_id_mapping
from datumaro.util.image import save_image
from datumaro.util.mask_tools import paint_mask
from datumaro.util.meta_file_util import is_meta_file, parse_meta_file

from .format import (
KittiLabelMap, KittiPath, KittiTask, make_kitti_categories, parse_label_map,
Expand Down Expand Up @@ -156,8 +157,11 @@ def get_label(self, label_id):
categories()[AnnotationType.label].items[label_id].name

def save_label_map(self):
path = osp.join(self._save_dir, KittiPath.LABELMAP_FILE)
write_label_map(path, self._label_map)
if self._save_dataset_meta:
self._save_meta_file(self._save_dir)
else:
path = osp.join(self._save_dir, KittiPath.LABELMAP_FILE)
write_label_map(path, self._label_map)

def _load_categories(self, label_map_source):
if label_map_source == LabelmapType.kitti.name:
Expand Down Expand Up @@ -188,7 +192,10 @@ def _load_categories(self, label_map_source):
sorted(label_map_source.items(), key=lambda e: e[0]))

elif isinstance(label_map_source, str) and osp.isfile(label_map_source):
label_map = parse_label_map(label_map_source)
if is_meta_file(label_map_source):
label_map = parse_meta_file(label_map_source)
else:
label_map = parse_label_map(label_map_source)

else:
raise Exception("Wrong labelmap specified, "
Expand Down
17 changes: 13 additions & 4 deletions datumaro/plugins/kitti_format/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
AnnotationType, DatasetItem, SourceExtractor,
)
from datumaro.util.image import find_images, load_image
from datumaro.util.meta_file_util import has_meta_file, parse_meta_file

from .format import (
KittiLabelMap, KittiPath, KittiTask, make_kitti_categories, parse_label_map,
Expand All @@ -36,15 +37,23 @@ def _load_categories(self, path):
if self._task == KittiTask.segmentation:
return self._load_categories_segmentation(path)
elif self._task == KittiTask.detection:
if has_meta_file(path):
return { AnnotationType.label: LabelCategories().
from_iterable(list(parse_meta_file(path).keys())) }

return {AnnotationType.label: LabelCategories()}

def _load_categories_segmentation(self, path):
label_map = None
label_map_path = osp.join(path, KittiPath.LABELMAP_FILE)
if osp.isfile(label_map_path):
label_map = parse_label_map(label_map_path)
if has_meta_file(path):
label_map = parse_meta_file(path)
else:
label_map = KittiLabelMap
label_map_path = osp.join(path, KittiPath.LABELMAP_FILE)
if osp.isfile(label_map_path):
label_map = parse_label_map(label_map_path)
else:
label_map = KittiLabelMap

self._labels = [label for label in label_map]
return make_kitti_categories(label_map)

Expand Down
9 changes: 9 additions & 0 deletions datumaro/plugins/lfw_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from datumaro.components.extractor import DatasetItem, Importer, SourceExtractor
from datumaro.components.format_detection import FormatDetectionContext
from datumaro.util.image import find_images
from datumaro.util.meta_file_util import has_meta_file, parse_meta_file


class LfwPath:
Expand Down Expand Up @@ -44,6 +45,10 @@ def __init__(self, path, subset=None):
self._items = list(self._load_items(path).values())

def _load_categories(self, path):
if has_meta_file(self._dataset_dir):
return { AnnotationType.label: LabelCategories().
from_iterable(list(parse_meta_file(self._dataset_dir).keys())) }

label_cat = LabelCategories()
if osp.isfile(path):
with open(path, encoding='utf-8') as labels_file:
Expand Down Expand Up @@ -188,6 +193,10 @@ class LfwConverter(Converter):
DEFAULT_IMAGE_EXT = LfwPath.IMAGE_EXT

def apply(self):
os.makedirs(self._save_dir, exist_ok=True)
if self._save_dataset_meta:
self._save_meta_file(self._save_dir)

for subset_name, subset in self._extractor.subsets().items():
label_categories = self._extractor.categories()[AnnotationType.label]
labels = {label.name: 0 for label in label_categories}
Expand Down
8 changes: 8 additions & 0 deletions datumaro/plugins/mnist_csv_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
)
from datumaro.components.converter import Converter
from datumaro.components.extractor import DatasetItem, Importer, SourceExtractor
from datumaro.util.meta_file_util import has_meta_file, parse_meta_file


class MnistCsvPath:
Expand All @@ -35,6 +36,10 @@ def __init__(self, path, subset=None):
self._items = list(self._load_items(path).values())

def _load_categories(self):
if has_meta_file(self._dataset_dir):
return { AnnotationType.label: LabelCategories().
from_iterable(list(parse_meta_file(self._dataset_dir).keys())) }

label_cat = LabelCategories()

labels_file = osp.join(self._dataset_dir, 'labels.txt')
Expand Down Expand Up @@ -100,6 +105,9 @@ class MnistCsvConverter(Converter):

def apply(self):
os.makedirs(self._save_dir, exist_ok=True)
if self._save_dataset_meta:
self._save_meta_file(self._save_dir)

for subset_name, subset in self._extractor.subsets().items():
data = []
item_ids = {}
Expand Down
8 changes: 8 additions & 0 deletions datumaro/plugins/mnist_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
)
from datumaro.components.converter import Converter
from datumaro.components.extractor import DatasetItem, Importer, SourceExtractor
from datumaro.util.meta_file_util import has_meta_file, parse_meta_file


class MnistPath:
Expand Down Expand Up @@ -43,6 +44,10 @@ def __init__(self, path, subset=None):
self._items = list(self._load_items(path).values())

def _load_categories(self):
if has_meta_file(self._dataset_dir):
return { AnnotationType.label: LabelCategories().
from_iterable(list(parse_meta_file(self._dataset_dir).keys())) }

label_cat = LabelCategories()

labels_file = osp.join(self._dataset_dir, 'labels.txt')
Expand Down Expand Up @@ -116,6 +121,9 @@ class MnistConverter(Converter):

def apply(self):
os.makedirs(self._save_dir, exist_ok=True)
if self._save_dataset_meta:
self._save_meta_file(self._save_dir)

for subset_name, subset in self._extractor.subsets().items():
labels = []
images = np.array([])
Expand Down
3 changes: 3 additions & 0 deletions datumaro/plugins/synthia_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from datumaro.components.format_detection import FormatDetectionContext
from datumaro.util.image import find_images, load_image
from datumaro.util.mask_tools import generate_colormap, lazy_mask
from datumaro.util.meta_file_util import has_meta_file, parse_meta_file


class SynthiaPath:
Expand Down Expand Up @@ -98,6 +99,8 @@ def __init__(self, path):
self._items = list(self._load_items(path).values())

def _load_categories(self, path):
if has_meta_file(path):
return make_categories(parse_meta_file(path))
label_map_path = osp.join(path, SynthiaPath.LABELMAP_FILE)
if osp.isfile(label_map_path):
label_map = parse_label_map(label_map_path)
Expand Down
3 changes: 3 additions & 0 deletions datumaro/plugins/yolo_format/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ def apply(self):

os.makedirs(save_dir, exist_ok=True)

if self._save_dataset_meta:
self._save_meta_file(self._save_dir)

label_categories = extractor.categories()[AnnotationType.label]
label_ids = {label.name: idx
for idx, label in enumerate(label_categories.items)}
Expand Down
5 changes: 5 additions & 0 deletions datumaro/plugins/yolo_format/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from datumaro.util.image import (
DEFAULT_IMAGE_META_FILE_NAME, load_image_meta_file,
)
from datumaro.util.meta_file_util import has_meta_file, parse_meta_file
from datumaro.util.os_util import split_path

from .format import YoloPath
Expand Down Expand Up @@ -171,6 +172,10 @@ def _parse_annotations(anno_path, image):

@staticmethod
def _load_categories(names_path):
if has_meta_file(osp.dirname(names_path)):
return LabelCategories().from_iterable(
list(parse_meta_file(osp.dirname(names_path)).keys()))

label_categories = LabelCategories()

with open(names_path, 'r', encoding='utf-8') as f:
Expand Down
3 changes: 3 additions & 0 deletions site/content/en/docs/formats/ade20k2017.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ ADE20K dataset directory should have the following structure:
<!--lint disable fenced-code-flag-->
```
dataset/
├── dataset_meta.json # a list of non-format labels (optional)
├── subset1/
│ └── super_label_1/
│ ├── img1.jpg
Expand Down Expand Up @@ -82,6 +83,8 @@ image. Each line in the text file contains:
Each column is separated by a `#`. See example of dataset
[here](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/ade20k2017_dataset).

To add custom classes, you can use [`dataset_meta.json`](/docs/user-manual/supported_formats/#dataset-meta-file).

## Export to other formats

Datumaro can convert an ADE20K dataset into any other format [Datumaro supports](/docs/user-manual/supported_formats/).
Expand Down
3 changes: 3 additions & 0 deletions site/content/en/docs/formats/ade20k2020.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ ADE20K dataset directory should have the following structure:
<!--lint disable fenced-code-flag-->
```
dataset/
├── dataset_meta.json # a list of non-format labels (optional)
├── subset1/
│ ├── img1/ # directory with instance masks for img1
│ | ├── instance_001_img1.png
Expand Down Expand Up @@ -101,6 +102,8 @@ See our [tests asset](https://github.com/openvinotoolkit/datumaro/tree/develop/t
for example of this file,
or check [ADE20K toolkit](https://github.com/CSAILVision/ADE20K) for it.

To add custom classes, you can use [`dataset_meta.json`](/docs/user-manual/supported_formats/#dataset-meta-file).

## Export to other formats

Datumaro can convert an ADE20K dataset into any other format [Datumaro supports](/docs/user-manual/supported_formats/).
Expand Down
3 changes: 3 additions & 0 deletions site/content/en/docs/formats/align_celeba.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ Align CelebA dataset directory should have the following structure:
<!--lint disable fenced-code-flag-->
```
dataset/
├── dataset_meta.json # a list of non-format labels (optional)
├── Anno/
│   ├── identity_CelebA.txt
│   ├── list_attr_celeba.txt
Expand All @@ -69,6 +70,8 @@ landmarks and subsets respectively (optional).
The original CelebA dataset stores images in a .7z archive. The archive
needs to be unpacked before importing.

To add custom classes, you can use [`dataset_meta.json`](/docs/user-manual/supported_formats/#dataset-meta-file).

## Export to other formats

Datumaro can convert an align CelebA dataset into any other format [Datumaro supports](/docs/user-manual/supported_formats/).
Expand Down
Loading

0 comments on commit 82cdb62

Please sign in to comment.