Skip to content

Commit

Permalink
Add label support in WiderFace dataset format (cvat-ai#90)
Browse files Browse the repository at this point in the history
* Add label support in WiderFace dataset format

* add labels.txt
  • Loading branch information
yasakova-anastasia authored Jan 19, 2021
1 parent 8b1a6b4 commit f489c17
Show file tree
Hide file tree
Showing 4 changed files with 86 additions and 22 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]
### Added
- `WiderFace` dataset format (<https://github.com/openvinotoolkit/datumaro/pull/65>)
- `WiderFace` dataset format (<https://github.com/openvinotoolkit/datumaro/pull/65>, <https://github.com/openvinotoolkit/datumaro/pull/90>)
- Function to transform annotations to labels (<https://github.com/openvinotoolkit/datumaro/pull/66>)
- Task-specific Splitter (<https://github.com/openvinotoolkit/datumaro/pull/68>, <https://github.com/openvinotoolkit/datumaro/pull/81>)
- `VGGFace2` dataset format (<https://github.com/openvinotoolkit/datumaro/pull/69>, <https://github.com/openvinotoolkit/datumaro/pull/82>)
Expand Down
71 changes: 62 additions & 9 deletions datumaro/plugins/widerface_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,16 @@

from datumaro.components.converter import Converter
from datumaro.components.extractor import (AnnotationType, Bbox, DatasetItem,
Importer, SourceExtractor)
Importer, Label, LabelCategories, SourceExtractor)


class WiderFacePath:
    # File-layout constants for the WIDER Face dataset format.
    # Image files are stored as JPEGs.
    IMAGE_EXT = '.jpg'
    # Directory holding the per-subset annotation .txt files.
    ANNOTATIONS_DIR = 'wider_face_split'
    # Directory (inside each subset dir) holding the label subdirectories.
    IMAGES_DIR = 'images'
    # Subset directories are named 'WIDER_<subset>' (e.g. 'WIDER_train').
    SUBSET_DIR = 'WIDER_'
    # Optional file listing label names, one per line, at the dataset root.
    LABELS_FILE = 'labels.txt'
    # Images without a label annotation are written under this directory name.
    IMAGES_DIR_NO_LABEL = 'no_label'
    # Per-bbox attribute names, in the column order used by the annotation
    # files (after the four x, y, w, h coordinates).
    BBOX_ATTRIBUTES = ['blur', 'expression', 'illumination',
        'occluded', 'pose', 'invalid']

Expand All @@ -33,8 +35,31 @@ def __init__(self, path):
subset = subset.split('_')[2]
super().__init__(subset=subset)

self._categories = self._load_categories()
self._items = list(self._load_items(path).values())

def _load_categories(self):
    """Build the label categories for this extractor.

    Labels are taken from the root-level 'labels.txt' when it exists
    (one label name per line). Otherwise they are inferred from the
    label subdirectories of the current subset's images directory,
    skipping the special 'no_label' directory and stripping the
    numeric 'NN--' prefix WIDER Face uses in directory names
    (e.g. '0--Parade' -> 'Parade').

    Returns:
        dict: {AnnotationType.label: LabelCategories} mapping.
    """
    # NOTE: the original code also mutated self._categories here, but
    # __init__ rebinds self._categories to this method's return value,
    # so that in-place assignment was dead code and has been removed.
    label_cat = LabelCategories()
    path = osp.join(self._dataset_dir, WiderFacePath.LABELS_FILE)
    if osp.isfile(path):
        # Explicit label list: one name per line, order defines indices.
        with open(path, encoding='utf-8') as labels_file:
            for label in (s.strip() for s in labels_file):
                label_cat.add(label)
    else:
        # Fall back to discovering labels from the directory layout.
        subset_path = osp.join(self._dataset_dir,
            WiderFacePath.SUBSET_DIR + self._subset,
            WiderFacePath.IMAGES_DIR)
        if osp.isdir(subset_path):
            # Sort for a deterministic label order across platforms.
            for images_dir in sorted(os.listdir(subset_path)):
                if osp.isdir(osp.join(subset_path, images_dir)) and \
                        images_dir != WiderFacePath.IMAGES_DIR_NO_LABEL:
                    if '--' in images_dir:
                        # Strip the 'NN--' index prefix, keep the name.
                        images_dir = images_dir.split('--')[1]
                    label_cat.add(images_dir)
    return { AnnotationType.label: label_cat }

def _load_items(self, path):
items = {}
with open(path, 'r') as f:
Expand All @@ -48,10 +73,19 @@ def _load_items(self, path):
image_path = osp.join(self._dataset_dir, WiderFacePath.SUBSET_DIR
+ self._subset, WiderFacePath.IMAGES_DIR, image[:-1])
item_id = image[:-(len(WiderFacePath.IMAGE_EXT) + 1)]
annotations = []
if '/' in item_id:
label_name = item_id.split('/')[0]
if '--' in label_name:
label_name = label_name.split('--')[1]
if label_name != WiderFacePath.IMAGES_DIR_NO_LABEL:
label = \
self._categories[AnnotationType.label].find(label_name)[0]
annotations.append(Label(label=label))
item_id = item_id[len(item_id.split('/')[0]) + 1:]

bbox_count = lines[image_id + 1]
bbox_lines = lines[image_id + 2 : image_id + int(bbox_count) + 2]
annotations = []
for bbox in bbox_lines:
bbox_list = bbox.split()
if len(bbox_list) >= 4:
Expand All @@ -63,8 +97,8 @@ def _load_items(self, path):
attributes[attr] = int(bbox_list[i])
i += 1
annotations.append(Bbox(
int(bbox_list[0]), int(bbox_list[1]),
int(bbox_list[2]), int(bbox_list[3]),
float(bbox_list[0]), float(bbox_list[1]),
float(bbox_list[2]), float(bbox_list[3]),
attributes = attributes
))

Expand All @@ -83,18 +117,37 @@ class WiderFaceConverter(Converter):

def apply(self):
save_dir = self._save_dir

os.makedirs(save_dir, exist_ok=True)

label_categories = self._extractor.categories()[AnnotationType.label]

labels_path = osp.join(save_dir, WiderFacePath.LABELS_FILE)
with open(labels_path, 'w', encoding='utf-8') as f:
f.write('\n'.join(label.name for label in label_categories))

for subset_name, subset in self._extractor.subsets().items():
subset_dir = osp.join(save_dir, WiderFacePath.SUBSET_DIR + subset_name)

wider_annotation = ''
for item in subset:
wider_annotation += '%s\n' % (item.id + WiderFacePath.IMAGE_EXT)
if item.has_image and self._save_images:
self._save_image(item, osp.join(save_dir, subset_dir,
WiderFacePath.IMAGES_DIR, item.id + WiderFacePath.IMAGE_EXT))
labels = [a.label for a in item.annotations
if a.type == AnnotationType.label]
if labels:
wider_annotation += '%s\n' % (str(labels[0]) + '--' \
+ label_categories[labels[0]].name + '/' \
+ item.id + WiderFacePath.IMAGE_EXT)
if item.has_image and self._save_images:
self._save_image(item, osp.join(save_dir, subset_dir,
WiderFacePath.IMAGES_DIR, str(labels[0]) + '--' \
+ label_categories[labels[0]].name + '/' + item.id \
+ WiderFacePath.IMAGE_EXT))
else:
wider_annotation += '%s\n' % (WiderFacePath.IMAGES_DIR_NO_LABEL \
+ '/' + item.id + WiderFacePath.IMAGE_EXT)
if item.has_image and self._save_images:
self._save_image(item, osp.join(save_dir, subset_dir,
WiderFacePath.IMAGES_DIR, WiderFacePath.IMAGES_DIR_NO_LABEL \
+ '/' + item.id + WiderFacePath.IMAGE_EXT))

bboxes = [a for a in item.annotations
if a.type == AnnotationType.bbox]
Expand Down
2 changes: 2 additions & 0 deletions tests/assets/widerface_dataset/labels.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Parade
Handshaking
33 changes: 21 additions & 12 deletions tests/test_widerface_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
from unittest import TestCase

import numpy as np
from datumaro.components.extractor import Bbox, DatasetItem
from datumaro.components.extractor import (AnnotationType, Bbox, DatasetItem,
Label, LabelCategories)
from datumaro.components.dataset import Dataset
from datumaro.plugins.widerface_format import WiderFaceConverter, WiderFaceImporter
from datumaro.util.test_utils import TestDir, compare_datasets
Expand All @@ -17,6 +18,7 @@ def test_can_save_and_load(self):
Bbox(0, 1, 2, 3, attributes = {
'blur': 2, 'expression': 0, 'illumination': 0,
'occluded': 0, 'pose': 2, 'invalid': 0}),
Label(0),
]
),
DatasetItem(id='2', subset='train', image=np.ones((10, 10, 3)),
Expand All @@ -30,6 +32,7 @@ def test_can_save_and_load(self):
Bbox(2, 1, 2, 3, attributes = {
'blur': 2, 'expression': 0, 'illumination': 0,
'occluded': 0, 'pose': 0, 'invalid': 1}),
Label(1),
]
),

Expand All @@ -47,11 +50,14 @@ def test_can_save_and_load(self):
),

DatasetItem(id='4', subset='val', image=np.ones((8, 8, 3))),
])
], categories={
AnnotationType.label: LabelCategories.from_iterable(
'label_' + str(i) for i in range(3)),
})

with TestDir() as test_dir:
WiderFaceConverter.convert(source_dataset, test_dir, save_images=True)
parsed_dataset = WiderFaceImporter()(test_dir).make_dataset()
parsed_dataset = Dataset.import_from(test_dir, 'wider_face')

compare_datasets(self, source_dataset, parsed_dataset)

Expand All @@ -65,11 +71,11 @@ def test_can_save_dataset_with_no_subsets(self):
'occluded': 0, 'pose': 2, 'invalid': 0}),
]
),
])
], categories=[])

with TestDir() as test_dir:
WiderFaceConverter.convert(source_dataset, test_dir, save_images=True)
parsed_dataset = WiderFaceImporter()(test_dir).make_dataset()
parsed_dataset = Dataset.import_from(test_dir, 'wider_face')

compare_datasets(self, source_dataset, parsed_dataset)

Expand All @@ -85,7 +91,7 @@ def test_can_save_dataset_with_non_widerface_attributes(self):
'non-widerface attribute': 0}),
]
),
])
], categories=[])

target_dataset = Dataset.from_iterable([
DatasetItem(id='a/b/1', image=np.ones((8, 8, 3)),
Expand All @@ -96,11 +102,11 @@ def test_can_save_dataset_with_non_widerface_attributes(self):
Bbox(1, 1, 2, 2),
]
),
])
], categories=[])

with TestDir() as test_dir:
WiderFaceConverter.convert(source_dataset, test_dir, save_images=True)
parsed_dataset = WiderFaceImporter()(test_dir).make_dataset()
parsed_dataset = Dataset.import_from(test_dir, 'wider_face')

compare_datasets(self, target_dataset, parsed_dataset)

Expand All @@ -112,15 +118,16 @@ def test_can_detect(self):

def test_can_import(self):
expected_dataset = Dataset.from_iterable([
DatasetItem(id='0--Parade/0_Parade_image_01', subset='train',
DatasetItem(id='0_Parade_image_01', subset='train',
image=np.ones((10, 15, 3)),
annotations=[
Bbox(1, 2, 2, 2, attributes = {
'blur': 0, 'expression': 0, 'illumination': 0,
'occluded': 0, 'pose': 0, 'invalid': 0}),
Label(0),
]
),
DatasetItem(id='1--Handshaking/1_Handshaking_image_02', subset='train',
DatasetItem(id='1_Handshaking_image_02', subset='train',
image=np.ones((10, 15, 3)),
annotations=[
Bbox(1, 1, 2, 2, attributes = {
Expand All @@ -129,9 +136,10 @@ def test_can_import(self):
Bbox(5, 1, 2, 2, attributes = {
'blur': 0, 'expression': 0, 'illumination': 1,
'occluded': 0, 'pose': 0, 'invalid': 0}),
Label(1),
]
),
DatasetItem(id='0--Parade/0_Parade_image_03', subset='val',
DatasetItem(id='0_Parade_image_03', subset='val',
image=np.ones((10, 15, 3)),
annotations=[
Bbox(0, 0, 1, 1, attributes = {
Expand All @@ -143,9 +151,10 @@ def test_can_import(self):
Bbox(5, 6, 1, 1, attributes = {
'blur': 2, 'expression': 0, 'illumination': 0,
'occluded': 0, 'pose': 2, 'invalid': 0}),
Label(0),
]
),
])
], categories= ['Parade', 'Handshaking'])

dataset = Dataset.import_from(DUMMY_DATASET_DIR, 'wider_face')

Expand Down

0 comments on commit f489c17

Please sign in to comment.