From c21dd84e455c82b31f9eee41d6885a81b95d1bd8 Mon Sep 17 00:00:00 2001 From: yasakova-anastasia Date: Mon, 12 Apr 2021 12:32:33 +0300 Subject: [PATCH 1/4] add face label --- datumaro/plugins/widerface_format.py | 38 ++++++++------ tests/test_widerface_format.py | 75 ++++++++++++---------------- 2 files changed, 56 insertions(+), 57 deletions(-) diff --git a/datumaro/plugins/widerface_format.py b/datumaro/plugins/widerface_format.py index f5e0008f60..60906144ed 100644 --- a/datumaro/plugins/widerface_format.py +++ b/datumaro/plugins/widerface_format.py @@ -21,6 +21,7 @@ class WiderFacePath: IMAGES_DIR_NO_LABEL = 'no_label' BBOX_ATTRIBUTES = ['blur', 'expression', 'illumination', 'occluded', 'pose', 'invalid'] + DEFAULT_LABEL = 'face' class WiderFaceExtractor(SourceExtractor): def __init__(self, path, subset=None): @@ -40,12 +41,14 @@ def __init__(self, path, subset=None): def _load_categories(self): label_cat = LabelCategories() - + label_cat.add(WiderFacePath.DEFAULT_LABEL) path = osp.join(self._dataset_dir, WiderFacePath.LABELS_FILE) if osp.isfile(path): with open(path, encoding='utf-8') as labels_file: for line in labels_file: - label_cat.add(line.strip()) + label_name = line.strip() + if label_name != WiderFacePath.DEFAULT_LABEL: + label_cat.add(label_name) else: subset_path = osp.join(self._dataset_dir, WiderFacePath.SUBSET_DIR + self._subset, @@ -56,12 +59,15 @@ def _load_categories(self): images_dir != WiderFacePath.IMAGES_DIR_NO_LABEL: if '--' in images_dir: images_dir = images_dir.split('--')[1] - label_cat.add(images_dir) - + if images_dir != WiderFacePath.DEFAULT_LABEL: + label_cat.add(images_dir) + if len(label_cat) == 1: + label_cat = LabelCategories() return { AnnotationType.label: label_cat } def _load_items(self, path): items = {} + label_categories = self._categories[AnnotationType.label] with open(path, 'r', encoding='utf-8') as f: lines = f.readlines() @@ -73,6 +79,7 @@ def _load_items(self, path): for line_idx in line_ids: image_path = lines[line_idx].strip() item_id = osp.splitext(image_path)[0] + item_id = item_id.replace('\\', '/') image_path = osp.join(self._dataset_dir, WiderFacePath.SUBSET_DIR + self._subset, @@ -84,8 +91,7 @@ def _load_items(self, path): if '--' in label_name: label_name = label_name.split('--')[1] if label_name != WiderFacePath.IMAGES_DIR_NO_LABEL: - label = \ - self._categories[AnnotationType.label].find(label_name)[0] + label = label_categories.find(label_name)[0] annotations.append(Label(label=label)) item_id = item_id[len(item_id.split('/')[0]) + 1:] @@ -101,21 +107,22 @@ def _load_items(self, path): for bbox in bbox_lines: bbox_list = bbox.split() if 4 <= len(bbox_list): - attributes = {} - label = None + label = label_categories.find(WiderFacePath.DEFAULT_LABEL)[0] if len(bbox_list) == 5 or len(bbox_list) == 11: - if len(bbox_list) == 5: - label_name = bbox_list[4] - else: - label_name = bbox_list[10] - label = \ - self._categories[AnnotationType.label].find(label_name)[0] + label_name = bbox_list[-1] + label = label_categories.find(label_name)[0] + if label == None and len(label_categories) == 0: + label_categories.add(WiderFacePath.DEFAULT_LABEL) + label = label_categories.find(WiderFacePath.DEFAULT_LABEL)[0] + + attributes = {} if 10 <= len(bbox_list): i = 4 for attr in WiderFacePath.BBOX_ATTRIBUTES: if bbox_list[i] != '-': attributes[attr] = bbox_list[i] i += 1 + annotations.append(Bbox( float(bbox_list[0]), float(bbox_list[1]), float(bbox_list[2]), float(bbox_list[3]), @@ -180,7 +187,8 @@ def apply(self): wider_attr += '- ' if 0 < attr_counter: wider_annotation += wider_attr - if bbox.label is not None: + if label_categories[bbox.label].name != WiderFacePath.DEFAULT_LABEL and \ + bbox.label is not None: wider_annotation += '%s' % label_categories[bbox.label].name wider_annotation += '\n' diff --git a/tests/test_widerface_format.py b/tests/test_widerface_format.py index 0465f5d3f3..e20b621b4b 100644 --- a/tests/test_widerface_format.py +++ b/tests/test_widerface_format.py @@ -15,47 +15,44 @@ def test_can_save_and_load(self): source_dataset = Dataset.from_iterable([ DatasetItem(id='1', subset='train', image=np.ones((8, 8, 3)), annotations=[ - Bbox(0, 2, 4, 2), - Bbox(0, 1, 2, 3, attributes={ + Bbox(0, 2, 4, 2, label=0), + Bbox(0, 1, 2, 3, label=0, attributes={ 'blur': '2', 'expression': '0', 'illumination': '0', 'occluded': '0', 'pose': '2', 'invalid': '0'}), - Label(0), + Label(1), ] ), DatasetItem(id='2', subset='train', image=np.ones((10, 10, 3)), annotations=[ - Bbox(0, 2, 4, 2, attributes={ + Bbox(0, 2, 4, 2, label=0, attributes={ 'blur': '2', 'expression': '0', 'illumination': '1', 'occluded': '0', 'pose': '1', 'invalid': '0'}), - Bbox(3, 3, 2, 3, attributes={ + Bbox(3, 3, 2, 3, label=0, attributes={ 'blur': '0', 'expression': '1', 'illumination': '0', 'occluded': '0', 'pose': '2', 'invalid': '0'}), - Bbox(2, 1, 2, 3, attributes={ + Bbox(2, 1, 2, 3, label=0, attributes={ 'blur': '2', 'expression': '0', 'illumination': '0', 'occluded': '0', 'pose': '0', 'invalid': '1'}), - Label(1), + Label(2), ] ), DatasetItem(id='3', subset='val', image=np.ones((8, 8, 3)), annotations=[ - Bbox(0, 1.1, 5.3, 2.1, attributes={ + Bbox(0, 1.1, 5.3, 2.1, label=0, attributes={ 'blur': '2', 'expression': '1', 'illumination': '0', 'occluded': '0', 'pose': '1', 'invalid': '0'}), - Bbox(0, 2, 3, 2, attributes={ + Bbox(0, 2, 3, 2, label=0, attributes={ 'occluded': 'False'}), - Bbox(0, 2, 4, 2), - Bbox(0, 7, 3, 2, attributes={ + Bbox(0, 2, 4, 2, label=0), + Bbox(0, 7, 3, 2, label=0, attributes={ 'blur': '2', 'expression': '1', 'illumination': '0', 'occluded': '0', 'pose': '1', 'invalid': '0'}), ] ), DatasetItem(id='4', subset='val', image=np.ones((8, 8, 3))), - ], categories={ - AnnotationType.label: LabelCategories.from_iterable( - 'label_' + str(i) for i in range(3)), - }) + ], categories=['face', 'label_0', 'label_1']) with TestDir() as test_dir: WiderFaceConverter.convert(source_dataset, test_dir, save_images=True) @@ -73,10 +70,7 @@ def test_can_save_dataset_with_no_subsets(self): 'occluded': '0', 'pose': '2', 'invalid': '0'}), ] ), - ], categories={ - AnnotationType.label: LabelCategories.from_iterable( - 'label_' + str(i) for i in range(3)), - }) + ], categories=['face', 'label_0', 'label_1']) with TestDir() as test_dir: WiderFaceConverter.convert(source_dataset, test_dir, save_images=True) @@ -88,15 +82,12 @@ def test_can_save_dataset_with_cyrillic_and_spaces_in_filename(self): source_dataset = Dataset.from_iterable([ DatasetItem(id='кириллица с пробелом', image=np.ones((8, 8, 3)), annotations=[ - Bbox(0, 1, 2, 3, label=1, attributes = { + Bbox(0, 1, 2, 3, label=0, attributes = { 'blur': '2', 'expression': '0', 'illumination': '0', 'occluded': '0', 'pose': '2', 'invalid': '0'}), ] ), - ], categories={ - AnnotationType.label: LabelCategories.from_iterable( - 'label_' + str(i) for i in range(3)), - }) + ], categories=['face']) with TestDir() as test_dir: WiderFaceConverter.convert(source_dataset, test_dir, save_images=True) @@ -109,26 +100,26 @@ def test_can_save_dataset_with_non_widerface_attributes(self): source_dataset = Dataset.from_iterable([ DatasetItem(id='a/b/1', image=np.ones((8, 8, 3)), annotations=[ - Bbox(0, 2, 4, 2), - Bbox(0, 1, 2, 3, attributes={ + Bbox(0, 2, 4, 2, label=0), + Bbox(0, 1, 2, 3, label=0, attributes={ 'non-widerface attribute': '0', 'blur': 1, 'invalid': '1'}), - Bbox(1, 1, 2, 2, attributes={ + Bbox(1, 1, 2, 2, label=0, attributes={ 'non-widerface attribute': '0'}), ] ), - ], categories=[]) + ], categories=['face']) target_dataset = Dataset.from_iterable([ DatasetItem(id='a/b/1', image=np.ones((8, 8, 3)), annotations=[ - Bbox(0, 2, 4, 2), - Bbox(0, 1, 2, 3, attributes={ + Bbox(0, 2, 4, 2, label=0), + Bbox(0, 1, 2, 3, label=0, attributes={ 'blur': '1', 'invalid': '1'}), - Bbox(1, 1, 2, 2), + Bbox(1, 1, 2, 2, label=0), ] ), - ], categories=[]) + ], categories=['face']) with TestDir() as test_dir: WiderFaceConverter.convert(source_dataset, test_dir, save_images=True) @@ -161,40 +152,40 @@ def test_can_import(self): DatasetItem(id='0_Parade_image_01', subset='train', image=np.ones((10, 15, 3)), annotations=[ - Bbox(1, 2, 2, 2, attributes={ + Bbox(1, 2, 2, 2, label=0, attributes={ 'blur': '0', 'expression': '0', 'illumination': '0', 'occluded': '0', 'pose': '0', 'invalid': '0'}), - Label(0), + Label(1), ] ), DatasetItem(id='1_Handshaking_image_02', subset='train', image=np.ones((10, 15, 3)), annotations=[ - Bbox(1, 1, 2, 2, attributes={ + Bbox(1, 1, 2, 2, label=0, attributes={ 'blur': '0', 'expression': '0', 'illumination': '1', 'occluded': '0', 'pose': '0', 'invalid': '0'}), - Bbox(5, 1, 2, 2, attributes={ + Bbox(5, 1, 2, 2, label=0, attributes={ 'blur': '0', 'expression': '0', 'illumination': '1', 'occluded': '0', 'pose': '0', 'invalid': '0'}), - Label(1), + Label(2), ] ), DatasetItem(id='0_Parade_image_03', subset='val', image=np.ones((10, 15, 3)), annotations=[ - Bbox(0, 0, 1, 1, attributes={ + Bbox(0, 0, 1, 1, label=0, attributes={ 'blur': '2', 'expression': '0', 'illumination': '0', 'occluded': '0', 'pose': '2', 'invalid': '0'}), - Bbox(3, 2, 1, 2, attributes={ + Bbox(3, 2, 1, 2, label=0, attributes={ 'blur': '0', 'expression': '0', 'illumination': '0', 'occluded': '1', 'pose': '0', 'invalid': '0'}), - Bbox(5, 6, 1, 1, attributes={ + Bbox(5, 6, 1, 1, label=0, attributes={ 'blur': '2', 'expression': '0', 'illumination': '0', 'occluded': '0', 'pose': '2', 'invalid': '0'}), - Label(0), + Label(1), ] ), - ], categories= ['Parade', 'Handshaking']) + ], categories= ['face', 'Parade', 'Handshaking']) dataset = Dataset.import_from(DUMMY_DATASET_DIR, 'wider_face') From 4810fe107236a2d08d8cb7e4aab16317fc301019 Mon Sep 17 00:00:00 2001 From: yasakova-anastasia Date: Mon, 12 Apr 2021 12:38:19 +0300 Subject: [PATCH 2/4] update Changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9963e4707e..ef2dd699a1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Added support for auto-merging (joining) of datasets with no labels and having labels () - Allowed explicit label removal in `remap_labels` transform () - Image extension in CVAT format export () +- Added a label "face" for bounding boxes in Wider Face () ### Security - From 67f3cf3b851c0d185e56a8574e7b7209824ba0d9 Mon Sep 17 00:00:00 2001 From: yasakova-anastasia Date: Mon, 12 Apr 2021 13:08:24 +0300 Subject: [PATCH 3/4] add check --- datumaro/plugins/widerface_format.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/datumaro/plugins/widerface_format.py b/datumaro/plugins/widerface_format.py index 60906144ed..214c9d5e83 100644 --- a/datumaro/plugins/widerface_format.py +++ b/datumaro/plugins/widerface_format.py @@ -92,7 +92,8 @@ def _load_items(self, path): label_name = label_name.split('--')[1] if label_name != WiderFacePath.IMAGES_DIR_NO_LABEL: label = label_categories.find(label_name)[0] - annotations.append(Label(label=label)) + if label != None: + annotations.append(Label(label=label)) item_id = item_id[len(item_id.split('/')[0]) + 1:] items[item_id] = DatasetItem(id=item_id, subset=self._subset, From a8ae0d0566607efed3409b268ec0c659bb39fe52 Mon Sep 17 00:00:00 2001 From: yasakova-anastasia Date: Tue, 13 Apr 2021 13:58:54 +0300 Subject: [PATCH 4/4] some fixes --- datumaro/plugins/widerface_format.py | 10 ++++------ tests/test_widerface_format.py | 20 ++++++++++---------- 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/datumaro/plugins/widerface_format.py b/datumaro/plugins/widerface_format.py index 214c9d5e83..96796b09f3 100644 --- a/datumaro/plugins/widerface_format.py +++ b/datumaro/plugins/widerface_format.py @@ -41,15 +41,13 @@ def __init__(self, path, subset=None): def _load_categories(self): label_cat = LabelCategories() - label_cat.add(WiderFacePath.DEFAULT_LABEL) path = osp.join(self._dataset_dir, WiderFacePath.LABELS_FILE) if osp.isfile(path): with open(path, encoding='utf-8') as labels_file: for line in labels_file: - label_name = line.strip() - if label_name != WiderFacePath.DEFAULT_LABEL: - label_cat.add(label_name) + label_cat.add(line.strip()) else: + label_cat.add(WiderFacePath.DEFAULT_LABEL) subset_path = osp.join(self._dataset_dir, WiderFacePath.SUBSET_DIR + self._subset, WiderFacePath.IMAGES_DIR) @@ -61,8 +59,8 @@ def _load_categories(self): images_dir = images_dir.split('--')[1] if images_dir != WiderFacePath.DEFAULT_LABEL: label_cat.add(images_dir) - if len(label_cat) == 1: - label_cat = LabelCategories() + if len(label_cat) == 1: + label_cat = LabelCategories() return { AnnotationType.label: label_cat } def _load_items(self, path): diff --git a/tests/test_widerface_format.py b/tests/test_widerface_format.py index e20b621b4b..8e2586999e 100644 --- a/tests/test_widerface_format.py +++ b/tests/test_widerface_format.py @@ -152,40 +152,40 @@ def test_can_import(self): DatasetItem(id='0_Parade_image_01', subset='train', image=np.ones((10, 15, 3)), annotations=[ - Bbox(1, 2, 2, 2, label=0, attributes={ + Bbox(1, 2, 2, 2, attributes={ 'blur': '0', 'expression': '0', 'illumination': '0', 'occluded': '0', 'pose': '0', 'invalid': '0'}), - Label(1), + Label(0), ] ), DatasetItem(id='1_Handshaking_image_02', subset='train', image=np.ones((10, 15, 3)), annotations=[ - Bbox(1, 1, 2, 2, label=0, attributes={ + Bbox(1, 1, 2, 2, attributes={ 'blur': '0', 'expression': '0', 'illumination': '1', 'occluded': '0', 'pose': '0', 'invalid': '0'}), - Bbox(5, 1, 2, 2, label=0, attributes={ + Bbox(5, 1, 2, 2, attributes={ 'blur': '0', 'expression': '0', 'illumination': '1', 'occluded': '0', 'pose': '0', 'invalid': '0'}), - Label(2), + Label(1), ] ), DatasetItem(id='0_Parade_image_03', subset='val', image=np.ones((10, 15, 3)), annotations=[ - Bbox(0, 0, 1, 1, label=0, attributes={ + Bbox(0, 0, 1, 1, attributes={ 'blur': '2', 'expression': '0', 'illumination': '0', 'occluded': '0', 'pose': '2', 'invalid': '0'}), - Bbox(3, 2, 1, 2, label=0, attributes={ + Bbox(3, 2, 1, 2, attributes={ 'blur': '0', 'expression': '0', 'illumination': '0', 'occluded': '1', 'pose': '0', 'invalid': '0'}), - Bbox(5, 6, 1, 1, label=0, attributes={ + Bbox(5, 6, 1, 1, attributes={ 'blur': '2', 'expression': '0', 'illumination': '0', 'occluded': '0', 'pose': '2', 'invalid': '0'}), - Label(1), + Label(0), ] ), - ], categories= ['face', 'Parade', 'Handshaking']) + ], categories= ['Parade', 'Handshaking']) dataset = Dataset.import_from(DUMMY_DATASET_DIR, 'wider_face')