Skip to content

Commit

Permalink
Fixes in WiderFace and VGGFace2 formats (cvat-ai#103)
Browse files Browse the repository at this point in the history
* Fix type casting of points and attributes and add the ability to save the label to bbox

* Add check for wrong number of points

* Remove groups, add checks for number of annotations

* Refactor VggFace2

* Refactor WiderFace

Co-authored-by: Maxim Zhiltsov <[email protected]>
  • Loading branch information
yasakova-anastasia and Maxim Zhiltsov authored Feb 16, 2021
1 parent d86799c commit 1325eef
Show file tree
Hide file tree
Showing 4 changed files with 171 additions and 118 deletions.
85 changes: 53 additions & 32 deletions datumaro/plugins/vgg_face2_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,31 +57,41 @@ def _load_categories(self):
return { AnnotationType.label: label_cat }

def _load_items(self, path):
def _split_item_path(path):
    """Split a ``<label dir>/<item id>`` path into ``(item_id, label)``.

    The text before the first '/' is treated as the label directory name
    and everything after that separator as the item id. ``label`` is the
    first element of the label categories' ``find()`` result for that
    directory name, or None when the directory is the "no label"
    placeholder directory.
    """
    # partition() splits on the first '/' only, so any further separators
    # stay part of the item id.
    dir_name, _, remainder = path.partition('/')
    if dir_name == VggFace2Path.IMAGES_DIR_NO_LABEL:
        return remainder, None
    label_cat = self._categories[AnnotationType.label]
    return remainder, label_cat.find(dir_name)[0]

items = {}

with open(path) as content:
landmarks_table = list(csv.DictReader(content))

for row in landmarks_table:
item_id = row['NAME_ID']
label = None
if '/' in item_id:
label_name = item_id.split('/')[0]
if label_name != VggFace2Path.IMAGES_DIR_NO_LABEL:
label = \
self._categories[AnnotationType.label].find(label_name)[0]
item_id = item_id[len(label_name) + 1:]
item_id, label = _split_item_path(item_id)

if item_id not in items:
image_path = osp.join(self._dataset_dir, self._subset,
row['NAME_ID'] + VggFace2Path.IMAGE_EXT)
items[item_id] = DatasetItem(id=item_id, subset=self._subset,
image=image_path)

annotations = items[item_id].annotations
if [a for a in annotations if a.type == AnnotationType.points]:
raise Exception("Item %s: an image can have only one "
"set of landmarks" % item_id)

if len([p for p in row if row[p] == '']) == 0 and len(row) == 11:
annotations.append(Points(
[float(row[p]) for p in row if p != 'NAME_ID'], label=label,
group=1))
[float(row[p]) for p in row if p != 'NAME_ID'], label=label))
elif label is not None:
annotations.append(Label(label=label, group=1))
annotations.append(Label(label=label))

bboxes_path = osp.join(self._dataset_dir, VggFace2Path.ANNOTATION_DIR,
VggFace2Path.BBOXES_FILE + self._subset + '.csv')
Expand All @@ -92,20 +102,22 @@ def _load_items(self, path):
item_id = row['NAME_ID']
label = None
if '/' in item_id:
label_name = item_id.split('/')[0]
if label_name != VggFace2Path.IMAGES_DIR_NO_LABEL:
label = \
self._categories[AnnotationType.label].find(label_name)[0]
item_id = item_id[len(label_name) + 1:]
item_id, label = _split_item_path(item_id)

if item_id not in items:
image_path = osp.join(self._dataset_dir, self._subset,
row['NAME_ID'] + VggFace2Path.IMAGE_EXT)
items[item_id] = DatasetItem(id=item_id, subset=self._subset,
image=image_path)

annotations = items[item_id].annotations
if [a for a in annotations if a.type == AnnotationType.bbox]:
raise Exception("Item %s: an image can have only one "
"bbox" % item_id)

if len([p for p in row if row[p] == '']) == 0 and len(row) == 5:
annotations.append(Bbox(float(row['X']), float(row['Y']),
float(row['W']), float(row['H']), label=label, group=1))
float(row['W']), float(row['H']), label=label))
return items

class VggFace2Importer(Importer):
Expand Down Expand Up @@ -155,34 +167,43 @@ def apply(self):

landmarks = [a for a in item.annotations
if a.type == AnnotationType.points]
for landmark in landmarks:
if landmark.label is not None and \
label_categories[landmark.label].name:
name_id = label_categories[landmark.label].name \
if 1 < len(landmarks):
raise Exception("Item (%s, %s): an image can have only one "
"set of landmarks" % (item.id, item.subset))
if landmarks:
if landmarks[0].label is not None and \
label_categories[landmarks[0].label].name:
name_id = label_categories[landmarks[0].label].name \
+ '/' + item.id
else:
name_id = VggFace2Path.IMAGES_DIR_NO_LABEL \
+ '/' + item.id
points = landmark.points
landmarks_table.append({'NAME_ID': name_id,
'P1X': points[0], 'P1Y': points[1],
'P2X': points[2], 'P2Y': points[3],
'P3X': points[4], 'P3Y': points[5],
'P4X': points[6], 'P4Y': points[7],
'P5X': points[8], 'P5Y': points[9]})
points = landmarks[0].points
if len(points) != 10:
landmarks_table.append({'NAME_ID': name_id})
else:
landmarks_table.append({'NAME_ID': name_id,
'P1X': points[0], 'P1Y': points[1],
'P2X': points[2], 'P2Y': points[3],
'P3X': points[4], 'P3Y': points[5],
'P4X': points[6], 'P4Y': points[7],
'P5X': points[8], 'P5Y': points[9]})

bboxes = [a for a in item.annotations
if a.type == AnnotationType.bbox]
for bbox in bboxes:
if bbox.label is not None and \
label_categories[bbox.label].name:
name_id = label_categories[bbox.label].name \
if 1 < len(bboxes):
raise Exception("Item (%s, %s): an image can have only one "
"bbox" % (item.id, item.subset))
if bboxes:
if bboxes[0].label is not None and \
label_categories[bboxes[0].label].name:
name_id = label_categories[bboxes[0].label].name \
+ '/' + item.id
else:
name_id = VggFace2Path.IMAGES_DIR_NO_LABEL \
+ '/' + item.id
bboxes_table.append({'NAME_ID': name_id, 'X': bbox.x,
'Y': bbox.y, 'W': bbox.w, 'H': bbox.h})
bboxes_table.append({'NAME_ID': name_id, 'X': bboxes[0].x,
'Y': bboxes[0].y, 'W': bboxes[0].w, 'H': bboxes[0].h})

labels = [a for a in item.annotations
if a.type == AnnotationType.label]
Expand Down
67 changes: 37 additions & 30 deletions datumaro/plugins/widerface_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,23 +30,21 @@ def __init__(self, path):
self._dataset_dir = osp.dirname(osp.dirname(path))

subset = osp.splitext(osp.basename(path))[0]
match = re.fullmatch(r'wider_face_\S+_bbx_gt', subset)
if match:
if re.fullmatch(r'wider_face_\S+_bbx_gt', subset):
subset = subset.split('_')[2]
super().__init__(subset=subset)

self._categories = self._load_categories()
self._items = list(self._load_items(path).values())

def _load_categories(self):
self._categories[AnnotationType.label] = LabelCategories()
label_cat = LabelCategories()

path = osp.join(self._dataset_dir, WiderFacePath.LABELS_FILE)
if osp.isfile(path):
with open(path, encoding='utf-8') as labels_file:
labels = [s.strip() for s in labels_file]
for label in labels:
label_cat.add(label)
for line in labels_file:
label_cat.add(line.strip())
else:
subset_path = osp.join(self._dataset_dir,
WiderFacePath.SUBSET_DIR + self._subset,
Expand All @@ -58,10 +56,12 @@ def _load_categories(self):
if '--' in images_dir:
images_dir = images_dir.split('--')[1]
label_cat.add(images_dir)

return { AnnotationType.label: label_cat }

def _load_items(self, path):
items = {}

with open(path, 'r') as f:
lines = f.readlines()

Expand All @@ -70,8 +70,9 @@ def _load_items(self, path):

for image_id in image_ids:
image = lines[image_id]
image_path = osp.join(self._dataset_dir, WiderFacePath.SUBSET_DIR
+ self._subset, WiderFacePath.IMAGES_DIR, image[:-1])
image_path = osp.join(self._dataset_dir,
WiderFacePath.SUBSET_DIR + self._subset,
WiderFacePath.IMAGES_DIR, image[:-1])
item_id = image[:-(len(WiderFacePath.IMAGE_EXT) + 1)]
annotations = []
if '/' in item_id:
Expand All @@ -88,18 +89,26 @@ def _load_items(self, path):
bbox_lines = lines[image_id + 2 : image_id + int(bbox_count) + 2]
for bbox in bbox_lines:
bbox_list = bbox.split()
if len(bbox_list) >= 4:
if 4 <= len(bbox_list):
attributes = {}
if len(bbox_list) == 10:
label = None
if len(bbox_list) == 5 or len(bbox_list) == 11:
if len(bbox_list) == 5:
label_name = bbox_list[4]
else:
label_name = bbox_list[10]
label = \
self._categories[AnnotationType.label].find(label_name)[0]
if 10 <= len(bbox_list):
i = 4
for attr in WiderFacePath.BBOX_ATTRIBUTES:
if bbox_list[i] != '-':
attributes[attr] = int(bbox_list[i])
attributes[attr] = bbox_list[i]
i += 1
annotations.append(Bbox(
float(bbox_list[0]), float(bbox_list[1]),
float(bbox_list[2]), float(bbox_list[3]),
attributes = attributes
attributes=attributes, label=label
))

items[item_id] = DatasetItem(id=item_id, subset=self._subset,
Expand All @@ -126,35 +135,30 @@ def apply(self):
f.write('\n'.join(label.name for label in label_categories))

for subset_name, subset in self._extractor.subsets().items():
subset_dir = osp.join(save_dir, WiderFacePath.SUBSET_DIR + subset_name)
subset_dir = osp.join(save_dir,
WiderFacePath.SUBSET_DIR + subset_name)

wider_annotation = ''
for item in subset:
labels = [a.label for a in item.annotations
if a.type == AnnotationType.label]
if labels:
wider_annotation += '%s\n' % (str(labels[0]) + '--' \
+ label_categories[labels[0]].name + '/' \
+ item.id + WiderFacePath.IMAGE_EXT)
if item.has_image and self._save_images:
self._save_image(item, osp.join(save_dir, subset_dir,
WiderFacePath.IMAGES_DIR, str(labels[0]) + '--' \
+ label_categories[labels[0]].name + '/' + item.id \
+ WiderFacePath.IMAGE_EXT))
image_path = '%s--%s/%s' % (
labels[0], label_categories[labels[0]].name,
item.id + WiderFacePath.IMAGE_EXT)
else:
wider_annotation += '%s\n' % (WiderFacePath.IMAGES_DIR_NO_LABEL \
+ '/' + item.id + WiderFacePath.IMAGE_EXT)
if item.has_image and self._save_images:
self._save_image(item, osp.join(save_dir, subset_dir,
WiderFacePath.IMAGES_DIR, WiderFacePath.IMAGES_DIR_NO_LABEL \
+ '/' + item.id + WiderFacePath.IMAGE_EXT))
image_path = '%s/%s' % (WiderFacePath.IMAGES_DIR_NO_LABEL,
item.id + WiderFacePath.IMAGE_EXT)
wider_annotation += image_path + '\n'
if item.has_image and self._save_images:
self._save_image(item, osp.join(save_dir, subset_dir,
WiderFacePath.IMAGES_DIR, image_path))

bboxes = [a for a in item.annotations
if a.type == AnnotationType.bbox]

wider_annotation += '%s\n' % len(bboxes)
for bbox in bboxes:
wider_bb = ' '.join('%d' % p for p in bbox.get_bbox())
wider_bb = ' '.join('%s' % p for p in bbox.get_bbox())
wider_annotation += '%s ' % wider_bb
if bbox.attributes:
wider_attr = ''
Expand All @@ -165,9 +169,12 @@ def apply(self):
attr_counter += 1
else:
wider_attr += '- '
if attr_counter > 0:
if 0 < attr_counter:
wider_annotation += wider_attr
if bbox.label is not None:
wider_annotation += '%s' % label_categories[bbox.label].name
wider_annotation += '\n'

annotation_path = osp.join(save_dir, WiderFacePath.ANNOTATIONS_DIR,
'wider_face_' + subset_name + '_bbx_gt.txt')
os.makedirs(osp.dirname(annotation_path), exist_ok=True)
Expand Down
Loading

0 comments on commit 1325eef

Please sign in to comment.