Skip to content

Commit

Permalink
add validation item for instance segmentation (cvat-ai#227)
Browse files (browse the repository at this point in the history)
* add validation item for instance segmentation
  • Loading branch information
Jihyeon Yi authored Apr 29, 2021
1 parent b9469d9 commit ec4b013
Show file tree
Hide file tree
Showing 4 changed files with 1,004 additions and 494 deletions.
17 changes: 12 additions & 5 deletions datumaro/cli/contexts/project/__init__.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -8,6 +8,7 @@
import os
import os.path as osp
import shutil
import numpy as np
from enum import Enum

from datumaro.components.dataset_filter import DatasetItemEncoder
Expand Down Expand Up @@ -815,28 +816,34 @@ def validate_command(args):
project = load_project(args.project_dir)
task_type = args.task_type
subset_name = args.subset_name
dst_file_name = 'validation_results'
dst_file_name = f'validation_results-{task_type}'

dataset = project.make_dataset()
if subset_name is not None:
dataset = dataset.get_subset(subset_name)
dst_file_name += f'-{subset_name}'
validation_results = validate_annotations(dataset, task_type)

def _convert_tuple_keys_to_str(d):
def numpy_encoder(obj):
if isinstance(obj, np.generic):
return obj.item()

def _make_serializable(d):
for key, val in list(d.items()):
# tuple key to str
if isinstance(key, tuple):
d[str(key)] = val
d.pop(key)
if isinstance(val, dict):
_convert_tuple_keys_to_str(val)
_make_serializable(val)

_convert_tuple_keys_to_str(validation_results)
_make_serializable(validation_results)

dst_file = generate_next_file_name(dst_file_name, ext='.json')
log.info("Writing project validation results to '%s'" % dst_file)
with open(dst_file, 'w') as f:
json.dump(validation_results, f, indent=4, sort_keys=True)
json.dump(validation_results, f, indent=4, sort_keys=True,
default=numpy_encoder)

def build_parser(parser_ctor=argparse.ArgumentParser):
parser = parser_ctor(
Expand Down
35 changes: 16 additions & 19 deletions datumaro/components/errors.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -119,10 +119,13 @@ def __str__(self):
return "Metadata (ex. LabelCategories) should be defined" \
" to validate a dataset."


@attrs
class MissingLabelAnnotation(DatasetItemValidationError):
class MissingAnnotation(DatasetItemValidationError):
ann_type = attrib()
def __str__(self):
return "Item needs a label, but not found."
return f"Item needs '{self.ann_type}' annotation(s), " \
"but not found."

@attrs
class MultiLabelAnnotations(DatasetItemValidationError):
Expand Down Expand Up @@ -228,40 +231,34 @@ def __str__(self):
f" '{self. attr_name}' for the label '{self.label_name}'."

@attrs
class ImbalancedBboxDistInLabel(DatasetValidationError):
class ImbalancedDistInLabel(DatasetValidationError):
label_name = attrib()
prop = attrib()

def __str__(self):
return f"Values of bbox '{self.prop}' are not evenly " \
return f"Values of '{self.prop}' are not evenly " \
f"distributed for '{self.label_name}' label."

@attrs
class ImbalancedBboxDistInAttribute(DatasetValidationError):
class ImbalancedDistInAttribute(DatasetValidationError):
label_name = attrib()
attr_name = attrib()
attr_value = attrib()
prop = attrib()

def __str__(self):
return f"Values of bbox '{self.prop}' are not evenly " \
return f"Values of '{self.prop}' are not evenly " \
f"distributed for '{self.attr_name}' = '{self.attr_value}' for " \
f"the '{self.label_name}' label."

@attrs
class MissingBboxAnnotation(DatasetItemValidationError):
def __str__(self):
return 'Item needs one or more bounding box annotations, ' \
'but not found.'

@attrs
class NegativeLength(DatasetItemValidationError):
ann_id = attrib()
prop = attrib()
val = attrib()

def __str__(self):
return f"Bounding box annotation '{self.ann_id}' in " \
return f"Annotation '{self.ann_id}' in " \
"the item should have a positive value of " \
f"'{self.prop}' but got '{self.val}'."

Expand All @@ -271,9 +268,9 @@ class InvalidValue(DatasetItemValidationError):
prop = attrib()

def __str__(self):
return f"Bounding box annotation '{self.ann_id}' in " \
return f"Annotation '{self.ann_id}' in " \
'the item has an inf or a NaN value of ' \
f"bounding box '{self.prop}'."
f"'{self.prop}'."

@attrs
class FarFromLabelMean(DatasetItemValidationError):
Expand All @@ -284,8 +281,8 @@ class FarFromLabelMean(DatasetItemValidationError):
val = attrib()

def __str__(self):
return f"Bounding box annotation '{self.ann_id}' in " \
f"the item has a value of bounding box '{self.prop}' that " \
return f"Annotation '{self.ann_id}' in " \
f"the item has a value of '{self.prop}' that " \
"is too far from the label average. (mean of " \
f"'{self.label_name}' label: {self.mean}, got '{self.val}')."

Expand All @@ -300,8 +297,8 @@ class FarFromAttrMean(DatasetItemValidationError):
val = attrib()

def __str__(self):
return f"Bounding box annotation '{self.ann_id}' in the " \
f"item has a value of bounding box '{self.prop}' that " \
return f"Annotation '{self.ann_id}' in the " \
f"item has a value of '{self.prop}' that " \
"is too far from the attribute average. (mean of " \
f"'{self.attr_name}' = '{self.attr_value}' for the " \
f"'{self.label_name}' label: {self.mean}, got '{self.val}')."
Loading

0 comments on commit ec4b013

Please sign in to comment.