add validation item for instance segmentation (cvat-ai#227)

* add validation item for instance segmentation
TOsmanov · Apr 29, 2021 · ec4b013
1 parent b9469d9
commit ec4b013
Show file tree

Hide file tree

Showing 4 changed files with 1,004 additions and 494 deletions.
diff --git a/datumaro/cli/contexts/project/__init__.py b/datumaro/cli/contexts/project/__init__.py
@@ -8,6 +8,7 @@
 import os
 import os.path as osp
 import shutil
+import numpy as np
 from enum import Enum
 
 from datumaro.components.dataset_filter import DatasetItemEncoder
@@ -815,28 +816,34 @@ def validate_command(args):
     project = load_project(args.project_dir)
     task_type = args.task_type
     subset_name = args.subset_name
-    dst_file_name = 'validation_results'
+    dst_file_name = f'validation_results-{task_type}'
 
     dataset = project.make_dataset()
     if subset_name is not None:
         dataset = dataset.get_subset(subset_name)
         dst_file_name += f'-{subset_name}'
     validation_results = validate_annotations(dataset, task_type)
 
-    def _convert_tuple_keys_to_str(d):
+    def numpy_encoder(obj):
+        if isinstance(obj, np.generic):
+            return obj.item()
+
+    def _make_serializable(d):
         for key, val in list(d.items()):
+            # JSON object keys cannot be tuples, so convert tuple keys to str
             if isinstance(key, tuple):
                 d[str(key)] = val
                 d.pop(key)
             if isinstance(val, dict):
-                _convert_tuple_keys_to_str(val)
+                _make_serializable(val)
 
-    _convert_tuple_keys_to_str(validation_results)
+    _make_serializable(validation_results)
 
     dst_file = generate_next_file_name(dst_file_name, ext='.json')
     log.info("Writing project validation results to '%s'" % dst_file)
     with open(dst_file, 'w') as f:
-        json.dump(validation_results, f, indent=4, sort_keys=True)
+        json.dump(validation_results, f, indent=4, sort_keys=True,
+                  default=numpy_encoder)
 
 def build_parser(parser_ctor=argparse.ArgumentParser):
     parser = parser_ctor(

diff --git a/datumaro/components/errors.py b/datumaro/components/errors.py
@@ -119,10 +119,13 @@ def __str__(self):
         return "Metadata (ex. LabelCategories) should be defined" \
             " to validate a dataset."
 
+
 @attrs
-class MissingLabelAnnotation(DatasetItemValidationError):
+class MissingAnnotation(DatasetItemValidationError):
+    ann_type = attrib()
     def __str__(self):
-        return "Item needs a label, but not found."
+        return f"Item needs '{self.ann_type}' annotation(s), " \
+            "but not found."
 
 @attrs
 class MultiLabelAnnotations(DatasetItemValidationError):
@@ -228,40 +231,34 @@ def __str__(self):
             f" '{self. attr_name}' for the label '{self.label_name}'."
 
 @attrs
-class ImbalancedBboxDistInLabel(DatasetValidationError):
+class ImbalancedDistInLabel(DatasetValidationError):
     label_name = attrib()
     prop = attrib()
 
     def __str__(self):
-        return f"Values of bbox '{self.prop}' are not evenly " \
+        return f"Values of '{self.prop}' are not evenly " \
                 f"distributed for '{self.label_name}' label."
 
 @attrs
-class ImbalancedBboxDistInAttribute(DatasetValidationError):
+class ImbalancedDistInAttribute(DatasetValidationError):
     label_name = attrib()
     attr_name = attrib()
     attr_value = attrib()
     prop = attrib()
 
     def __str__(self):
-        return f"Values of bbox '{self.prop}' are not evenly " \
+        return f"Values of '{self.prop}' are not evenly " \
             f"distributed for '{self.attr_name}' = '{self.attr_value}' for " \
             f"the '{self.label_name}' label."
 
-@attrs
-class MissingBboxAnnotation(DatasetItemValidationError):
-    def __str__(self):
-        return 'Item needs one or more bounding box annotations, ' \
-            'but not found.'
-
 @attrs
 class NegativeLength(DatasetItemValidationError):
     ann_id = attrib()
     prop = attrib()
     val = attrib()
 
     def __str__(self):
-        return f"Bounding box annotation '{self.ann_id}' in " \
+        return f"Annotation '{self.ann_id}' in " \
             "the item should have a positive value of " \
             f"'{self.prop}' but got '{self.val}'."
 
@@ -271,9 +268,9 @@ class InvalidValue(DatasetItemValidationError):
     prop = attrib()
 
     def __str__(self):
-        return f"Bounding box annotation '{self.ann_id}' in " \
+        return f"Annotation '{self.ann_id}' in " \
             'the item has an inf or a NaN value of ' \
-            f"bounding box '{self.prop}'."
+            f"'{self.prop}'."
 
 @attrs
 class FarFromLabelMean(DatasetItemValidationError):
@@ -284,8 +281,8 @@ class FarFromLabelMean(DatasetItemValidationError):
     val = attrib()
 
     def __str__(self):
-        return f"Bounding box annotation '{self.ann_id}' in " \
-            f"the item has a value of bounding box '{self.prop}' that " \
+        return f"Annotation '{self.ann_id}' in " \
+            f"the item has a value of '{self.prop}' that " \
             "is too far from the label average. (mean of " \
             f"'{self.label_name}' label: {self.mean}, got '{self.val}')."
 
@@ -300,8 +297,8 @@ class FarFromAttrMean(DatasetItemValidationError):
     val = attrib()
 
     def __str__(self):
-        return f"Bounding box annotation '{self.ann_id}' in the " \
-            f"item has a value of bounding box '{self.prop}' that " \
+        return f"Annotation '{self.ann_id}' in the " \
+            f"item has a value of '{self.prop}' that " \
             "is too far from the attribute average. (mean of " \
             f"'{self.attr_name}' = '{self.attr_value}' for the " \
             f"'{self.label_name}' label: {self.mean}, got '{self.val}')."