diff --git a/README.md b/README.md index 2c3a2f70ba28..00697b4bbe23 100644 --- a/README.md +++ b/README.md @@ -169,6 +169,7 @@ CVAT annotations ---> Publication, statistics etc. - Simple checking for errors - Comparison with model infernece - Merging and comparison of multiple datasets + - Annotation validation based on the task type (classification, etc.) - Dataset comparison - Dataset statistics (image mean and std, annotation statistics) - Model integration diff --git a/docs/user_manual.md b/docs/user_manual.md index df9bb331242f..cee40321dcb1 100644 --- a/docs/user_manual.md +++ b/docs/user_manual.md @@ -20,6 +20,7 @@ - [Compare projects](#compare-projects) - [Obtaining project info](#get-project-info) - [Obtaining project statistics](#get-project-statistics) + - [Validate project annotations](#validate-project-annotations) - [Register model](#register-model) - [Run inference](#run-model) - [Run inference explanation](#explain-inference) @@ -878,6 +879,155 @@ datum stats -p test_project + +### Validate project annotations + +This command inspects annotations with respect to the task type +and stores the result in a JSON file. + +The task types supported are `classification`, `detection`, and `segmentation`. + +The validation result contains +- annotation statistics based on the task type +- validation reports, such as + - items not having annotations + - items having undefined annotations + - imbalanced distribution in class/attributes + - too small or large values +- summary + +Usage: + +``` bash +datum validate --help + +datum validate -p <project dir> <task_type> +``` + +Validation Result: + +
+ +``` bash +{ + 'statistics': { + ## common statistics + 'label_distribution': { + 'defined_labels': <dict>, # <label:str>: <count:int> + 'undefined_labels': <dict> + # <label:str>: { + # 'count': <int>, + # 'items_with_undefined_label': [<item_key>, ] + # } + }, + 'attribute_distribution': { + 'defined_attributes': <dict>, + # <label:str>: { + # <attribute:str>: { + # 'distribution': {<attr_value:str>: <count:int>, }, + # 'items_missing_attribute': [<item_key>, ] + # } + # } + 'undefined_attributes': <dict> + # <label:str>: { + # <attribute:str>: { + # 'distribution': {<attr_value:str>: <count:int>, }, + # 'items_with_undefined_attr': [<item_key>, ] + # } + # } + }, + 'total_ann_count': <int>, + 'items_missing_annotation': <list>, # [<item_key>, ] + + ## statistics for classification task + 'items_with_multiple_labels': <list>, # [<item_key>, ] + + ## statistics for detection task + 'items_with_invalid_value': <dict>, + # '<item_key>': {<ann_id:int>: [ <property:str>, ], } + # - properties: 'x', 'y', 'width', 'height', + # 'area(wxh)', 'ratio(w/h)', 'short', 'long' + # - 'short' is min(w,h) and 'long' is max(w,h). + 'items_with_negative_length': <dict>, + # '<item_key>': { <ann_id:int>: { <'width'|'height'>: <value>, }, } + 'bbox_distribution_in_label': <dict>, # <label:str>: <bbox_template> + 'bbox_distribution_in_attribute': <dict>, + # <label:str>: {<attribute:str>: { <attr_value>: <bbox_template>, }, } + 'bbox_distribution_in_dataset_item': <dict>, + # '<item_key>': <bbox count:int> + + ## statistics for segmentation task + 'items_with_invalid_value': <dict>, + # '<item_key>': {<ann_id:int>: [ <property:str>, ], } + # - properties: 'area', 'width', 'height' + 'mask_distribution_in_label': <dict>, # <label:str>: <mask_template> + 'mask_distribution_in_attribute': <dict>, + # <label:str>: { + # <attribute:str>: { <attr_value>: <mask_template>, } + # } + 'mask_distribution_in_dataset_item': <dict>, + # '<item_key>': <mask/polygon count:int> + }, + 'validation_reports': <list>, # [ <validation_error_format>, ] + # validation_error_format = { + # 'anomaly_type': <str>, # see datumaro/components/errors.py + # 'description': <str>, # see datumaro/components/errors.py + # 'severity': <str>, # 'warning' or 'error' + # 'item_id': <str>, # optional, when it is related to a DatasetItem + # 'subset': <str>, # optional, when it is related to a DatasetItem + # } + 'summary': { + 'errors': <count:int>, + 'warnings': <count:int> + } +} + +``` + +`item_key` is defined as, +``` python +item_key = (<DatasetItem.id:str>, <DatasetItem.subset:str>) +``` + +`bbox_template` and `mask_template` are defined as, + +``` python +bbox_template = { + 'width': <numerical_stat_template>, + 'height': <numerical_stat_template>, + 'area(wxh)': <numerical_stat_template>, + 'ratio(w/h)': <numerical_stat_template>
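, + 'short': <numerical_stat_template>, # short = min(w, h) + 'long': <numerical_stat_template> # long = max(w, h) +} +mask_template = { + 'area': <numerical_stat_template>, + 'width': <numerical_stat_template>, + 'height': <numerical_stat_template> +} +``` + +`numerical_stat_template` is defined as, + +``` python +numerical_stat_template = { + 'items_far_from_mean': <dict>, + # {'<item_key>': {<ann_id:int>: <value:float>, }, } + 'mean': <float>, + 'stdev': <float>, + 'min': <float>, + 'max': <float>, + 'median': <float>, + 'histogram': { + 'bins': <list>, # [<float>, ] + 'counts': <list>, # [<int>, ] + } +} +``` + +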
+ + ### Register model Supported models: