diff --git a/CHANGELOG.md b/CHANGELOG.md index 37c4fd423ab..b453705f91e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - UI tracking has been reworked () - Manifest generation: Reduce creating time () - Migration from NPM 6 to NPM 7 () +- Update Datumaro dependency to 0.2.0 () ### Deprecated diff --git a/cvat/apps/dataset_manager/bindings.py b/cvat/apps/dataset_manager/bindings.py index fba124b5c88..1e1225add48 100644 --- a/cvat/apps/dataset_manager/bindings.py +++ b/cvat/apps/dataset_manager/bindings.py @@ -3,21 +3,25 @@ # # SPDX-License-Identifier: MIT -import sys import os.path as osp +import sys from collections import namedtuple -from typing import Any, Callable, DefaultDict, Dict, List, Literal, Mapping, NamedTuple, OrderedDict, Tuple, Union from pathlib import Path +from typing import (Any, Callable, DefaultDict, Dict, List, Literal, Mapping, + NamedTuple, OrderedDict, Tuple, Union) +import datumaro.components.annotation as datum_annotation +import datumaro.components.extractor as datum_extractor +from datumaro.util import cast +from datumaro.util.image import ByteImage, Image from django.utils import timezone -import datumaro.components.extractor as datumaro from cvat.apps.engine.frame_provider import FrameProvider -from cvat.apps.engine.models import AttributeType, ShapeType, Project, Task, Label, DimensionType, Image as Img -from datumaro.util import cast -from datumaro.util.image import ByteImage, Image +from cvat.apps.engine.models import AttributeType, DimensionType +from cvat.apps.engine.models import Image as Img +from cvat.apps.engine.models import Label, Project, ShapeType, Task -from .annotation import AnnotationManager, TrackManager, AnnotationIR +from .annotation import AnnotationIR, AnnotationManager, TrackManager class InstanceLabelData: @@ -192,7 +196,7 @@ def meta_for_task(db_task, host, label_mapping=None): ("bugtracker", db_task.bug_tracker), 
("created", str(timezone.localtime(db_task.created_date))), ("updated", str(timezone.localtime(db_task.updated_date))), - ("subset", db_task.subset or datumaro.DEFAULT_SUBSET_NAME), + ("subset", db_task.subset or datum_extractor.DEFAULT_SUBSET_NAME), ("start_frame", str(db_task.data.start_frame)), ("stop_frame", str(db_task.data.stop_frame)), ("frame_filter", db_task.data.frame_filter), @@ -800,9 +804,10 @@ def categories(self) -> dict: @staticmethod def _load_categories(labels: list): - categories: Dict[datumaro.AnnotationType, datumaro.Categories] = {} + categories: Dict[datum_annotation.AnnotationType, + datum_annotation.Categories] = {} - label_categories = datumaro.LabelCategories(attributes=['occluded']) + label_categories = datum_annotation.LabelCategories(attributes=['occluded']) for _, label in labels: label_categories.add(label['name']) @@ -810,7 +815,7 @@ def _load_categories(labels: list): label_categories.attributes.add(attr['name']) - categories[datumaro.AnnotationType.label] = label_categories + categories[datum_annotation.AnnotationType.label] = label_categories return categories @@ -824,7 +829,7 @@ def _load_user_info(meta: dict): def _read_cvat_anno(self, cvat_frame_anno: Union[ProjectData.Frame, TaskData.Frame], labels: list): categories = self.categories() - label_cat = categories[datumaro.AnnotationType.label] + label_cat = categories[datum_annotation.AnnotationType.label] def map_label(name): return label_cat.find(name)[0] label_attrs = { label['name']: label['attributes'] @@ -834,7 +839,7 @@ def map_label(name): return label_cat.find(name)[0] return convert_cvat_anno_to_dm(cvat_frame_anno, label_attrs, map_label) -class CvatTaskDataExtractor(datumaro.SourceExtractor, CVATDataExtractorMixin): +class CvatTaskDataExtractor(datum_extractor.SourceExtractor, CVATDataExtractorMixin): def __init__(self, task_data, include_images=False, format_type=None, dimension=DimensionType.DIM_2D): super().__init__() self._categories = 
self._load_categories(task_data.meta['task']['labels']) @@ -893,7 +898,8 @@ def _make_image(i, **kwargs): dm_anno = self._read_cvat_anno(frame_data, task_data.meta['task']['labels']) if dimension == DimensionType.DIM_2D: - dm_item = datumaro.DatasetItem(id=osp.splitext(frame_data.name)[0], + dm_item = datum_extractor.DatasetItem( + id=osp.splitext(frame_data.name)[0], annotations=dm_anno, image=dm_image, attributes={'frame': frame_data.frame }) @@ -908,9 +914,11 @@ def _make_image(i, **kwargs): attributes["labels"].append({"label_id": idx, "name": label["name"], "color": label["color"]}) attributes["track_id"] = -1 - dm_item = datumaro.DatasetItem(id=osp.splitext(osp.split(frame_data.name)[-1])[0], - annotations=dm_anno, point_cloud=dm_image[0], related_images=dm_image[1], - attributes=attributes) + dm_item = datum_extractor.DatasetItem( + id=osp.splitext(osp.split(frame_data.name)[-1])[0], + annotations=dm_anno, point_cloud=dm_image[0], related_images=dm_image[1], + attributes=attributes + ) dm_items.append(dm_item) @@ -918,7 +926,7 @@ def _make_image(i, **kwargs): def _read_cvat_anno(self, cvat_frame_anno: TaskData.Frame, labels: list): categories = self.categories() - label_cat = categories[datumaro.AnnotationType.label] + label_cat = categories[datum_annotation.AnnotationType.label] def map_label(name): return label_cat.find(name)[0] label_attrs = { label['name']: label['attributes'] @@ -927,7 +935,7 @@ def map_label(name): return label_cat.find(name)[0] return convert_cvat_anno_to_dm(cvat_frame_anno, label_attrs, map_label, self._format_type, self._dimension) -class CVATProjectDataExtractor(datumaro.Extractor, CVATDataExtractorMixin): +class CVATProjectDataExtractor(datum_extractor.Extractor, CVATDataExtractorMixin): def __init__(self, project_data: ProjectData, include_images: bool = False, format_type: str = None, dimension: DimensionType = DimensionType.DIM_2D): super().__init__() self._categories = 
self._load_categories(project_data.meta['project']['labels']) @@ -935,7 +943,7 @@ def __init__(self, project_data: ProjectData, include_images: bool = False, form self._dimension = dimension self._format_type = format_type - dm_items: List[datumaro.DatasetItem] = [] + dm_items: List[datum_extractor.DatasetItem] = [] ext_per_task: Dict[int, str] = {} image_maker_per_task: Dict[int, Callable] = {} @@ -996,7 +1004,8 @@ def _make_image(i, **kwargs): dm_image = Image(**image_args) dm_anno = self._read_cvat_anno(frame_data, project_data.meta['project']['labels']) if self._dimension == DimensionType.DIM_2D: - dm_item = datumaro.DatasetItem(id=osp.splitext(frame_data.name)[0], + dm_item = datum_extractor.DatasetItem( + id=osp.splitext(frame_data.name)[0], annotations=dm_anno, image=dm_image, subset=frame_data.subset, attributes={'frame': frame_data.frame} @@ -1012,9 +1021,11 @@ def _make_image(i, **kwargs): attributes["labels"].append({"label_id": idx, "name": label["name"], "color": label["color"]}) attributes["track_id"] = -1 - dm_item = datumaro.DatasetItem(id=osp.splitext(osp.split(frame_data.name)[-1])[0], - annotations=dm_anno, point_cloud=dm_image[0], related_images=dm_image[1], - attributes=attributes, subset=frame_data.subset) + dm_item = datum_extractor.DatasetItem( + id=osp.splitext(osp.split(frame_data.name)[-1])[0], + annotations=dm_anno, point_cloud=dm_image[0], related_images=dm_image[1], + attributes=attributes, subset=frame_data.subset + ) dm_items.append(dm_item) self._items = dm_items @@ -1063,13 +1074,13 @@ def get_defaulted_subset(subset: str, subsets: List[str]) -> str: if subset: return subset else: - if datumaro.DEFAULT_SUBSET_NAME not in subsets: - return datumaro.DEFAULT_SUBSET_NAME + if datum_extractor.DEFAULT_SUBSET_NAME not in subsets: + return datum_extractor.DEFAULT_SUBSET_NAME else: i = 1 while i < sys.maxsize: - if f'{datumaro.DEFAULT_SUBSET_NAME}_{i}' not in subsets: - return f'{datumaro.DEFAULT_SUBSET_NAME}_{i}' + if 
f'{datum_extractor.DEFAULT_SUBSET_NAME}_{i}' not in subsets: + return f'{datum_extractor.DEFAULT_SUBSET_NAME}_{i}' i += 1 raise Exception('Cannot find default name for subset') @@ -1100,7 +1111,7 @@ def convert_attrs(label, cvat_attrs): anno_label = map_label(tag_obj.label) anno_attr = convert_attrs(tag_obj.label, tag_obj.attributes) - anno = datumaro.Label(label=anno_label, + anno = datum_annotation.Label(label=anno_label, attributes=anno_attr, group=anno_group) item_anno.append(anno) @@ -1121,20 +1132,20 @@ def convert_attrs(label, cvat_attrs): anno_points = shape_obj.points if shape_obj.type == ShapeType.POINTS: - anno = datumaro.Points(anno_points, + anno = datum_annotation.Points(anno_points, label=anno_label, attributes=anno_attr, group=anno_group, z_order=shape_obj.z_order) elif shape_obj.type == ShapeType.POLYLINE: - anno = datumaro.PolyLine(anno_points, + anno = datum_annotation.PolyLine(anno_points, label=anno_label, attributes=anno_attr, group=anno_group, z_order=shape_obj.z_order) elif shape_obj.type == ShapeType.POLYGON: - anno = datumaro.Polygon(anno_points, + anno = datum_annotation.Polygon(anno_points, label=anno_label, attributes=anno_attr, group=anno_group, z_order=shape_obj.z_order) elif shape_obj.type == ShapeType.RECTANGLE: x0, y0, x1, y1 = anno_points - anno = datumaro.Bbox(x0, y0, x1 - x0, y1 - y0, + anno = datum_annotation.Bbox(x0, y0, x1 - x0, y1 - y0, label=anno_label, attributes=anno_attr, group=anno_group, z_order=shape_obj.z_order) elif shape_obj.type == ShapeType.CUBOID: @@ -1144,9 +1155,10 @@ def convert_attrs(label, cvat_attrs): else: anno_id = index position, rotation, scale = anno_points[0:3], anno_points[3:6], anno_points[6:9] - anno = datumaro.Cuboid3d(id=anno_id, position=position, rotation=rotation, scale=scale, - label=anno_label, attributes=anno_attr, group=anno_group - ) + anno = datum_annotation.Cuboid3d( + id=anno_id, position=position, rotation=rotation, scale=scale, + label=anno_label, attributes=anno_attr, 
group=anno_group + ) else: continue else: @@ -1192,17 +1204,17 @@ def find_dataset_root(dm_dataset, task_data): def import_dm_annotations(dm_dataset, task_data): shapes = { - datumaro.AnnotationType.bbox: ShapeType.RECTANGLE, - datumaro.AnnotationType.polygon: ShapeType.POLYGON, - datumaro.AnnotationType.polyline: ShapeType.POLYLINE, - datumaro.AnnotationType.points: ShapeType.POINTS, - datumaro.AnnotationType.cuboid_3d: ShapeType.CUBOID + datum_annotation.AnnotationType.bbox: ShapeType.RECTANGLE, + datum_annotation.AnnotationType.polygon: ShapeType.POLYGON, + datum_annotation.AnnotationType.polyline: ShapeType.POLYLINE, + datum_annotation.AnnotationType.points: ShapeType.POINTS, + datum_annotation.AnnotationType.cuboid_3d: ShapeType.CUBOID } if len(dm_dataset) == 0: return - label_cat = dm_dataset.categories()[datumaro.AnnotationType.label] + label_cat = dm_dataset.categories()[datum_annotation.AnnotationType.label] root_hint = find_dataset_root(dm_dataset, task_data) @@ -1231,7 +1243,7 @@ def import_dm_annotations(dm_dataset, task_data): if hasattr(ann, 'label') and ann.label is None: raise CvatImportError("annotation has no label") if ann.type in shapes: - if ann.type == datumaro.AnnotationType.cuboid_3d: + if ann.type == datum_annotation.AnnotationType.cuboid_3d: try: ann.points = [*ann.position,*ann.rotation,*ann.scale,0,0,0,0,0,0,0] except Exception as e: @@ -1249,7 +1261,7 @@ def import_dm_annotations(dm_dataset, task_data): attributes=[task_data.Attribute(name=n, value=str(v)) for n, v in ann.attributes.items()], )) - elif ann.type == datumaro.AnnotationType.label: + elif ann.type == datum_annotation.AnnotationType.label: task_data.add_tag(task_data.Tag( frame=frame_number, label=label_cat.items[ann.label].name, diff --git a/cvat/apps/dataset_manager/formats/datumaro.py b/cvat/apps/dataset_manager/formats/datumaro.py new file mode 100644 index 00000000000..8d6f6d00669 --- /dev/null +++ b/cvat/apps/dataset_manager/formats/datumaro.py @@ -0,0 +1,67 @@ +# 
Copyright (C) 2019 Intel Corporation +# +# SPDX-License-Identifier: MIT + +from tempfile import TemporaryDirectory + +from datumaro.components.dataset import Dataset +from datumaro.components.extractor import ItemTransform +from datumaro.util.image import Image +from pyunpack import Archive + +from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor, + import_dm_annotations) +from cvat.apps.dataset_manager.util import make_zip_archive +from cvat.apps.engine.models import DimensionType + +from .registry import dm_env, exporter, importer + +class DeleteImagePath(ItemTransform): + def transform_item(self, item): + image = None + if item.has_image and item.image.has_data: + image = Image(data=item.image.data, size=item.image.size) + return item.wrap(image=image, point_cloud='', related_images=[]) + + +@exporter(name="Datumaro", ext="ZIP", version="1.0") +def _export(dst_file, instance_data, save_images=False): + dataset = Dataset.from_extractors(GetCVATDataExtractor( + instance_data=instance_data, include_images=save_images), env=dm_env) + if not save_images: + dataset.transform(DeleteImagePath) + with TemporaryDirectory() as tmp_dir: + dataset.export(tmp_dir, 'datumaro', save_images=save_images) + + make_zip_archive(tmp_dir, dst_file) + +@importer(name="Datumaro", ext="ZIP", version="1.0") +def _import(src_file, instance_data): + with TemporaryDirectory() as tmp_dir: + Archive(src_file.name).extractall(tmp_dir) + + dataset = Dataset.import_from(tmp_dir, 'datumaro', env=dm_env) + + import_dm_annotations(dataset, instance_data) + +@exporter(name="Datumaro 3D", ext="ZIP", version="1.0", dimension=DimensionType.DIM_3D) +def _export(dst_file, instance_data, save_images=False): + dataset = Dataset.from_extractors(GetCVATDataExtractor( + instance_data=instance_data, include_images=save_images, + dimension=DimensionType.DIM_3D), env=dm_env) + + if not save_images: + dataset.transform(DeleteImagePath) + with TemporaryDirectory() as tmp_dir: + 
dataset.export(tmp_dir, 'datumaro', save_images=save_images) + + make_zip_archive(tmp_dir, dst_file) + +@importer(name="Datumaro 3D", ext="ZIP", version="1.0", dimension=DimensionType.DIM_3D) +def _import(src_file, instance_data): + with TemporaryDirectory() as tmp_dir: + Archive(src_file.name).extractall(tmp_dir) + + dataset = Dataset.import_from(tmp_dir, 'datumaro', env=dm_env) + + import_dm_annotations(dataset, instance_data) diff --git a/cvat/apps/dataset_manager/formats/datumaro/__init__.py b/cvat/apps/dataset_manager/formats/datumaro/__init__.py deleted file mode 100644 index f4fe0423345..00000000000 --- a/cvat/apps/dataset_manager/formats/datumaro/__init__.py +++ /dev/null @@ -1,102 +0,0 @@ -# Copyright (C) 2019 Intel Corporation -# -# SPDX-License-Identifier: MIT - -import json -import os -import os.path as osp -import shutil -from tempfile import TemporaryDirectory - -from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor, - import_dm_annotations, ProjectData) -from cvat.apps.dataset_manager.util import make_zip_archive -from cvat.settings.base import BASE_DIR -from datumaro.components.project import Project - -from ..registry import dm_env, exporter - - -@exporter(name="Datumaro", ext="ZIP", version="1.0") -class DatumaroProjectExporter: - _REMOTE_IMAGES_EXTRACTOR = 'cvat_rest_api_task_images' - _TEMPLATES_DIR = osp.join(osp.dirname(__file__), 'export_templates') - - @staticmethod - def _save_image_info(save_dir, instance_data): - os.makedirs(save_dir, exist_ok=True) - - config = { - 'server_url': instance_data._host or 'localhost' - } - if isinstance(instance_data, ProjectData): - config['project_id'] = instance_data.db_project.id - else: - config['task_id'] = instance_data.db_task.id - - images = [] - images_meta = { 'images': images, } - for frame_id, frame in enumerate(instance_data.frame_info.values()): - image_info = { - 'id': frame_id, - 'name': osp.basename(frame['path']), - 'width': frame['width'], - 'height': frame['height'], - } - 
if isinstance(instance_data, ProjectData): - image_info['subset'] = frame['subset'] - - with open(osp.join(save_dir, 'config.json'), - 'w', encoding='utf-8') as config_file: - json.dump(config, config_file) - with open(osp.join(save_dir, 'images_meta.json'), - 'w', encoding='utf-8') as images_file: - json.dump(images_meta, images_file) - - def _export(self, instance_data, save_dir, save_images=False): - dataset = GetCVATDataExtractor(instance_data, include_images=save_images) - db_instance = instance_data.db_project if isinstance(instance_data, ProjectData) else instance_data.db_task - dm_env.converters.get('datumaro_project').convert(dataset, - save_dir=save_dir, save_images=save_images, - project_config={ 'project_name': db_instance.name, } - ) - - project = Project.load(save_dir) - target_dir = project.config.project_dir - os.makedirs(target_dir, exist_ok=True) - shutil.copyfile( - osp.join(self._TEMPLATES_DIR, 'README.md'), - osp.join(target_dir, 'README.md')) - - if not save_images: - # add remote links to images - source_name = '{}_{}_images'.format( - 'project' if isinstance(instance_data, ProjectData) else 'task', - db_instance.id, - ) - project.add_source(source_name, { - 'format': self._REMOTE_IMAGES_EXTRACTOR, - }) - self._save_image_info( - osp.join(save_dir, project.local_source_dir(source_name)), - instance_data) - project.save() - - templates_dir = osp.join(self._TEMPLATES_DIR, 'plugins') - target_dir = osp.join(project.config.project_dir, - project.config.env_dir, project.config.plugins_dir) - os.makedirs(target_dir, exist_ok=True) - shutil.copyfile( - osp.join(templates_dir, self._REMOTE_IMAGES_EXTRACTOR + '.py'), - osp.join(target_dir, self._REMOTE_IMAGES_EXTRACTOR + '.py')) - - # Make CVAT CLI module available to the user - cvat_utils_dst_dir = osp.join(save_dir, 'cvat', 'utils') - os.makedirs(cvat_utils_dst_dir) - shutil.copytree(osp.join(BASE_DIR, 'utils', 'cli'), - osp.join(cvat_utils_dst_dir, 'cli')) - - def __call__(self, dst_file, 
instance_data, save_images=False): - with TemporaryDirectory() as temp_dir: - self._export(instance_data, save_dir=temp_dir, save_images=save_images) - make_zip_archive(temp_dir, dst_file) diff --git a/cvat/apps/dataset_manager/formats/datumaro/export_templates/README.md b/cvat/apps/dataset_manager/formats/datumaro/export_templates/README.md deleted file mode 100644 index 9d1e00978af..00000000000 --- a/cvat/apps/dataset_manager/formats/datumaro/export_templates/README.md +++ /dev/null @@ -1,20 +0,0 @@ -# Quick start - -``` bash -# optionally make a virtualenv -python -m virtualenv .venv -. .venv/bin/activate - -# install dependencies -pip install 'git+https://github.com/openvinotoolkit/datumaro' -pip install -r cvat/utils/cli/requirements.txt - -# set up environment -PYTHONPATH=':' -export PYTHONPATH - -# use Datumaro -datum --help -``` - -Check [Datumaro docs](https://github.com/openvinotoolkit/datumaro/README.md) for more info. diff --git a/cvat/apps/dataset_manager/formats/datumaro/export_templates/plugins/cvat_rest_api_task_images.py b/cvat/apps/dataset_manager/formats/datumaro/export_templates/plugins/cvat_rest_api_task_images.py deleted file mode 100644 index 359209cc352..00000000000 --- a/cvat/apps/dataset_manager/formats/datumaro/export_templates/plugins/cvat_rest_api_task_images.py +++ /dev/null @@ -1,105 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# -# SPDX-License-Identifier: MIT - -import getpass -import json -import os -import os.path as osp -from collections import OrderedDict - -import requests - -from cvat.utils.cli.core import CLI as CVAT_CLI -from cvat.utils.cli.core import CVAT_API_V1 -from datumaro.components.config import Config, SchemaBuilder -from datumaro.components.extractor import SourceExtractor, DatasetItem -from datumaro.util.image import Image, lazy_image, load_image - -CONFIG_SCHEMA = SchemaBuilder() \ - .add('task_id', int) \ - .add('server_url', str) \ - .build() - -class cvat_rest_api_task_images(SourceExtractor): - def 
_image_local_path(self, item_id): - task_id = self._config.task_id - return osp.join(self._cache_dir, - 'task_{}_frame_{:06d}.jpg'.format(task_id, int(item_id))) - - def _make_image_loader(self, item_id): - return lazy_image(item_id, - lambda item_id: self._image_loader(item_id, self)) - - def _is_image_cached(self, item_id): - return osp.isfile(self._image_local_path(item_id)) - - def _download_image(self, item_id): - self._connect() - os.makedirs(self._cache_dir, exist_ok=True) - self._cvat_cli.tasks_frame(task_id=self._config.task_id, - frame_ids=[item_id], outdir=self._cache_dir, quality='original') - - def _connect(self): - if self._cvat_cli is not None: - return - - print("Enter credentials for '%s' to read task data:" % \ - (self._config.server_url)) - username = input('User: ') - password = getpass.getpass() - - session = requests.Session() - - api = CVAT_API_V1(self._config.server_url) - cli = CVAT_CLI(session, api, credentials=(username, password)) - self._cvat_cli = cli - - @staticmethod - def _image_loader(item_id, extractor): - if not extractor._is_image_cached(item_id): - extractor._download_image(item_id) - local_path = extractor._image_local_path(item_id) - return load_image(local_path) - - def __init__(self, url): - super().__init__() - - local_dir = url - self._local_dir = local_dir - self._cache_dir = osp.join(local_dir, 'images') - - with open(osp.join(url, 'config.json'), - 'r', encoding='utf-8') as config_file: - config = json.load(config_file) - config = Config(config, schema=CONFIG_SCHEMA) - self._config = config - - with open(osp.join(url, 'images_meta.json'), - 'r', encoding='utf-8') as images_file: - images_meta = json.load(images_file) - image_list = images_meta['images'] - - items = [] - for entry in image_list: - item_id = entry['id'] - item_filename = entry.get('name', str(item_id)) - size = None - if entry.get('height') and entry.get('width'): - size = (entry['height'], entry['width']) - image = 
Image(data=self._make_image_loader(item_id), - path=self._image_local_path(item_id), size=size) - item = DatasetItem(id=osp.splitext(item_filename)[0], image=image) - items.append((item.id, item)) - - items = OrderedDict(items) - self._items = items - - self._cvat_cli = None - - def __iter__(self): - for item in self._items.values(): - yield item - - def __len__(self): - return len(self._items) diff --git a/cvat/apps/dataset_manager/formats/icdar.py b/cvat/apps/dataset_manager/formats/icdar.py index 544e20decf8..3f25c803856 100644 --- a/cvat/apps/dataset_manager/formats/icdar.py +++ b/cvat/apps/dataset_manager/formats/icdar.py @@ -5,9 +5,10 @@ import zipfile from tempfile import TemporaryDirectory +from datumaro.components.annotation import (AnnotationType, Caption, Label, + LabelCategories) from datumaro.components.dataset import Dataset -from datumaro.components.extractor import (AnnotationType, Caption, Label, - LabelCategories, ItemTransform) +from datumaro.components.extractor import ItemTransform from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor, import_dm_annotations) diff --git a/cvat/apps/dataset_manager/formats/market1501.py b/cvat/apps/dataset_manager/formats/market1501.py index f578d3ab10e..3ba14f9891c 100644 --- a/cvat/apps/dataset_manager/formats/market1501.py +++ b/cvat/apps/dataset_manager/formats/market1501.py @@ -5,9 +5,10 @@ import zipfile from tempfile import TemporaryDirectory +from datumaro.components.annotation import (AnnotationType, Label, + LabelCategories) from datumaro.components.dataset import Dataset -from datumaro.components.extractor import (AnnotationType, Label, - LabelCategories, ItemTransform) +from datumaro.components.extractor import ItemTransform from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor, import_dm_annotations) diff --git a/cvat/apps/dataset_manager/formats/mots.py b/cvat/apps/dataset_manager/formats/mots.py index 9ba5b2268bc..fc27886c39d 100644 --- 
a/cvat/apps/dataset_manager/formats/mots.py +++ b/cvat/apps/dataset_manager/formats/mots.py @@ -4,8 +4,9 @@ from tempfile import TemporaryDirectory +from datumaro.components.annotation import AnnotationType from datumaro.components.dataset import Dataset -from datumaro.components.extractor import AnnotationType, ItemTransform +from datumaro.components.extractor import ItemTransform from pyunpack import Archive from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor, diff --git a/cvat/apps/dataset_manager/formats/utils.py b/cvat/apps/dataset_manager/formats/utils.py index 0d545e46583..df3de855ed6 100644 --- a/cvat/apps/dataset_manager/formats/utils.py +++ b/cvat/apps/dataset_manager/formats/utils.py @@ -5,7 +5,7 @@ import os.path as osp from hashlib import blake2s -from datumaro.cli.util import make_file_name +from datumaro.util.os_util import make_file_name def get_color_from_index(index): diff --git a/cvat/apps/dataset_manager/tests/assets/annotations.json b/cvat/apps/dataset_manager/tests/assets/annotations.json index 2a7a0e212f8..078218204b4 100644 --- a/cvat/apps/dataset_manager/tests/assets/annotations.json +++ b/cvat/apps/dataset_manager/tests/assets/annotations.json @@ -244,7 +244,7 @@ "type": "polygon", "occluded": false, "z_order": 0, - "points": [35.0, 22.5, 53.32, 30.63, 22.34, 29.45, 47.43, 38.21], + "points": [35.0, 22.5, 53.32, 30.63, 22.34, 29.45, 47.43, 38.21], "frame": 0, "label_id": null, "group": 0, @@ -661,7 +661,7 @@ "points": [27.15, 26.7, 53.25, 24.8], "frame": 0, "label_id": null, - "group": 2, + "group": 1, "source": "manual", "attributes": [] }, @@ -675,34 +675,6 @@ "group": 0, "source": "manual", "attributes": [] - }, - { - "type": "cuboid", - "occluded": false, - "z_order": 2, - "points": [ - 51.65, - 37.3, - 51.65, - 46.8, - 70.25, - 37.2, - 70.25, - 46.8, - 72.11, - 36.34, - 72.11, - 45.74, - 53.51, - 36.34, - 53.51, - 45.74 - ], - "frame": 0, - "label_id": null, - "group": 1, - "source": "manual", - "attributes": [] } ], 
"tracks": [] @@ -920,7 +892,7 @@ } ] }, - "CVAT for images 1.1 tag": { + "CVAT for images 1.1 tag": { "version": 0, "tags": [ { @@ -982,7 +954,7 @@ "shapes": [], "tracks": [] }, - "CVAT for images 1.1 different types": { + "CVAT for images 1.1 different types": { "version": 0, "tags": [], "shapes": [ @@ -1028,7 +1000,7 @@ } ] }, - "CVAT for video 1.1 polygon": { + "CVAT for video 1.1 polygon": { "version": 0, "tags": [], "shapes": [], @@ -1069,7 +1041,7 @@ ], "attributes": [] }, - { + { "frame": 0, "label_id": null, "group": 1, diff --git a/cvat/apps/dataset_manager/tests/test_formats.py b/cvat/apps/dataset_manager/tests/test_formats.py index 69c7e34797e..22e95e07458 100644 --- a/cvat/apps/dataset_manager/tests/test_formats.py +++ b/cvat/apps/dataset_manager/tests/test_formats.py @@ -11,7 +11,7 @@ import datumaro from datumaro.components.dataset import Dataset, DatasetItem -from datumaro.components.extractor import Mask +from datumaro.components.annotation import Mask from django.contrib.auth.models import Group, User from PIL import Image @@ -278,6 +278,7 @@ def test_export_formats_query(self): 'CVAT for images 1.1', 'CVAT for video 1.1', 'Datumaro 1.0', + 'Datumaro 3D 1.0', 'LabelMe 3.0', 'MOT 1.1', 'MOTS PNG 1.0', @@ -321,7 +322,9 @@ def test_import_formats_query(self): 'ICDAR Localization 1.0', 'ICDAR Segmentation 1.0', 'Kitti Raw Format 1.0', - 'Sly Point Cloud Format 1.0' + 'Sly Point Cloud Format 1.0', + 'Datumaro 1.0', + 'Datumaro 3D 1.0' }) def test_exports(self): @@ -352,7 +355,7 @@ def test_empty_images_are_exported(self): ('COCO 1.0', 'coco'), ('CVAT for images 1.1', 'cvat'), # ('CVAT for video 1.1', 'cvat'), # does not support - ('Datumaro 1.0', 'datumaro_project'), + ('Datumaro 1.0', 'datumaro'), ('LabelMe 3.0', 'label_me'), # ('MOT 1.1', 'mot_seq'), # does not support # ('MOTS PNG 1.0', 'mots_png'), # does not support diff --git a/cvat/apps/dataset_manager/tests/test_rest_api_formats.py b/cvat/apps/dataset_manager/tests/test_rest_api_formats.py index 
f20bb02f00a..a297ac68690 100644 --- a/cvat/apps/dataset_manager/tests/test_rest_api_formats.py +++ b/cvat/apps/dataset_manager/tests/test_rest_api_formats.py @@ -347,8 +347,8 @@ def test_api_v1_dump_and_upload_annotations_with_objects_type_is_shape(self): # Dump annotations with objects type is shape for dump_format in dump_formats: if not dump_format.ENABLED or dump_format.DISPLAY_NAME in [ - 'Kitti Raw Format 1.0', 'Sly Point Cloud Format 1.0' - + 'Kitti Raw Format 1.0', 'Sly Point Cloud Format 1.0', + 'Datumaro 3D 1.0' ]: continue dump_format_name = dump_format.DISPLAY_NAME @@ -368,7 +368,7 @@ def test_api_v1_dump_and_upload_annotations_with_objects_type_is_shape(self): "MOT 1.1", "MOTS PNG 1.0", \ "PASCAL VOC 1.1", "Segmentation mask 1.1", \ "TFRecord 1.0", "YOLO 1.1", "ImageNet 1.0", \ - "WiderFace 1.0", "VGGFace2 1.0", \ + "WiderFace 1.0", "VGGFace2 1.0", "Datumaro 1.0"\ ]: self._create_annotations(task, dump_format_name, "default") else: @@ -452,8 +452,8 @@ def test_api_v1_dump_annotations_with_objects_type_is_track(self): # Dump annotations with objects type is track for dump_format in dump_formats: if not dump_format.ENABLED or dump_format.DISPLAY_NAME in [ - 'Kitti Raw Format 1.0','Sly Point Cloud Format 1.0' - + 'Kitti Raw Format 1.0','Sly Point Cloud Format 1.0', + 'Datumaro 3D 1.0' ]: continue dump_format_name = dump_format.DISPLAY_NAME @@ -897,10 +897,9 @@ def test_api_v1_rewriting_annotations(self): with self.subTest(format=dump_format_name): if dump_format_name in [ "MOTS PNG 1.0", # issue #2925 and changed points values - "Datumaro 1.0", # Datumaro 1.0 is not in the list of import format 'Kitti Raw Format 1.0', - 'Sly Point Cloud Format 1.0' - + 'Sly Point Cloud Format 1.0', + 'Datumaro 3D 1.0' ]: self.skipTest("Format is fail") images = self._generate_task_images(3) @@ -917,7 +916,7 @@ def test_api_v1_rewriting_annotations(self): "MOT 1.1", "MOTS PNG 1.0", \ "PASCAL VOC 1.1", "Segmentation mask 1.1", \ "TFRecord 1.0", "YOLO 1.1", "ImageNet 1.0", \ 
- "WiderFace 1.0", "VGGFace2 1.0", \ + "WiderFace 1.0", "VGGFace2 1.0", "Datumaro 1.0" \ ]: self._create_annotations(task, dump_format_name, "default") else: @@ -1002,13 +1001,13 @@ def test_api_v1_tasks_annotations_dump_and_upload_with_datumaro(self): with self.subTest(dump_format_name): if dump_format_name in [ "MOT 1.1", - "Datumaro 1.0", # not uploaded "CamVid 1.0", # issue #2840 and changed points values "MOTS PNG 1.0", # changed points values "Segmentation mask 1.1", # changed points values "ICDAR Segmentation 1.0", # changed points values 'Kitti Raw Format 1.0', - 'Sly Point Cloud Format 1.0' + 'Sly Point Cloud Format 1.0', + 'Datumaro 3D 1.0' ]: self.skipTest("Format is fail") @@ -1029,7 +1028,7 @@ def test_api_v1_tasks_annotations_dump_and_upload_with_datumaro(self): "MOT 1.1", "MOTS PNG 1.0", \ "PASCAL VOC 1.1", "Segmentation mask 1.1", \ "TFRecord 1.0", "YOLO 1.1", "ImageNet 1.0", \ - "WiderFace 1.0", "VGGFace2 1.0", \ + "WiderFace 1.0", "VGGFace2 1.0", "Datumaro 1.0", \ ]: self._create_annotations(task, dump_format_name, "default") else: diff --git a/cvat/apps/dataset_manager/views.py b/cvat/apps/dataset_manager/views.py index 4f51c69a91e..12708242909 100644 --- a/cvat/apps/dataset_manager/views.py +++ b/cvat/apps/dataset_manager/views.py @@ -8,7 +8,7 @@ from datetime import timedelta import django_rq -from datumaro.cli.util import make_file_name +from datumaro.util.os_util import make_file_name from datumaro.util import to_snake_case from django.utils import timezone diff --git a/cvat/apps/engine/tests/test_rest_api.py b/cvat/apps/engine/tests/test_rest_api.py index f41a2e35c2b..6d4769d5bcd 100644 --- a/cvat/apps/engine/tests/test_rest_api.py +++ b/cvat/apps/engine/tests/test_rest_api.py @@ -4488,7 +4488,8 @@ def _run_api_v1_tasks_id_annotations_dump_load(self, owner, assignee, annotator) def _get_initial_annotation(annotation_format): if annotation_format not in ["Market-1501 1.0", "ICDAR Recognition 1.0", "ICDAR Localization 1.0", "ICDAR Segmentation 
1.0", - 'Kitti Raw Format 1.0', 'Sly Point Cloud Format 1.0']: + 'Kitti Raw Format 1.0', 'Sly Point Cloud Format 1.0', + 'Datumaro 3D 1.0']: rectangle_tracks_with_attrs = [{ "frame": 0, "label_id": task["labels"][0]["id"], @@ -4832,7 +4833,8 @@ def _get_initial_annotation(annotation_format): ], }] annotations["tags"] = tags_with_attrs - elif annotation_format in ['Kitti Raw Format 1.0','Sly Point Cloud Format 1.0']: + elif annotation_format in ['Kitti Raw Format 1.0', + 'Sly Point Cloud Format 1.0', 'Datumaro 3D 1.0']: velodyne_wo_attrs = [{ "frame": 0, "label_id": task["labels"][0]["id"], diff --git a/cvat/requirements/base.txt b/cvat/requirements/base.txt index 7fa79df5e1d..06fbb6db799 100644 --- a/cvat/requirements/base.txt +++ b/cvat/requirements/base.txt @@ -53,5 +53,5 @@ google-cloud-storage==1.42.0 # --no-binary=pycocotools: workaround for binary incompatibility on numpy 1.20 # of pycocotools and tensorflow 2.4.1 # when pycocotools is installed by wheel in python 3.8+ -datumaro==0.1.10.1 --no-binary=datumaro --no-binary=pycocotools +datumaro==0.2.0 --no-binary=datumaro --no-binary=pycocotools urllib3>=1.26.5 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/site/content/en/docs/manual/advanced/formats/format-datumaro.md b/site/content/en/docs/manual/advanced/formats/format-datumaro.md index 2cb63ef1201..7ac542b3486 100644 --- a/site/content/en/docs/manual/advanced/formats/format-datumaro.md +++ b/site/content/en/docs/manual/advanced/formats/format-datumaro.md @@ -13,3 +13,91 @@ is possible in Datumaro too, but Datumaro can offer dataset operations. 
- supported annotations: any 2D shapes, labels - supported attributes: any + +# Import annotations in Datumaro format + +Uploaded file: a zip archive of the following structure: + +```bash +.zip/ +└── annotations/ + ├── subset1.json # full description of classes and all dataset items + └── subset2.json # full description of classes and all dataset items +``` + +JSON annotation files in the `annotations` directory should have a similar structure: + +```json +{ + "info": {}, + "categories": { + "label": { + "labels": [ + { + "name": "label_0", + "parent": "", + "attributes": [] + }, + { + "name": "label_1", + "parent": "", + "attributes": [] + } + ], + "attributes": [] + } + }, + "items": [ + { + "id": "img1", + "annotations": [ + { + "id": 0, + "type": "polygon", + "attributes": {}, + "group": 0, + "label_id": 1, + "points": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0], + "z_order": 0 + }, + { + "id": 1, + "type": "bbox", + "attributes": {}, + "group": 1, + "label_id": 0, + "z_order": 0, + "bbox": [1.0, 2.0, 3.0, 4.0] + }, + { + "id": 2, + "type": "mask", + "attributes": {}, + "group": 1, + "label_id": 0, + "rle": { + "counts": "d0d0:F\\0", + "size": [10, 10] + }, + "z_order": 0 + } + ] + } + ] +} +``` + +# Export annotations in Datumaro format + +Downloaded file: a zip archive of the following structure: + +```bash +taskname.zip/ +├── annotations/ +│ └── default.json # full description of classes and all dataset items +└── images/ # if the option `save images` was selected + └── default + ├── image1.jpg + ├── image2.jpg + ├── ... +```