Skip to content

Commit

Permalink
Update CVAT formats to use changes in Datumaro API (#2794)
Browse files Browse the repository at this point in the history
* Update CVAT formats to use Datumaro changes

* Update datumaro dependency

* Add comments on datumaro install in requirements

* fix linter

* Fix test

* fix linter

* Update datumaro version to 0.1.6.1
  • Loading branch information
Maxim Zhiltsov authored Mar 3, 2021
1 parent d5312df commit 9b62b71
Show file tree
Hide file tree
Showing 13 changed files with 88 additions and 98 deletions.
22 changes: 10 additions & 12 deletions cvat/apps/dataset_manager/formats/camvid.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from tempfile import TemporaryDirectory

from datumaro.components.project import Dataset
from datumaro.components.dataset import Dataset
from pyunpack import Archive

from cvat.apps.dataset_manager.bindings import (CvatTaskDataExtractor,
Expand All @@ -17,16 +17,15 @@

# Export handler registered through `exporter` as the 'CamVid' format (ZIP, version 1.0).
# NOTE(review): this is a diff hunk with indentation stripped. Two variants of
# the body appear back to back: one built on `extractor` / `dm_env.converters`
# and one built on `dataset.transform(...)` / `dataset.export(...)`. Presumably
# the first is the removed code and the second its replacement -- confirm
# against the commit.
@exporter(name='CamVid', ext='ZIP', version='1.0')
def _export(dst_file, task_data, save_images=False):
# Variant 1: wrap the task data in an extractor, then chain three transforms
# looked up in the environment's transform registry.
extractor = CvatTaskDataExtractor(task_data, include_images=save_images)
envt = dm_env.transforms
extractor = extractor.transform(envt.get('polygons_to_masks'))
extractor = extractor.transform(envt.get('boxes_to_masks'))
extractor = extractor.transform(envt.get('merge_instance_segments'))
extractor = Dataset.from_extractors(extractor) # apply lazy transforms
# Variant 2: build the Dataset first (passing env=dm_env), then request the
# same three transforms by name, in the same order.
dataset = Dataset.from_extractors(CvatTaskDataExtractor(
task_data, include_images=save_images), env=dm_env)
dataset.transform('polygons_to_masks')
dataset.transform('boxes_to_masks')
dataset.transform('merge_instance_segments')
# Colormap built from the task; only element [0] of each entry is forwarded
# as `label_map` in the call below.
label_map = make_colormap(task_data)
with TemporaryDirectory() as temp_dir:
# Variant 1 call (converter fetched from dm_env) followed by the variant 2
# call (dataset.export). The trailing `label_map=...` line looks like shared
# context that closes whichever call is kept -- TODO confirm.
dm_env.converters.get('camvid').convert(extractor,
save_dir=temp_dir, save_images=save_images, apply_colormap=True,
dataset.export(temp_dir, 'camvid',
save_images=save_images, apply_colormap=True,
label_map={label: label_map[label][0] for label in label_map})

# Hand the temporary directory and the destination file to make_zip_archive.
make_zip_archive(temp_dir, dst_file)
Expand All @@ -36,7 +35,6 @@ def _import(src_file, task_data):
with TemporaryDirectory() as tmp_dir:
Archive(src_file.name).extractall(tmp_dir)

dataset = dm_env.make_importer('camvid')(tmp_dir).make_dataset()
masks_to_polygons = dm_env.transforms.get('masks_to_polygons')
dataset = dataset.transform(masks_to_polygons)
dataset = Dataset.import_from(tmp_dir, 'camvid', env=dm_env)
dataset.transform('masks_to_polygons')
import_dm_annotations(dataset, task_data)
15 changes: 8 additions & 7 deletions cvat/apps/dataset_manager/formats/coco.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
import zipfile
from tempfile import TemporaryDirectory

from datumaro.components.project import Dataset
from datumaro.components.dataset import Dataset

from cvat.apps.dataset_manager.bindings import CvatTaskDataExtractor, \
import_dm_annotations
from cvat.apps.dataset_manager.util import make_zip_archive
Expand All @@ -15,11 +16,10 @@

# Export handler registered through `exporter` as the 'COCO' format (ZIP, version 1.0).
# NOTE(review): this is a diff hunk with indentation stripped. The lines using
# `extractor` / `dm_env.converters` and the lines using `dataset` are two
# variants of the same steps; presumably the former were removed and the
# latter added -- confirm against the commit.
@exporter(name='COCO', ext='ZIP', version='1.0')
def _export(dst_file, task_data, save_images=False):
# Variant 1: extractor wrapped into a Dataset without an explicit environment.
extractor = CvatTaskDataExtractor(task_data, include_images=save_images)
extractor = Dataset.from_extractors(extractor) # apply lazy transforms
# Variant 2: same construction in one expression, with env=dm_env passed in.
dataset = Dataset.from_extractors(CvatTaskDataExtractor(
task_data, include_images=save_images), env=dm_env)
with TemporaryDirectory() as temp_dir:
# Variant 1 looks the 'coco_instances' converter up in dm_env; variant 2
# asks the dataset to export itself in that format by name.
dm_env.converters.get('coco_instances').convert(extractor,
save_dir=temp_dir, save_images=save_images)
dataset.export(temp_dir, 'coco_instances', save_images=save_images)

# Hand the temporary directory and the destination file to make_zip_archive.
make_zip_archive(temp_dir, dst_file)

Expand All @@ -29,8 +29,9 @@ def _import(src_file, task_data):
with TemporaryDirectory() as tmp_dir:
zipfile.ZipFile(src_file).extractall(tmp_dir)

dataset = dm_env.make_importer('coco')(tmp_dir).make_dataset()
dataset = Dataset.import_from(tmp_dir, 'coco', env=dm_env)
import_dm_annotations(dataset, task_data)
else:
dataset = dm_env.make_extractor('coco_instances', src_file.name)
dataset = Dataset.import_from(src_file.name,
'coco_instances', env=dm_env)
import_dm_annotations(dataset, task_data)
3 changes: 2 additions & 1 deletion cvat/apps/dataset_manager/formats/cvat.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,11 @@
from glob import glob
from tempfile import TemporaryDirectory

from datumaro.components.extractor import DatasetItem

from cvat.apps.dataset_manager.bindings import match_dm_item
from cvat.apps.dataset_manager.util import make_zip_archive
from cvat.apps.engine.frame_provider import FrameProvider
from datumaro.components.extractor import DatasetItem

from .registry import exporter, importer

Expand Down
20 changes: 9 additions & 11 deletions cvat/apps/dataset_manager/formats/imagenet.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@
# SPDX-License-Identifier: MIT

import os.path as osp
from glob import glob

import zipfile
from glob import glob
from tempfile import TemporaryDirectory

from datumaro.components.project import Dataset
from datumaro.components.dataset import Dataset

from cvat.apps.dataset_manager.bindings import CvatTaskDataExtractor, \
import_dm_annotations
from cvat.apps.dataset_manager.util import make_zip_archive
Expand All @@ -18,15 +18,13 @@

# Export handler registered through `exporter` as the 'ImageNet' format (ZIP, version 1.0).
# NOTE(review): this is a diff hunk with indentation stripped. The lines using
# `extractor` / `dm_env.converters` and the lines using `dataset` are two
# variants of the same steps; presumably the former were removed and the
# latter added -- confirm against the commit.
@exporter(name='ImageNet', ext='ZIP', version='1.0')
def _export(dst_file, task_data, save_images=False):
# Variant 1: extractor wrapped into a Dataset without an explicit environment.
extractor = CvatTaskDataExtractor(task_data, include_images=save_images)
extractor = Dataset.from_extractors(extractor) # apply lazy transform
# Variant 2: same construction in one expression, with env=dm_env passed in.
dataset = Dataset.from_extractors(CvatTaskDataExtractor(
task_data, include_images=save_images), env=dm_env)
with TemporaryDirectory() as temp_dir:
# The output format depends on save_images: 'imagenet' when images are
# saved, 'imagenet_txt' otherwise. Each branch shows the variant 1 call
# followed by the variant 2 call.
if save_images:
dm_env.converters.get('imagenet').convert(extractor,
save_dir=temp_dir, save_images=save_images)
dataset.export(temp_dir, 'imagenet', save_images=save_images)
else:
dm_env.converters.get('imagenet_txt').convert(extractor,
save_dir=temp_dir, save_images=save_images)
dataset.export(temp_dir, 'imagenet_txt', save_images=save_images)

# Hand the temporary directory and the destination file to make_zip_archive.
make_zip_archive(temp_dir, dst_file)

Expand All @@ -35,7 +33,7 @@ def _import(src_file, task_data):
with TemporaryDirectory() as tmp_dir:
zipfile.ZipFile(src_file).extractall(tmp_dir)
if glob(osp.join(tmp_dir, '*.txt')):
dataset = dm_env.make_importer('imagenet_txt')(tmp_dir).make_dataset()
dataset = Dataset.import_from(tmp_dir, 'imagenet_txt', env=dm_env)
else:
dataset = dm_env.make_importer('imagenet')(tmp_dir).make_dataset()
dataset = Dataset.import_from(tmp_dir, 'imagenet', env=dm_env)
import_dm_annotations(dataset, task_data)
14 changes: 6 additions & 8 deletions cvat/apps/dataset_manager/formats/labelme.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,22 @@

from tempfile import TemporaryDirectory

from datumaro.components.dataset import Dataset
from pyunpack import Archive

from cvat.apps.dataset_manager.bindings import (CvatTaskDataExtractor,
import_dm_annotations)
from cvat.apps.dataset_manager.util import make_zip_archive
from datumaro.components.project import Dataset

from .registry import dm_env, exporter, importer


# Export handler registered through `exporter` as the 'LabelMe' format (ZIP, version 3.0).
# NOTE(review): this is a diff hunk with indentation stripped. The lines using
# `extractor` / `dm_env.converters` and the lines using `dataset` are two
# variants of the same steps; presumably the former were removed and the
# latter added -- confirm against the commit.
@exporter(name='LabelMe', ext='ZIP', version='3.0')
def _export(dst_file, task_data, save_images=False):
# Variant 1: extractor wrapped into a Dataset without an explicit environment.
extractor = CvatTaskDataExtractor(task_data, include_images=save_images)
extractor = Dataset.from_extractors(extractor) # apply lazy transforms
# Variant 2: same construction in one expression, with env=dm_env passed in.
dataset = Dataset.from_extractors(CvatTaskDataExtractor(
task_data, include_images=save_images), env=dm_env)
with TemporaryDirectory() as temp_dir:
# Variant 1 looks the 'label_me' converter up in dm_env; variant 2 asks the
# dataset to export itself in that format by name.
dm_env.converters.get('label_me').convert(extractor, save_dir=temp_dir,
save_images=save_images)
dataset.export(temp_dir, 'label_me', save_images=save_images)

# Hand the temporary directory and the destination file to make_zip_archive.
make_zip_archive(temp_dir, dst_file)

Expand All @@ -29,7 +28,6 @@ def _import(src_file, task_data):
with TemporaryDirectory() as tmp_dir:
Archive(src_file.name).extractall(tmp_dir)

dataset = dm_env.make_importer('label_me')(tmp_dir).make_dataset()
masks_to_polygons = dm_env.transforms.get('masks_to_polygons')
dataset = dataset.transform(masks_to_polygons)
dataset = Dataset.import_from(tmp_dir, 'label_me', env=dm_env)
dataset.transform('masks_to_polygons')
import_dm_annotations(dataset, task_data)
21 changes: 9 additions & 12 deletions cvat/apps/dataset_manager/formats/mask.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,28 +4,26 @@

from tempfile import TemporaryDirectory

from datumaro.components.dataset import Dataset
from pyunpack import Archive

from cvat.apps.dataset_manager.bindings import (CvatTaskDataExtractor,
import_dm_annotations)
from cvat.apps.dataset_manager.util import make_zip_archive
from datumaro.components.project import Dataset

from .registry import dm_env, exporter, importer
from .utils import make_colormap


# Export handler registered through `exporter` as the 'Segmentation mask' format (ZIP, version 1.1).
# NOTE(review): this is a diff hunk with indentation stripped. The lines using
# `extractor` / `dm_env.converters` and the lines using `dataset` are two
# variants of the same steps; presumably the former were removed and the
# latter added -- confirm against the commit.
@exporter(name='Segmentation mask', ext='ZIP', version='1.1')
def _export(dst_file, task_data, save_images=False):
# Variant 1: wrap the task data in an extractor, then chain three transforms
# looked up in the environment's transform registry.
extractor = CvatTaskDataExtractor(task_data, include_images=save_images)
envt = dm_env.transforms
extractor = extractor.transform(envt.get('polygons_to_masks'))
extractor = extractor.transform(envt.get('boxes_to_masks'))
extractor = extractor.transform(envt.get('merge_instance_segments'))
extractor = Dataset.from_extractors(extractor) # apply lazy transforms
# Variant 2: build the Dataset first (passing env=dm_env), then request the
# same three transforms by name, in the same order.
dataset = Dataset.from_extractors(CvatTaskDataExtractor(
task_data, include_images=save_images), env=dm_env)
dataset.transform('polygons_to_masks')
dataset.transform('boxes_to_masks')
dataset.transform('merge_instance_segments')
with TemporaryDirectory() as temp_dir:
# Output format is 'voc_segmentation' in both variants. The trailing
# `apply_colormap=True, label_map=make_colormap(task_data))` line looks like
# shared context that closes whichever call is kept -- TODO confirm.
dm_env.converters.get('voc_segmentation').convert(extractor,
save_dir=temp_dir, save_images=save_images,
dataset.export(temp_dir, 'voc_segmentation', save_images=save_images,
apply_colormap=True, label_map=make_colormap(task_data))

# Hand the temporary directory and the destination file to make_zip_archive.
make_zip_archive(temp_dir, dst_file)
Expand All @@ -35,7 +33,6 @@ def _import(src_file, task_data):
with TemporaryDirectory() as tmp_dir:
Archive(src_file.name).extractall(tmp_dir)

dataset = dm_env.make_importer('voc')(tmp_dir).make_dataset()
masks_to_polygons = dm_env.transforms.get('masks_to_polygons')
dataset = dataset.transform(masks_to_polygons)
dataset = Dataset.import_from(tmp_dir, 'voc', env=dm_env)
dataset.transform('masks_to_polygons')
import_dm_annotations(dataset, task_data)
13 changes: 6 additions & 7 deletions cvat/apps/dataset_manager/formats/mot.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,22 @@

from tempfile import TemporaryDirectory

import datumaro.components.extractor as datumaro
from datumaro.components.dataset import Dataset
from pyunpack import Archive

import datumaro.components.extractor as datumaro
from cvat.apps.dataset_manager.bindings import CvatTaskDataExtractor
from cvat.apps.dataset_manager.util import make_zip_archive
from datumaro.components.project import Dataset

from .registry import dm_env, exporter, importer


# Export handler registered through `exporter` as the 'MOT' format (ZIP, version 1.1).
# NOTE(review): this is a diff hunk with indentation stripped. The lines using
# `extractor` / `dm_env.converters` and the lines using `dataset` are two
# variants of the same steps; presumably the former were removed and the
# latter added -- confirm against the commit.
@exporter(name='MOT', ext='ZIP', version='1.1')
def _export(dst_file, task_data, save_images=False):
# Variant 1: extractor wrapped into a Dataset without an explicit environment.
extractor = CvatTaskDataExtractor(task_data, include_images=save_images)
extractor = Dataset.from_extractors(extractor) # apply lazy transforms
# Variant 2: same construction in one expression, with env=dm_env passed in.
dataset = Dataset.from_extractors(CvatTaskDataExtractor(
task_data, include_images=save_images), env=dm_env)
with TemporaryDirectory() as temp_dir:
# Variant 1 looks the 'mot_seq_gt' converter up in dm_env; variant 2 asks
# the dataset to export itself in that format by name.
dm_env.converters.get('mot_seq_gt').convert(extractor,
save_dir=temp_dir, save_images=save_images)
dataset.export(temp_dir, 'mot_seq_gt', save_images=save_images)

# Hand the temporary directory and the destination file to make_zip_archive.
make_zip_archive(temp_dir, dst_file)

Expand All @@ -29,7 +28,7 @@ def _import(src_file, task_data):
with TemporaryDirectory() as tmp_dir:
Archive(src_file.name).extractall(tmp_dir)

dataset = dm_env.make_importer('mot_seq')(tmp_dir).make_dataset()
dataset = Dataset.import_from(tmp_dir, 'mot_seq', env=dm_env)

tracks = {}
label_cat = dataset.categories()[datumaro.AnnotationType.label]
Expand Down
25 changes: 11 additions & 14 deletions cvat/apps/dataset_manager/formats/mots.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@

from tempfile import TemporaryDirectory

from datumaro.components.dataset import Dataset
from datumaro.components.extractor import AnnotationType, Transform
from pyunpack import Archive

from cvat.apps.dataset_manager.bindings import (CvatTaskDataExtractor,
find_dataset_root, match_dm_item)
from cvat.apps.dataset_manager.util import make_zip_archive
from datumaro.components.extractor import AnnotationType, Transform
from datumaro.components.project import Dataset

from .registry import dm_env, exporter, importer

Expand All @@ -22,16 +22,14 @@ def transform_item(self, item):

# Export handler registered through `exporter` as the 'MOTS PNG' format (ZIP, version 1.0).
# NOTE(review): this is a diff hunk with indentation stripped. The lines using
# `extractor` / `dm_env.converters` and the lines using `dataset` are two
# variants of the same steps; presumably the former were removed and the
# latter added -- confirm against the commit.
@exporter(name='MOTS PNG', ext='ZIP', version='1.0')
def _export(dst_file, task_data, save_images=False):
# Variant 1: wrap the task data in an extractor, apply the KeepTracks
# transform first, then three transforms fetched from the registry.
extractor = CvatTaskDataExtractor(task_data, include_images=save_images)
envt = dm_env.transforms
extractor = extractor.transform(KeepTracks) # can only export tracks
extractor = extractor.transform(envt.get('polygons_to_masks'))
extractor = extractor.transform(envt.get('boxes_to_masks'))
extractor = extractor.transform(envt.get('merge_instance_segments'))
extractor = Dataset.from_extractors(extractor) # apply lazy transforms
# Variant 2: build the Dataset first (passing env=dm_env); KeepTracks is still
# passed as a class, while the other transforms are given by name.
dataset = Dataset.from_extractors(CvatTaskDataExtractor(
task_data, include_images=save_images), env=dm_env)
dataset.transform(KeepTracks) # can only export tracks
dataset.transform('polygons_to_masks')
dataset.transform('boxes_to_masks')
dataset.transform('merge_instance_segments')
with TemporaryDirectory() as temp_dir:
# Variant 1 looks the 'mots_png' converter up in dm_env; variant 2 asks the
# dataset to export itself in that format by name.
dm_env.converters.get('mots_png').convert(extractor,
save_dir=temp_dir, save_images=save_images)
dataset.export(temp_dir, 'mots_png', save_images=save_images)

# Hand the temporary directory and the destination file to make_zip_archive.
make_zip_archive(temp_dir, dst_file)

Expand All @@ -40,9 +38,8 @@ def _import(src_file, task_data):
with TemporaryDirectory() as tmp_dir:
Archive(src_file.name).extractall(tmp_dir)

dataset = dm_env.make_importer('mots')(tmp_dir).make_dataset()
masks_to_polygons = dm_env.transforms.get('masks_to_polygons')
dataset = dataset.transform(masks_to_polygons)
dataset = Dataset.import_from(tmp_dir, 'mots', env=dm_env)
dataset.transform('masks_to_polygons')

tracks = {}
label_cat = dataset.categories()[AnnotationType.label]
Expand Down
16 changes: 7 additions & 9 deletions cvat/apps/dataset_manager/formats/pascal_voc.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,26 +6,25 @@
import os.path as osp
import shutil
from glob import glob

from tempfile import TemporaryDirectory

from datumaro.components.dataset import Dataset
from pyunpack import Archive

from cvat.apps.dataset_manager.bindings import (CvatTaskDataExtractor,
import_dm_annotations)
from cvat.apps.dataset_manager.util import make_zip_archive
from datumaro.components.project import Dataset

from .registry import dm_env, exporter, importer


# Export handler registered through `exporter` as the 'PASCAL VOC' format (ZIP, version 1.1).
# NOTE(review): this is a diff hunk with indentation stripped. The lines using
# `extractor` / `dm_env.converters` and the lines using `dataset` are two
# variants of the same steps; presumably the former were removed and the
# latter added -- confirm against the commit.
@exporter(name='PASCAL VOC', ext='ZIP', version='1.1')
def _export(dst_file, task_data, save_images=False):
# Variant 1: extractor wrapped into a Dataset without an explicit environment.
extractor = CvatTaskDataExtractor(task_data, include_images=save_images)
extractor = Dataset.from_extractors(extractor) # apply lazy transforms
# Variant 2: same construction in one expression, with env=dm_env passed in.
dataset = Dataset.from_extractors(CvatTaskDataExtractor(
task_data, include_images=save_images), env=dm_env)
with TemporaryDirectory() as temp_dir:
# Both variants target the 'voc' format and pass label_map='source'.
dm_env.converters.get('voc').convert(extractor,
save_dir=temp_dir, save_images=save_images, label_map='source')
dataset.export(temp_dir, 'voc', save_images=save_images,
label_map='source')

# Hand the temporary directory and the destination file to make_zip_archive.
make_zip_archive(temp_dir, dst_file)

Expand Down Expand Up @@ -56,7 +55,6 @@ def _import(src_file, task_data):
for f in anno_files:
shutil.move(f, anno_dir)

dataset = dm_env.make_importer('voc')(tmp_dir).make_dataset()
masks_to_polygons = dm_env.transforms.get('masks_to_polygons')
dataset = dataset.transform(masks_to_polygons)
dataset = Dataset.import_from(tmp_dir, 'voc', env=dm_env)
dataset.transform('masks_to_polygons')
import_dm_annotations(dataset, task_data)
9 changes: 4 additions & 5 deletions cvat/apps/dataset_manager/formats/tfrecord.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,10 @@

# Export handler registered through `exporter` as the 'TFRecord' format (ZIP, version 1.0).
# Registration also passes enabled=tf_available; `tf_available` is defined
# outside this hunk.
# NOTE(review): this is a diff hunk with indentation stripped. The lines using
# `extractor` / `dm_env.converters` and the lines using `dataset` are two
# variants of the same steps; presumably the former were removed and the
# latter added -- confirm against the commit.
@exporter(name='TFRecord', ext='ZIP', version='1.0', enabled=tf_available)
def _export(dst_file, task_data, save_images=False):
# Variant 1: extractor wrapped into a Dataset without an explicit environment.
extractor = CvatTaskDataExtractor(task_data, include_images=save_images)
extractor = Dataset.from_extractors(extractor) # apply lazy transforms
# Variant 2: same construction in one expression, with env=dm_env passed in.
dataset = Dataset.from_extractors(CvatTaskDataExtractor(
task_data, include_images=save_images), env=dm_env)
with TemporaryDirectory() as temp_dir:
# Variant 1 looks the 'tf_detection_api' converter up in dm_env; variant 2
# asks the dataset to export itself in that format by name.
dm_env.converters.get('tf_detection_api').convert(extractor,
save_dir=temp_dir, save_images=save_images)
dataset.export(temp_dir, 'tf_detection_api', save_images=save_images)

# Hand the temporary directory and the destination file to make_zip_archive.
make_zip_archive(temp_dir, dst_file)

Expand All @@ -37,5 +36,5 @@ def _import(src_file, task_data):
with TemporaryDirectory() as tmp_dir:
Archive(src_file.name).extractall(tmp_dir)

dataset = dm_env.make_importer('tf_detection_api')(tmp_dir).make_dataset()
dataset = Dataset.import_from(tmp_dir, 'tf_detection_api', env=dm_env)
import_dm_annotations(dataset, task_data)
Loading

0 comments on commit 9b62b71

Please sign in to comment.