From 9c788373f3dcb1bbbfd73c05cffe9d31993f91b4 Mon Sep 17 00:00:00 2001
From: zhiltsov-max <zhiltsov.max35@gmail.com>
Date: Fri, 5 Jun 2020 10:36:49 +0300
Subject: [PATCH] [Datumaro] Add tests for dataset examples (#1648)

* add dataset examples

* update docs

* update yolo tests

* join voc format test classes

* remplace voc extractor tests with import test

* update tfrecord format tests

* update mot tests

* update labelme tests

* update image dir tests
---
 datumaro/tests/test_image_dir_format.py |  16 +-
 datumaro/tests/test_labelme_format.py   |  24 +-
 datumaro/tests/test_mot_format.py       |  27 ++-
 datumaro/tests/test_tfrecord_format.py  |  46 ++--
 datumaro/tests/test_voc_format.py       | 292 +++---------------------
 datumaro/tests/test_yolo_format.py      |  17 +-
 6 files changed, 106 insertions(+), 316 deletions(-)

diff --git a/datumaro/tests/test_image_dir_format.py b/datumaro/tests/test_image_dir_format.py
index 30dd05b1c433..67302fef5a9b 100644
--- a/datumaro/tests/test_image_dir_format.py
+++ b/datumaro/tests/test_image_dir_format.py
@@ -9,16 +9,16 @@
 
 
 class ImageDirFormatTest(TestCase):
-    class TestExtractor(Extractor):
-        def __iter__(self):
-            return iter([
-                DatasetItem(id=1, image=np.ones((10, 6, 3))),
-                DatasetItem(id=2, image=np.ones((5, 4, 3))),
-            ])
-
     def test_can_load(self):
+        class TestExtractor(Extractor):
+            def __iter__(self):
+                return iter([
+                    DatasetItem(id=1, image=np.ones((10, 6, 3))),
+                    DatasetItem(id=2, image=np.ones((5, 4, 3))),
+                ])
+
         with TestDir() as test_dir:
-            source_dataset = self.TestExtractor()
+            source_dataset = TestExtractor()
 
             ImageDirConverter()(source_dataset, save_dir=test_dir)
 
diff --git a/datumaro/tests/test_labelme_format.py b/datumaro/tests/test_labelme_format.py
index 098fd26392ed..1b82e0666776 100644
--- a/datumaro/tests/test_labelme_format.py
+++ b/datumaro/tests/test_labelme_format.py
@@ -6,8 +6,8 @@
 from datumaro.components.extractor import (Extractor, DatasetItem,
     AnnotationType, Bbox, Mask, Polygon, LabelCategories
 )
-from datumaro.components.project import Dataset
-from datumaro.plugins.labelme_format import LabelMeExtractor, LabelMeImporter, \
+from datumaro.components.project import Project
+from datumaro.plugins.labelme_format import LabelMeImporter, \
     LabelMeConverter
 from datumaro.util.test_utils import TestDir, compare_datasets
 
@@ -111,8 +111,11 @@ def categories(self):
 
 DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets', 'labelme_dataset')
 
-class LabelMeExtractorTest(TestCase):
-    def test_can_load(self):
+class LabelMeImporterTest(TestCase):
+    def test_can_detect(self):
+        self.assertTrue(LabelMeImporter.detect(DUMMY_DATASET_DIR))
+
+    def test_can_import(self):
         class DstExtractor(Extractor):
             def __iter__(self):
                 img1 = np.ones((77, 102, 3)) * 255
@@ -208,13 +211,6 @@ def categories(self):
                     AnnotationType.label: label_cat,
                 }
 
-        parsed = Dataset.from_extractors(LabelMeExtractor(DUMMY_DATASET_DIR))
-        compare_datasets(self, expected=DstExtractor(), actual=parsed)
-
-class LabelMeImporterTest(TestCase):
-    def test_can_detect(self):
-        self.assertTrue(LabelMeImporter.detect(DUMMY_DATASET_DIR))
-
-    def test_can_import(self):
-        parsed = LabelMeImporter()(DUMMY_DATASET_DIR).make_dataset()
-        self.assertEqual(1, len(parsed))
+        parsed = Project.import_from(DUMMY_DATASET_DIR, 'label_me') \
+            .make_dataset()
+        compare_datasets(self, expected=DstExtractor(), actual=parsed)
\ No newline at end of file
diff --git a/datumaro/tests/test_mot_format.py b/datumaro/tests/test_mot_format.py
index efe625025727..2aaadd0d94ae 100644
--- a/datumaro/tests/test_mot_format.py
+++ b/datumaro/tests/test_mot_format.py
@@ -1,10 +1,12 @@
 import numpy as np
+import os.path as osp
 
 from unittest import TestCase
 
 from datumaro.components.extractor import (Extractor, DatasetItem,
     AnnotationType, Bbox, LabelCategories
 )
+from datumaro.components.project import Project
 from datumaro.plugins.mot_format import MotSeqGtConverter, MotSeqImporter
 from datumaro.util.test_utils import TestDir, compare_datasets
 
@@ -116,15 +118,25 @@ def categories(self):
                 SrcExtractor(), MotSeqGtConverter(save_images=True),
                 test_dir, target_dataset=DstExtractor())
 
+
+DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets', 'mot_dataset')
+
 class MotImporterTest(TestCase):
     def test_can_detect(self):
-        class TestExtractor(Extractor):
+        self.assertTrue(MotSeqImporter.detect(DUMMY_DATASET_DIR))
+
+    def test_can_import(self):
+        class DstExtractor(Extractor):
             def __iter__(self):
                 return iter([
-                    DatasetItem(id=1, subset='train',
+                    DatasetItem(id=1,
                         image=np.ones((16, 16, 3)),
                         annotations=[
-                            Bbox(0, 4, 4, 8, label=2),
+                            Bbox(0, 4, 4, 8, label=2, attributes={
+                                'occluded': False,
+                                'visibility': 1.0,
+                                'ignored': False,
+                            }),
                         ]
                     ),
                 ])
@@ -137,10 +149,7 @@ def categories(self):
                     AnnotationType.label: label_cat,
                 }
 
-        def generate_dummy_dataset(path):
-            MotSeqGtConverter()(TestExtractor(), save_dir=path)
-
-        with TestDir() as test_dir:
-            generate_dummy_dataset(test_dir)
+        dataset = Project.import_from(DUMMY_DATASET_DIR, 'mot_seq') \
+            .make_dataset()
 
-            self.assertTrue(MotSeqImporter.detect(test_dir))
+        compare_datasets(self, DstExtractor(), dataset)
\ No newline at end of file
diff --git a/datumaro/tests/test_tfrecord_format.py b/datumaro/tests/test_tfrecord_format.py
index cc55a9fc9a3f..403f9517345f 100644
--- a/datumaro/tests/test_tfrecord_format.py
+++ b/datumaro/tests/test_tfrecord_format.py
@@ -1,10 +1,12 @@
 import numpy as np
+import os.path as osp
 
 from unittest import TestCase, skipIf
 
 from datumaro.components.extractor import (Extractor, DatasetItem,
     AnnotationType, Bbox, Mask, LabelCategories
 )
+from datumaro.components.project import Project
 from datumaro.util.image import Image
 from datumaro.util.test_utils import TestDir, compare_datasets
 from datumaro.util.tf_util import check_import
@@ -56,17 +58,6 @@ def __iter__(self):
                             Bbox(2, 4, 4, 4),
                         ]
                     ),
-
-                    DatasetItem(id=2, subset='val',
-                        image=np.ones((8, 8, 3)),
-                        annotations=[
-                            Bbox(1, 2, 4, 2, label=3),
-                        ]
-                    ),
-
-                    DatasetItem(id=3, subset='test',
-                        image=np.ones((5, 4, 3)) * 3,
-                    ),
                 ])
 
             def categories(self):
@@ -188,17 +179,37 @@ def test_labelmap_parsing(self):
 
         self.assertEqual(expected, parsed)
 
+
+DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__),
+    'assets', 'tf_detection_api_dataset')
+
 @skipIf(import_failed, "Failed to import tensorflow")
 class TfrecordImporterTest(TestCase):
     def test_can_detect(self):
-        class TestExtractor(Extractor):
+        self.assertTrue(TfDetectionApiImporter.detect(DUMMY_DATASET_DIR))
+
+    def test_can_import(self):
+        class DstExtractor(Extractor):
             def __iter__(self):
                 return iter([
                     DatasetItem(id=1, subset='train',
                         image=np.ones((16, 16, 3)),
                         annotations=[
                             Bbox(0, 4, 4, 8, label=2),
-                        ]
+                            Bbox(0, 4, 4, 4, label=3),
+                            Bbox(2, 4, 4, 4),
+                        ],
+                    ),
+
+                    DatasetItem(id=2, subset='val',
+                        image=np.ones((8, 8, 3)),
+                        annotations=[
+                            Bbox(1, 2, 4, 2, label=3),
+                        ],
+                    ),
+
+                    DatasetItem(id=3, subset='test',
+                        image=np.ones((5, 4, 3)) * 3,
                     ),
                 ])
 
@@ -210,10 +221,7 @@ def categories(self):
                     AnnotationType.label: label_cat,
                 }
 
-        def generate_dummy_tfrecord(path):
-            TfDetectionApiConverter()(TestExtractor(), save_dir=path)
-
-        with TestDir() as test_dir:
-            generate_dummy_tfrecord(test_dir)
+        dataset = Project.import_from(DUMMY_DATASET_DIR, 'tf_detection_api') \
+            .make_dataset()
 
-            self.assertTrue(TfDetectionApiImporter.detect(test_dir))
\ No newline at end of file
+        compare_datasets(self, DstExtractor(), dataset)
diff --git a/datumaro/tests/test_voc_format.py b/datumaro/tests/test_voc_format.py
index d65113f0efed..92b63e6090e5 100644
--- a/datumaro/tests/test_voc_format.py
+++ b/datumaro/tests/test_voc_format.py
@@ -1,9 +1,6 @@
 from collections import OrderedDict
 import numpy as np
-import os
 import os.path as osp
-from xml.etree import ElementTree as ET
-import shutil
 
 from unittest import TestCase
 
@@ -11,13 +8,6 @@
     AnnotationType, Label, Bbox, Mask, LabelCategories,
 )
 import datumaro.plugins.voc_format.format as VOC
-from datumaro.plugins.voc_format.extractor import (
-    VocClassificationExtractor,
-    VocDetectionExtractor,
-    VocSegmentationExtractor,
-    VocLayoutExtractor,
-    VocActionExtractor,
-)
 from datumaro.plugins.voc_format.converter import (
     VocConverter,
     VocClassificationConverter,
@@ -28,11 +18,11 @@
 )
 from datumaro.plugins.voc_format.importer import VocImporter
 from datumaro.components.project import Project
-from datumaro.util.image import save_image, Image
+from datumaro.util.image import Image
 from datumaro.util.test_utils import TestDir, compare_datasets
 
 
-class VocTest(TestCase):
+class VocFormatTest(TestCase):
     def test_colormap_generator(self):
         reference = np.array([
             [  0,   0,   0],
@@ -61,115 +51,18 @@ def test_colormap_generator(self):
 
         self.assertTrue(np.array_equal(reference, list(VOC.VocColormap.values())))
 
-def get_label(extractor, label_id):
-    return extractor.categories()[AnnotationType.label].items[label_id].name
-
-def generate_dummy_voc(path):
-    cls_subsets_dir = osp.join(path, 'ImageSets', 'Main')
-    action_subsets_dir = osp.join(path, 'ImageSets', 'Action')
-    layout_subsets_dir = osp.join(path, 'ImageSets', 'Layout')
-    segm_subsets_dir = osp.join(path, 'ImageSets', 'Segmentation')
-    ann_dir = osp.join(path, 'Annotations')
-    img_dir = osp.join(path, 'JPEGImages')
-    segm_dir = osp.join(path, 'SegmentationClass')
-    inst_dir = osp.join(path, 'SegmentationObject')
-
-    os.makedirs(cls_subsets_dir)
-    os.makedirs(ann_dir)
-    os.makedirs(img_dir)
-    os.makedirs(segm_dir)
-    os.makedirs(inst_dir)
-
-    subsets = {
-        'train': ['2007_000001'],
-        'test': ['2007_000002'],
-    }
-
-    # Subsets
-    for subset_name, subset in subsets.items():
-        for item in subset:
-            with open(osp.join(cls_subsets_dir, subset_name + '.txt'), 'w') as f:
-                for item in subset:
-                    f.write('%s\n' % item)
-    shutil.copytree(cls_subsets_dir, action_subsets_dir)
-    shutil.copytree(cls_subsets_dir, layout_subsets_dir)
-    shutil.copytree(cls_subsets_dir, segm_subsets_dir)
-
-    # Classification
-    subset_name = 'train'
-    subset = subsets[subset_name]
-    for label in VOC.VocLabel:
-        with open(osp.join(cls_subsets_dir, '%s_%s.txt' % \
-                (label.name, subset_name)), 'w') as f:
-            for item in subset:
-                presence = label.value % 2
-                f.write('%s %2d\n' % (item, 1 if presence else -1))
-
-    # Detection + Action + Layout
-    subset_name = 'train'
-    subset = subsets[subset_name]
-    for item in subset:
-        root_elem = ET.Element('annotation')
-        ET.SubElement(root_elem, 'folder').text = 'VOC' + item.split('_')[0]
-        ET.SubElement(root_elem, 'filename').text = item + '.jpg'
-
-        size_elem = ET.SubElement(root_elem, 'size')
-        ET.SubElement(size_elem, 'width').text = '10'
-        ET.SubElement(size_elem, 'height').text = '20'
-        ET.SubElement(size_elem, 'depth').text = '3'
-
-        ET.SubElement(root_elem, 'segmented').text = '1'
-
-        obj1_elem = ET.SubElement(root_elem, 'object')
-        ET.SubElement(obj1_elem, 'name').text = 'cat'
-        ET.SubElement(obj1_elem, 'pose').text = VOC.VocPose(1).name
-        ET.SubElement(obj1_elem, 'truncated').text = '1'
-        ET.SubElement(obj1_elem, 'difficult').text = '0'
-        obj1bb_elem = ET.SubElement(obj1_elem, 'bndbox')
-        ET.SubElement(obj1bb_elem, 'xmin').text = '1'
-        ET.SubElement(obj1bb_elem, 'ymin').text = '2'
-        ET.SubElement(obj1bb_elem, 'xmax').text = '3'
-        ET.SubElement(obj1bb_elem, 'ymax').text = '4'
-
-        obj2_elem = ET.SubElement(root_elem, 'object')
-        ET.SubElement(obj2_elem, 'name').text = 'person'
-        obj2bb_elem = ET.SubElement(obj2_elem, 'bndbox')
-        ET.SubElement(obj2bb_elem, 'xmin').text = '4'
-        ET.SubElement(obj2bb_elem, 'ymin').text = '5'
-        ET.SubElement(obj2bb_elem, 'xmax').text = '6'
-        ET.SubElement(obj2bb_elem, 'ymax').text = '7'
-        obj2head_elem = ET.SubElement(obj2_elem, 'part')
-        ET.SubElement(obj2head_elem, 'name').text = VOC.VocBodyPart(1).name
-        obj2headbb_elem = ET.SubElement(obj2head_elem, 'bndbox')
-        ET.SubElement(obj2headbb_elem, 'xmin').text = '5.5'
-        ET.SubElement(obj2headbb_elem, 'ymin').text = '6'
-        ET.SubElement(obj2headbb_elem, 'xmax').text = '7.5'
-        ET.SubElement(obj2headbb_elem, 'ymax').text = '8'
-        obj2act_elem = ET.SubElement(obj2_elem, 'actions')
-        for act in VOC.VocAction:
-            ET.SubElement(obj2act_elem, act.name).text = '%s' % (act.value % 2)
-
-        with open(osp.join(ann_dir, item + '.xml'), 'w') as f:
-            f.write(ET.tostring(root_elem, encoding='unicode'))
-
-    # Segmentation + Instances
-    subset_name = 'train'
-    subset = subsets[subset_name]
-    for item in subset:
-        save_image(osp.join(segm_dir, item + '.png'),
-            np.tile(VOC.VocColormap[2][::-1], (5, 10, 1))
-        )
-        save_image(osp.join(inst_dir, item + '.png'),
-            np.tile(1, (5, 10, 1)))
-
-    # Test images
-    subset_name = 'test'
-    subset = subsets[subset_name]
-    for item in subset:
-        save_image(osp.join(img_dir, item + '.jpg'),
-            np.ones([10, 20, 3]))
-
-    return subsets
+    def test_can_write_and_parse_labelmap(self):
+        src_label_map = VOC.make_voc_label_map()
+        src_label_map['qq'] = [None, ['part1', 'part2'], ['act1', 'act2']]
+        src_label_map['ww'] = [(10, 20, 30), [], ['act3']]
+
+        with TestDir() as test_dir:
+            file_path = osp.join(test_dir, 'test.txt')
+
+            VOC.write_label_map(file_path, src_label_map)
+            dst_label_map = VOC.parse_label_map(file_path)
+
+            self.assertEqual(src_label_map, dst_label_map)
 
 class TestExtractorBase(Extractor):
     def _label(self, voc_label):
@@ -178,32 +71,20 @@ def _label(self, voc_label):
     def categories(self):
         return VOC.make_voc_categories()
 
-class VocExtractorTest(TestCase):
-    def test_can_load_voc_cls(self):
-        class DstExtractor(TestExtractorBase):
-            def __iter__(self):
-                return iter([
-                    DatasetItem(id='2007_000001', subset='train',
-                        annotations=[
-                            Label(self._label(l.name))
-                            for l in VOC.VocLabel if l.value % 2 == 1
-                        ]
-                    ),
-                ])
-
-        with TestDir() as test_dir:
-            generate_dummy_voc(test_dir)
 
-            parsed_train = VocClassificationExtractor(
-                osp.join(test_dir, 'ImageSets', 'Main', 'train.txt'))
-            compare_datasets(self, DstExtractor(), parsed_train)
+DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets', 'voc_dataset')
 
-    def test_can_load_voc_det(self):
+class VocImportTest(TestCase):
+    def test_can_import(self):
         class DstExtractor(TestExtractorBase):
             def __iter__(self):
                 return iter([
                     DatasetItem(id='2007_000001', subset='train',
+                        image=Image(path='2007_000001.jpg', size=(20, 10)),
                         annotations=[
+                            Label(self._label(l.name))
+                            for l in VOC.VocLabel if l.value % 2 == 1
+                        ] + [
                             Bbox(1, 2, 2, 2, label=self._label('cat'),
                                 attributes={
                                     'pose': VOC.VocPose(1).name,
@@ -224,102 +105,27 @@ def __iter__(self):
                                     }
                                 },
                                 id=2, group=2,
-                                # TODO: Actions and group should be excluded
-                                # as soon as correct merge is implemented
                             ),
-                        ]
-                    ),
-                ])
-
-        with TestDir() as test_dir:
-            generate_dummy_voc(test_dir)
-            parsed_train = VocDetectionExtractor(
-                osp.join(test_dir, 'ImageSets', 'Main', 'train.txt'))
-            compare_datasets(self, DstExtractor(), parsed_train)
-
-    def test_can_load_voc_segm(self):
-        class DstExtractor(TestExtractorBase):
-            def __iter__(self):
-                return iter([
-                    DatasetItem(id='2007_000001', subset='train',
-                        annotations=[
+                            Bbox(5.5, 6, 2, 2, label=self._label(
+                                    VOC.VocBodyPart(1).name),
+                                group=2
+                            ),
                             Mask(image=np.ones([5, 10]),
                                 label=self._label(VOC.VocLabel(2).name),
                                 group=1,
                             ),
                         ]
                     ),
+                    DatasetItem(id='2007_000002', subset='test',
+                        image=np.zeros((20, 10, 3))),
                 ])
 
-        with TestDir() as test_dir:
-            generate_dummy_voc(test_dir)
-            parsed_train = VocSegmentationExtractor(
-                osp.join(test_dir, 'ImageSets', 'Segmentation', 'train.txt'))
-            compare_datasets(self, DstExtractor(), parsed_train)
+        dataset = Project.import_from(DUMMY_DATASET_DIR, 'voc').make_dataset()
 
-    def test_can_load_voc_layout(self):
-        class DstExtractor(TestExtractorBase):
-            def __iter__(self):
-                return iter([
-                    DatasetItem(id='2007_000001', subset='train',
-                        annotations=[
-                            Bbox(4, 5, 2, 2, label=self._label('person'),
-                                attributes={
-                                    'truncated': False,
-                                    'difficult': False,
-                                    'occluded': False,
-                                    **{
-                                        a.name: a.value % 2 == 1
-                                        for a in VOC.VocAction
-                                    }
-                                },
-                                id=2, group=2,
-                                # TODO: Actions should be excluded
-                                # as soon as correct merge is implemented
-                            ),
-                            Bbox(5.5, 6, 2, 2, label=self._label(
-                                    VOC.VocBodyPart(1).name),
-                                group=2
-                            )
-                        ]
-                    ),
-                ])
-
-        with TestDir() as test_dir:
-            generate_dummy_voc(test_dir)
-            parsed_train = VocLayoutExtractor(
-                osp.join(test_dir, 'ImageSets', 'Layout', 'train.txt'))
-            compare_datasets(self, DstExtractor(), parsed_train)
+        compare_datasets(self, DstExtractor(), dataset)
 
-    def test_can_load_voc_action(self):
-        class DstExtractor(TestExtractorBase):
-            def __iter__(self):
-                return iter([
-                    DatasetItem(id='2007_000001', subset='train',
-                        annotations=[
-                            Bbox(4, 5, 2, 2, label=self._label('person'),
-                                attributes={
-                                    'truncated': False,
-                                    'difficult': False,
-                                    'occluded': False,
-                                    **{
-                                        a.name: a.value % 2 == 1
-                                        for a in VOC.VocAction
-                                    }
-                                    # TODO: group should be excluded
-                                    # as soon as correct merge is implemented
-                                },
-                                id=2, group=2,
-                            ),
-                        ]
-                    ),
-                ])
-
-        with TestDir() as test_dir:
-            generate_dummy_voc(test_dir)
-            parsed_train = VocActionExtractor(
-                osp.join(test_dir, 'ImageSets', 'Action', 'train.txt'))
-            compare_datasets(self, DstExtractor(), parsed_train)
+    def test_can_detect_voc(self):
+        self.assertTrue(VocImporter.detect(DUMMY_DATASET_DIR))
 
 class VocConverterTest(TestCase):
     def _test_save_and_load(self, source_dataset, converter, test_dir,
@@ -860,39 +666,3 @@ def __iter__(self):
         with TestDir() as test_dir:
             self._test_save_and_load(TestExtractor(),
                 VocConverter(label_map='voc'), test_dir)
-
-class VocImportTest(TestCase):
-    def test_can_import(self):
-        with TestDir() as test_dir:
-            subsets = generate_dummy_voc(test_dir)
-
-            dataset = Project.import_from(test_dir, 'voc').make_dataset()
-
-            self.assertEqual(len(VOC.VocTask) * len(subsets),
-                len(dataset.sources))
-            self.assertEqual(set(subsets), set(dataset.subsets()))
-            self.assertEqual(
-                sum([len(s) for _, s in subsets.items()]),
-                len(dataset))
-
-    def test_can_detect_voc(self):
-        with TestDir() as test_dir:
-            generate_dummy_voc(test_dir)
-
-            dataset_found = VocImporter.detect(test_dir)
-
-            self.assertTrue(dataset_found)
-
-class VocFormatTest(TestCase):
-    def test_can_write_and_parse_labelmap(self):
-        src_label_map = VOC.make_voc_label_map()
-        src_label_map['qq'] = [None, ['part1', 'part2'], ['act1', 'act2']]
-        src_label_map['ww'] = [(10, 20, 30), [], ['act3']]
-
-        with TestDir() as test_dir:
-            file_path = osp.join(test_dir, 'test.txt')
-
-            VOC.write_label_map(file_path, src_label_map)
-            dst_label_map = VOC.parse_label_map(file_path)
-
-            self.assertEqual(src_label_map, dst_label_map)
diff --git a/datumaro/tests/test_yolo_format.py b/datumaro/tests/test_yolo_format.py
index 4d29c349c24a..df71f5f02a28 100644
--- a/datumaro/tests/test_yolo_format.py
+++ b/datumaro/tests/test_yolo_format.py
@@ -6,6 +6,7 @@
 from datumaro.components.extractor import (Extractor, DatasetItem,
     AnnotationType, Bbox, LabelCategories,
 )
+from datumaro.components.project import Project
 from datumaro.plugins.yolo_format.importer import YoloImporter
 from datumaro.plugins.yolo_format.converter import YoloConverter
 from datumaro.util.image import Image, save_image
@@ -115,13 +116,19 @@ def categories(self):
 
             compare_datasets(self, source_dataset, parsed_dataset)
 
+
+DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets', 'yolo_dataset')
+
 class YoloImporterTest(TestCase):
     def test_can_detect(self):
-        class TestExtractor(Extractor):
+        self.assertTrue(YoloImporter.detect(DUMMY_DATASET_DIR))
+
+    def test_can_import(self):
+        class DstExtractor(Extractor):
             def __iter__(self):
                 return iter([
                     DatasetItem(id=1, subset='train',
-                        image=Image(path='1.jpg', size=(10, 15)),
+                        image=np.ones((10, 15, 3)),
                         annotations=[
                             Bbox(0, 2, 4, 2, label=2),
                             Bbox(3, 3, 2, 3, label=4),
@@ -136,7 +143,7 @@ def categories(self):
                     AnnotationType.label: label_categories,
                 }
 
-        with TestDir() as test_dir:
-            YoloConverter()(TestExtractor(), save_dir=test_dir)
+        dataset = Project.import_from(DUMMY_DATASET_DIR, 'yolo') \
+            .make_dataset()
 
-            self.assertTrue(YoloImporter.detect(test_dir))
\ No newline at end of file
+        compare_datasets(self, DstExtractor(), dataset)