Support relative paths in LabelMe (cvat-ai#19)

Author: TOsmanov · Committed: Sep 17, 2020
Commit: c2d6c79
Parent: f6254df
Show file tree

Hide file tree

Showing 3 changed files with 19 additions and 29 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [Unreleased]
 ### Added
 - `reindex` option in COCO and CVAT converters (<https://github.com/openvinotoolkit/datumaro/pull/18>)
+- Support for relative paths in LabelMe format (<https://github.com/openvinotoolkit/datumaro/pull/19>)
 
 ### Changed
 -

diff --git a/datumaro/plugins/labelme_format.py b/datumaro/plugins/labelme_format.py
@@ -49,10 +49,14 @@ def _parse(self, path):
         }
 
         items = []
-        for p in sorted(p for p in os.listdir(path) if p.endswith('.xml')):
+        for p in os.listdir(path):
+            if not p.endswith('.xml'):
+                continue
             root = ElementTree.parse(osp.join(path, p))
 
-            image_path = osp.join(path, root.find('filename').text)
+            item_id = osp.join(root.find('folder').text or '',
+                root.find('filename').text)
+            image_path = osp.join(path, item_id)
             image_size = None
             imagesize_elem = root.find('imagesize')
             if imagesize_elem is not None:
@@ -63,7 +67,7 @@ def _parse(self, path):
 
             annotations = self._parse_annotations(root, path, categories)
 
-            items.append(DatasetItem(id=osp.splitext(p)[0],
+            items.append(DatasetItem(id=osp.splitext(item_id)[0],
                 subset=self._subset, image=image, annotations=annotations))
         return items, categories
 
@@ -298,24 +302,20 @@ def apply(self):
             os.makedirs(osp.join(subset_dir, LabelMePath.MASKS_DIR),
                 exist_ok=True)
 
-            for item in subset:
-                self._save_item(item, subset_dir)
+            for index, item in enumerate(subset):
+                self._save_item(item, subset_dir, index)
 
     def _get_label(self, label_id):
         if label_id is None:
             return ''
         return self._extractor.categories()[AnnotationType.label] \
             .items[label_id].name
 
-    def _save_item(self, item, subset_dir):
+    def _save_item(self, item, subset_dir, index):
         from lxml import etree as ET
 
         log.debug("Converting item '%s'", item.id)
 
-        if '/' in item.id:
-            raise Exception("Can't export item '%s': "
-                "LabelMe format only supports flat image layout" % item.id)
-
         image_filename = self._make_image_filename(item)
         if self._save_images:
             if item.has_image and item.image.has_data:
@@ -324,8 +324,8 @@ def _save_item(self, item, subset_dir):
                 log.debug("Item '%s' has no image", item.id)
 
         root_elem = ET.Element('annotation')
-        ET.SubElement(root_elem, 'filename').text = image_filename
-        ET.SubElement(root_elem, 'folder').text = ''
+        ET.SubElement(root_elem, 'filename').text = osp.basename(image_filename)
+        ET.SubElement(root_elem, 'folder').text = osp.dirname(image_filename)
 
         source_elem = ET.SubElement(root_elem, 'source')
         ET.SubElement(source_elem, 'sourceImage').text = ''
@@ -384,7 +384,8 @@ def _save_item(self, item, subset_dir):
                 ET.SubElement(poly_elem, 'username').text = \
                     str(ann.attributes.pop('username', ''))
             elif ann.type == AnnotationType.mask:
-                mask_filename = '%s_mask_%s.png' % (item.id, obj_id)
+                mask_filename = '%s_mask_%s.png' % \
+                    (item.id.replace('/', '_'), obj_id)
                 save_image(osp.join(subset_dir, LabelMePath.MASKS_DIR,
                         mask_filename),
                     self._paint_mask(ann.image))
@@ -424,7 +425,7 @@ def _save_item(self, item, subset_dir):
                 ET.SubElement(parts_elem, 'hasparts').text = ''
                 ET.SubElement(parts_elem, 'ispartof').text = str(leader_id)
 
-        xml_path = osp.join(subset_dir, '%s.xml' % item.id)
+        xml_path = osp.join(subset_dir, 'item_%09d.xml' % index)
         with open(xml_path, 'w', encoding='utf-8') as f:
             xml_data = ET.tostring(root_elem, encoding='unicode',
                 pretty_print=True)

diff --git a/tests/test_labelme_format.py b/tests/test_labelme_format.py
@@ -30,7 +30,7 @@ def _test_save_and_load(self, source_dataset, converter, test_dir,
 
     def test_can_save_and_load(self):
         source_dataset = Dataset.from_iterable([
-            DatasetItem(id=1, subset='train',
+            DatasetItem(id='dir1/1', subset='train',
                 image=np.ones((16, 16, 3)),
                 annotations=[
                     Bbox(0, 4, 4, 8, label=2, group=2),
@@ -54,7 +54,7 @@ def test_can_save_and_load(self):
         })
 
         target_dataset = Dataset.from_iterable([
-            DatasetItem(id=1, subset='train',
+            DatasetItem(id='dir1/1', subset='train',
                 image=np.ones((16, 16, 3)),
                 annotations=[
                     Bbox(0, 4, 4, 8, label=0, group=2, id=0,
@@ -96,18 +96,6 @@ def test_can_save_and_load(self):
                 partial(LabelMeConverter.convert, save_images=True),
                 test_dir, target_dataset=target_dataset)
 
-    def test_cant_save_dataset_with_relative_paths(self):
-        expected_dataset = Dataset.from_iterable([
-            DatasetItem(id='dir/1', image=np.ones((2, 6, 3))),
-        ], categories={
-            AnnotationType.label: LabelCategories(),
-        })
-
-        with self.assertRaisesRegex(Exception, r'only supports flat'):
-            with TestDir() as test_dir:
-                self._test_save_and_load(expected_dataset,
-                    LabelMeConverter.convert, test_dir)
-
 
 DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets', 'labelme_dataset')
 
@@ -139,7 +127,7 @@ def test_can_import(self):
         ]
 
         target_dataset = Dataset.from_iterable([
-            DatasetItem(id='img1', image=img1,
+            DatasetItem(id='example_folder/img1', image=img1,
                 annotations=[
                     Polygon([43, 34, 45, 34, 45, 37, 43, 37],
                         label=0, id=0,