cvat-ai · nmanovic · Feb 12, 2019 · Feb 5, 2019 · Feb 5, 2019 · Feb 5, 2019
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -10,7 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Ability to rotate images/video in the client part (Ctrl+R, Shift+Ctrl+R shortcuts) (#305)
 - The ReID application for automatic bounding box merging has been added (#299)
 - Keyboard shortcuts to switch next/previous default shape type (box, polygon etc) [Alt + <, Alt + >] (#316)
-
+- Converter for VOC now supports interpolation tracks
 
 ### Changed
 - Propagation setup has been moved from settings to bottom player panel

@@ -283,6 +283,8 @@ def __init__(self, source_path, compress_quality, flip_flag=False):
         ff = FFmpeg(
             inputs  = {source_path: None},
             outputs = {target_path: output_opts})
+
+        slogger.glob.info("FFMpeg cmd: {} ".format(ff.cmd))
         ff.run()
 
     def getframepath(self, k):

@@ -6,9 +6,9 @@
 CVAT XML and writes the annotations in PASCAL VOC format into a given
 directory.
 
-This implementation only supports bounding boxes in CVAT annotation format, and
-warns if it encounter any tracks or annotations that are not bounding boxes,
-ignoring them in both cases.
+This implementation supports both interpolation tracks from video and
+annotated images. If it encounters any tracks or annotations that are
+not bounding boxes, it ignores them.
 """
 
 import os
@@ -56,38 +56,97 @@ def process_cvat_xml(xml_file, image_dir, output_dir):
     os.makedirs(output_dir, exist_ok=True)
     cvat_xml = etree.parse(xml_file)
 
-    tracks = [(x.get('id'), x.get('label'))
-              for x in cvat_xml.findall('track')]
-    if tracks:
-        log.warn('Cannot parse interpolation tracks, ignoring {} tracks'.format(len(tracks)))
-
-    for img_tag in cvat_xml.findall('image'):
-        image_name = img_tag.get('name')
-        width = img_tag.get('width')
-        height = img_tag.get('height')
-        image_path = os.path.join(image_dir, image_name)
-        if not os.path.exists(image_path):
-            log.warn('{} image cannot be found. Is `{}` image directory correct?'.
-                format(image_path, image_dir))
-        writer = Writer(image_path, width, height)
-
-        unknown_tags = {x.tag for x in img_tag.iter()}.difference(KNOWN_TAGS)
-        if unknown_tags:
-            log.warn('Ignoring tags for image {}: {}'.format(image_path, unknown_tags))
-
-        for box in img_tag.findall('box'):
-            label = box.get('label')
-            xmin = float(box.get('xtl'))
-            ymin = float(box.get('ytl'))
-            xmax = float(box.get('xbr'))
-            ymax = float(box.get('ybr'))
-
-            writer.addObject(label, xmin, ymin, xmax, ymax)
-
-        anno_name = os.path.basename(os.path.splitext(image_name)[0] + '.xml')
-        anno_dir = os.path.dirname(os.path.join(output_dir, image_name))
-        os.makedirs(anno_dir, exist_ok=True)
-        writer.save(os.path.join(anno_dir, anno_name))
+    basename = os.path.splitext( os.path.basename( xml_file ) )[0]
+
+    tracks= cvat_xml.findall( './/track' )
+
+    if (tracks is not None) and (len(tracks) > 0):
+        frames = {}
+
+        for track in tracks:
+            trackid = int(track.get("id"))
+            label = track.get("label")
+            boxes = track.findall( './box' )
+            for box in boxes:
+                frameid  = int(box.get('frame'))
+                outside  = int(box.get('outside'))
+                #occluded = int(box.get('occluded'))  #currently unused
+                #keyframe = int(box.get('keyframe'))  #currently unused
+                xtl      = float(box.get('xtl'))
+                ytl      = float(box.get('ytl'))
+                xbr      = float(box.get('xbr'))
+                ybr      = float(box.get('ybr'))
+
+                frame = frames.get( frameid, {} )
+
+                if outside == 0:
+                    frame[ trackid ] = { 'xtl': xtl, 'ytl': ytl, 'xbr': xbr, 'ybr': ybr, 'label': label }
+
+                frames[ frameid ] = frame
+
+        width = int(cvat_xml.find('.//original_size/width').text)
+        height  = int(cvat_xml.find('.//original_size/height').text)
+
+        # Write one VOC annotation file per frame, listing each object present in that frame
+        for frameid in sorted(frames.keys()):
+            #print( frameid )
+
+            image_name = "%s_%08d.jpg" % (basename, frameid)
+            image_path = os.path.join(image_dir, image_name)
+            if not os.path.exists(image_path):
+                log.warn('{} image cannot be found. Is `{}` image directory correct?'.
+                    format(image_path, image_dir))
+            writer = Writer(image_path, width, height)
+
+            frame = frames[frameid]
+
+            objids = sorted(frame.keys())
+
+            for objid in objids:
+
+                box = frame[objid]
+
+                label = box.get('label')
+                xmin = float(box.get('xtl'))
+                ymin = float(box.get('ytl'))
+                xmax = float(box.get('xbr'))
+                ymax = float(box.get('ybr'))
+
+                writer.addObject(label, xmin, ymin, xmax, ymax)
+
+            anno_name = os.path.basename(os.path.splitext(image_name)[0] + '.xml')
+            anno_dir = os.path.dirname(os.path.join(output_dir, image_name))
+            os.makedirs(anno_dir, exist_ok=True)
+            writer.save(os.path.join(anno_dir, anno_name))
+
+    else:
+        for img_tag in cvat_xml.findall('image'):
+            image_name = img_tag.get('name')
+            width = img_tag.get('width')
+            height = img_tag.get('height')
+            image_path = os.path.join(image_dir, image_name)
+            if not os.path.exists(image_path):
+                log.warn('{} image cannot be found. Is `{}` image directory correct?'.
+                    format(image_path, image_dir))
+            writer = Writer(image_path, width, height)
+
+            unknown_tags = {x.tag for x in img_tag.iter()}.difference(KNOWN_TAGS)
+            if unknown_tags:
+                log.warn('Ignoring tags for image {}: {}'.format(image_path, unknown_tags))
+
+            for box in img_tag.findall('box'):
+                label = box.get('label')
+                xmin = float(box.get('xtl'))
+                ymin = float(box.get('ytl'))
+                xmax = float(box.get('xbr'))
+                ymax = float(box.get('ybr'))
+
+                writer.addObject(label, xmin, ymin, xmax, ymax)
+
+            anno_name = os.path.basename(os.path.splitext(image_name)[0] + '.xml')
+            anno_dir = os.path.dirname(os.path.join(output_dir, image_name))
+            os.makedirs(anno_dir, exist_ok=True)
+            writer.save(os.path.join(anno_dir, anno_name))
 
 
 def main():

@@ -103,6 +103,10 @@
         <username>admin</username>
         <email></email>
       </owner>
+      <original_size>
+         <width>1024</width>
+         <height>768</height>
+      </original_size>
     </task>
     <dumped>2018-06-06 15:52:11.138470+03:00</dumped>
   </meta>
@@ -153,6 +157,7 @@ def test_parse_annotation_xml(self, mock_log):
         process_cvat_xml(xml_filename, 'img_dir', voc_dir)
         for exp in expected_xmls:
             self.assertTrue(os.path.exists(exp))
+            # TODO: parse the resulting XML files and check their contents
 
     @mock.patch('utils.voc.converter.log')
     def test_parse_interpolation_xml(self, mock_log):
@@ -161,10 +166,19 @@ def test_parse_interpolation_xml(self, mock_log):
             file.write(XML_INTERPOLATION_EXAMPLE)
 
         voc_dir = os.path.join(self.test_dir, 'voc_dir')
-        expected_warn = 'Cannot parse interpolation tracks, ignoring 2 tracks'
+
+
+        frames = [0, 1, 2, 110, 111, 112 ]
+        expected_xmls = [os.path.join(voc_dir, 'interpolations_%08d.xml' % x )
+                         for x in frames]
 
         process_cvat_xml(xml_filename, 'img_dir', voc_dir)
 
         self.assertTrue(os.path.exists(voc_dir))
-        self.assertTrue(len(os.listdir(voc_dir)) == 0)
-        mock_log.warn.assert_called_once_with(expected_warn)
+        self.assertTrue(len(os.listdir(voc_dir)) == len(frames))
+        for exp in expected_xmls:
+            self.assertTrue(os.path.exists(exp))
+            # TODO: parse the resulting XML files and check their contents
+
+
+