diff --git a/CHANGELOG.md b/CHANGELOG.md index 549d87e2ca22..e015e2a74372 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Ability to rotate images/video in the client part (Ctrl+R, Shift+Ctrl+R shortcuts) (#305) - The ReID application for automatic bounding box merging has been added (#299) - Keyboard shortcuts to switch next/previous default shape type (box, polygon etc) [Alt + <, Alt + >] (#316) - +- Converter for VOC now supports interpolation tracks ### Changed - Propagation setup has been moved from settings to bottom player panel diff --git a/cvat/apps/engine/task.py b/cvat/apps/engine/task.py index e1b2b1ffd268..37b00ba393ab 100644 --- a/cvat/apps/engine/task.py +++ b/cvat/apps/engine/task.py @@ -283,6 +283,8 @@ def __init__(self, source_path, compress_quality, flip_flag=False): ff = FFmpeg( inputs = {source_path: None}, outputs = {target_path: output_opts}) + + slogger.glob.info("FFMpeg cmd: {} ".format(ff.cmd)) ff.run() def getframepath(self, k): diff --git a/utils/voc/converter.py b/utils/voc/converter.py index a7ac348df24a..1764e819313e 100644 --- a/utils/voc/converter.py +++ b/utils/voc/converter.py @@ -6,9 +6,9 @@ CVAT XML and writes the annotations in PASCAL VOC format into a given directory. -This implementation only supports bounding boxes in CVAT annotation format, and -warns if it encounter any tracks or annotations that are not bounding boxes, -ignoring them in both cases. +This implementation supports both interpolation tracks from video and +annotated images. If it encounters any tracks or annotations that are +not bounding boxes, it ignores them. 
""" import os @@ -56,38 +56,97 @@ def process_cvat_xml(xml_file, image_dir, output_dir): os.makedirs(output_dir, exist_ok=True) cvat_xml = etree.parse(xml_file) - tracks = [(x.get('id'), x.get('label')) - for x in cvat_xml.findall('track')] - if tracks: - log.warn('Cannot parse interpolation tracks, ignoring {} tracks'.format(len(tracks))) - - for img_tag in cvat_xml.findall('image'): - image_name = img_tag.get('name') - width = img_tag.get('width') - height = img_tag.get('height') - image_path = os.path.join(image_dir, image_name) - if not os.path.exists(image_path): - log.warn('{} image cannot be found. Is `{}` image directory correct?'. - format(image_path, image_dir)) - writer = Writer(image_path, width, height) - - unknown_tags = {x.tag for x in img_tag.iter()}.difference(KNOWN_TAGS) - if unknown_tags: - log.warn('Ignoring tags for image {}: {}'.format(image_path, unknown_tags)) - - for box in img_tag.findall('box'): - label = box.get('label') - xmin = float(box.get('xtl')) - ymin = float(box.get('ytl')) - xmax = float(box.get('xbr')) - ymax = float(box.get('ybr')) - - writer.addObject(label, xmin, ymin, xmax, ymax) - - anno_name = os.path.basename(os.path.splitext(image_name)[0] + '.xml') - anno_dir = os.path.dirname(os.path.join(output_dir, image_name)) - os.makedirs(anno_dir, exist_ok=True) - writer.save(os.path.join(anno_dir, anno_name)) + basename = os.path.splitext( os.path.basename( xml_file ) )[0] + + tracks= cvat_xml.findall( './/track' ) + + if (tracks is not None) and (len(tracks) > 0): + frames = {} + + for track in tracks: + trackid = int(track.get("id")) + label = track.get("label") + boxes = track.findall( './box' ) + for box in boxes: + frameid = int(box.get('frame')) + outside = int(box.get('outside')) + #occluded = int(box.get('occluded')) #currently unused + #keyframe = int(box.get('keyframe')) #currently unused + xtl = float(box.get('xtl')) + ytl = float(box.get('ytl')) + xbr = float(box.get('xbr')) + ybr = float(box.get('ybr')) + + frame = 
frames.get( frameid, {} ) + + if outside == 0: + frame[ trackid ] = { 'xtl': xtl, 'ytl': ytl, 'xbr': xbr, 'ybr': ybr, 'label': label } + + frames[ frameid ] = frame + + width = int(cvat_xml.find('.//original_size/width').text) + height = int(cvat_xml.find('.//original_size/height').text) + + # Spit out a list of each object for each frame + for frameid in sorted(frames.keys()): + #print( frameid ) + + image_name = "%s_%08d.jpg" % (basename, frameid) + image_path = os.path.join(image_dir, image_name) + if not os.path.exists(image_path): + log.warn('{} image cannot be found. Is `{}` image directory correct?'. + format(image_path, image_dir)) + writer = Writer(image_path, width, height) + + frame = frames[frameid] + + objids = sorted(frame.keys()) + + for objid in objids: + + box = frame[objid] + + label = box.get('label') + xmin = float(box.get('xtl')) + ymin = float(box.get('ytl')) + xmax = float(box.get('xbr')) + ymax = float(box.get('ybr')) + + writer.addObject(label, xmin, ymin, xmax, ymax) + + anno_name = os.path.basename(os.path.splitext(image_name)[0] + '.xml') + anno_dir = os.path.dirname(os.path.join(output_dir, image_name)) + os.makedirs(anno_dir, exist_ok=True) + writer.save(os.path.join(anno_dir, anno_name)) + + else: + for img_tag in cvat_xml.findall('image'): + image_name = img_tag.get('name') + width = img_tag.get('width') + height = img_tag.get('height') + image_path = os.path.join(image_dir, image_name) + if not os.path.exists(image_path): + log.warn('{} image cannot be found. Is `{}` image directory correct?'. 
+ format(image_path, image_dir)) + writer = Writer(image_path, width, height) + + unknown_tags = {x.tag for x in img_tag.iter()}.difference(KNOWN_TAGS) + if unknown_tags: + log.warn('Ignoring tags for image {}: {}'.format(image_path, unknown_tags)) + + for box in img_tag.findall('box'): + label = box.get('label') + xmin = float(box.get('xtl')) + ymin = float(box.get('ytl')) + xmax = float(box.get('xbr')) + ymax = float(box.get('ybr')) + + writer.addObject(label, xmin, ymin, xmax, ymax) + + anno_name = os.path.basename(os.path.splitext(image_name)[0] + '.xml') + anno_dir = os.path.dirname(os.path.join(output_dir, image_name)) + os.makedirs(anno_dir, exist_ok=True) + writer.save(os.path.join(anno_dir, anno_name)) def main(): diff --git a/utils/voc/tests/test_process_cvat_xml.py b/utils/voc/tests/test_process_cvat_xml.py index eb3456219f78..60a300bd4bb2 100644 --- a/utils/voc/tests/test_process_cvat_xml.py +++ b/utils/voc/tests/test_process_cvat_xml.py @@ -103,6 +103,10 @@ admin + + 1024 + 768 + 2018-06-06 15:52:11.138470+03:00 @@ -153,6 +157,7 @@ def test_parse_annotation_xml(self, mock_log): process_cvat_xml(xml_filename, 'img_dir', voc_dir) for exp in expected_xmls: self.assertTrue(os.path.exists(exp)) + # We should add in some code to parse the resulting xml files @mock.patch('utils.voc.converter.log') def test_parse_interpolation_xml(self, mock_log): @@ -161,10 +166,19 @@ def test_parse_interpolation_xml(self, mock_log): file.write(XML_INTERPOLATION_EXAMPLE) voc_dir = os.path.join(self.test_dir, 'voc_dir') - expected_warn = 'Cannot parse interpolation tracks, ignoring 2 tracks' + + + frames = [0, 1, 2, 110, 111, 112 ] + expected_xmls = [os.path.join(voc_dir, 'interpolations_%08d.xml' % x ) + for x in frames] process_cvat_xml(xml_filename, 'img_dir', voc_dir) self.assertTrue(os.path.exists(voc_dir)) - self.assertTrue(len(os.listdir(voc_dir)) == 0) - mock_log.warn.assert_called_once_with(expected_warn) + self.assertTrue(len(os.listdir(voc_dir)) == len(frames)) + 
for exp in expected_xmls: + self.assertTrue(os.path.exists(exp)) + # TODO: add code to parse the resulting XML files + + +