Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

log ffmpeg command line & export interpolation to VOC #312

Merged
merged 10 commits into from
Feb 12, 2019
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Ability to rotate images/video in the client part (Ctrl+R, Shift+Ctrl+R shortcuts) (#305)
- The ReID application for automatic bounding box merging has been added (#299)
- Keyboard shortcuts to switch next/previous default shape type (box, polygon etc) [Alt + <, Alt + >] (#316)

- Converter for VOC now supports interpolation tracks

### Changed
- Propagation setup has been moved from settings to bottom player panel
Expand Down
2 changes: 2 additions & 0 deletions cvat/apps/engine/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,8 @@ def __init__(self, source_path, compress_quality, flip_flag=False):
ff = FFmpeg(
inputs = {source_path: None},
outputs = {target_path: output_opts})

slogger.glob.info("FFMpeg cmd: {} ".format(ff.cmd))
ff.run()

def getframepath(self, k):
Expand Down
129 changes: 94 additions & 35 deletions utils/voc/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
CVAT XML and writes the annotations in PASCAL VOC format into a given
directory.

This implementation only supports bounding boxes in CVAT annotation format, and
warns if it encounters any tracks or annotations that are not bounding boxes,
ignoring them in both cases.
This implementation supports both interpolation tracks from video and
annotated images. If it encounters any tracks or annotations that are
not bounding boxes, it ignores them.
"""

import os
Expand Down Expand Up @@ -56,38 +56,97 @@ def process_cvat_xml(xml_file, image_dir, output_dir):
os.makedirs(output_dir, exist_ok=True)
cvat_xml = etree.parse(xml_file)

tracks = [(x.get('id'), x.get('label'))
for x in cvat_xml.findall('track')]
if tracks:
log.warn('Cannot parse interpolation tracks, ignoring {} tracks'.format(len(tracks)))

for img_tag in cvat_xml.findall('image'):
image_name = img_tag.get('name')
width = img_tag.get('width')
height = img_tag.get('height')
image_path = os.path.join(image_dir, image_name)
if not os.path.exists(image_path):
log.warn('{} image cannot be found. Is `{}` image directory correct?'.
format(image_path, image_dir))
writer = Writer(image_path, width, height)

unknown_tags = {x.tag for x in img_tag.iter()}.difference(KNOWN_TAGS)
if unknown_tags:
log.warn('Ignoring tags for image {}: {}'.format(image_path, unknown_tags))

for box in img_tag.findall('box'):
label = box.get('label')
xmin = float(box.get('xtl'))
ymin = float(box.get('ytl'))
xmax = float(box.get('xbr'))
ymax = float(box.get('ybr'))

writer.addObject(label, xmin, ymin, xmax, ymax)

anno_name = os.path.basename(os.path.splitext(image_name)[0] + '.xml')
anno_dir = os.path.dirname(os.path.join(output_dir, image_name))
os.makedirs(anno_dir, exist_ok=True)
writer.save(os.path.join(anno_dir, anno_name))
basename = os.path.splitext( os.path.basename( xml_file ) )[0]

tracks= cvat_xml.findall( './/track' )

if (tracks is not None) and (len(tracks) > 0):
frames = {}

for track in tracks:
trackid = int(track.get("id"))
label = track.get("label")
boxes = track.findall( './box' )
for box in boxes:
frameid = int(box.get('frame'))
outside = int(box.get('outside'))
#occluded = int(box.get('occluded')) #currently unused
#keyframe = int(box.get('keyframe')) #currently unused
xtl = float(box.get('xtl'))
ytl = float(box.get('ytl'))
xbr = float(box.get('xbr'))
ybr = float(box.get('ybr'))

frame = frames.get( frameid, {} )

if outside == 0:
frame[ trackid ] = { 'xtl': xtl, 'ytl': ytl, 'xbr': xbr, 'ybr': ybr, 'label': label }

frames[ frameid ] = frame

width = int(cvat_xml.find('.//original_size/width').text)
height = int(cvat_xml.find('.//original_size/height').text)

# Write one annotation file per frame, listing every object present in that frame
for frameid in sorted(frames.keys()):
#print( frameid )

image_name = "%s_%08d.jpg" % (basename, frameid)
image_path = os.path.join(image_dir, image_name)
if not os.path.exists(image_path):
log.warn('{} image cannot be found. Is `{}` image directory correct?'.
format(image_path, image_dir))
writer = Writer(image_path, width, height)

jrjbertram marked this conversation as resolved.
Show resolved Hide resolved
frame = frames[frameid]

objids = sorted(frame.keys())

for objid in objids:
jrjbertram marked this conversation as resolved.
Show resolved Hide resolved

box = frame[objid]

label = box.get('label')
xmin = float(box.get('xtl'))
ymin = float(box.get('ytl'))
xmax = float(box.get('xbr'))
ymax = float(box.get('ybr'))

writer.addObject(label, xmin, ymin, xmax, ymax)

anno_name = os.path.basename(os.path.splitext(image_name)[0] + '.xml')
anno_dir = os.path.dirname(os.path.join(output_dir, image_name))
os.makedirs(anno_dir, exist_ok=True)
writer.save(os.path.join(anno_dir, anno_name))

else:
for img_tag in cvat_xml.findall('image'):
image_name = img_tag.get('name')
width = img_tag.get('width')
height = img_tag.get('height')
image_path = os.path.join(image_dir, image_name)
if not os.path.exists(image_path):
log.warn('{} image cannot be found. Is `{}` image directory correct?'.
format(image_path, image_dir))
writer = Writer(image_path, width, height)

unknown_tags = {x.tag for x in img_tag.iter()}.difference(KNOWN_TAGS)
if unknown_tags:
log.warn('Ignoring tags for image {}: {}'.format(image_path, unknown_tags))

for box in img_tag.findall('box'):
label = box.get('label')
xmin = float(box.get('xtl'))
ymin = float(box.get('ytl'))
xmax = float(box.get('xbr'))
ymax = float(box.get('ybr'))

writer.addObject(label, xmin, ymin, xmax, ymax)

anno_name = os.path.basename(os.path.splitext(image_name)[0] + '.xml')
anno_dir = os.path.dirname(os.path.join(output_dir, image_name))
os.makedirs(anno_dir, exist_ok=True)
writer.save(os.path.join(anno_dir, anno_name))


def main():
Expand Down
20 changes: 17 additions & 3 deletions utils/voc/tests/test_process_cvat_xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,10 @@
<username>admin</username>
<email></email>
</owner>
<original_size>
<width>1024</width>
<height>768</height>
</original_size>
</task>
<dumped>2018-06-06 15:52:11.138470+03:00</dumped>
</meta>
Expand Down Expand Up @@ -153,6 +157,7 @@ def test_parse_annotation_xml(self, mock_log):
process_cvat_xml(xml_filename, 'img_dir', voc_dir)
for exp in expected_xmls:
self.assertTrue(os.path.exists(exp))
# TODO: add code to parse the resulting XML files

@mock.patch('utils.voc.converter.log')
def test_parse_interpolation_xml(self, mock_log):
Expand All @@ -161,10 +166,19 @@ def test_parse_interpolation_xml(self, mock_log):
file.write(XML_INTERPOLATION_EXAMPLE)

voc_dir = os.path.join(self.test_dir, 'voc_dir')
expected_warn = 'Cannot parse interpolation tracks, ignoring 2 tracks'


frames = [0, 1, 2, 110, 111, 112 ]
expected_xmls = [os.path.join(voc_dir, 'interpolations_%08d.xml' % x )
for x in frames]

process_cvat_xml(xml_filename, 'img_dir', voc_dir)

self.assertTrue(os.path.exists(voc_dir))
self.assertTrue(len(os.listdir(voc_dir)) == 0)
mock_log.warn.assert_called_once_with(expected_warn)
self.assertTrue(len(os.listdir(voc_dir)) == len(frames))
for exp in expected_xmls:
self.assertTrue(os.path.exists(exp))
# TODO: add code to parse the resulting XML files