-
Notifications
You must be signed in to change notification settings - Fork 3.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
YOLO adapted version of PASCAL VOC converter.py (#454)
- Loading branch information
Showing
6 changed files
with
312 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
# Utility for converting CVAT XML annotation file to YOLO format | ||
|
||
## Description | ||
|
||
Given a CVAT XML, this script reads the CVAT XML and writes the | ||
annotations in YOLO format into a given directory. This implementation | ||
supports both interpolation tracks from video and annotated images. | ||
|
||
## Installation | ||
|
||
Install necessary packages and create a virtual environment. | ||
|
||
```bash | ||
sudo apt-get update | ||
sudo apt-get install -y --no-install-recommends python3-pip python3-venv python3-dev | ||
``` | ||
|
||
```bash | ||
python3 -m venv .env | ||
. .env/bin/activate | ||
cat requirements.txt | xargs -n 1 -L 1 pip install | ||
``` | ||
|
||
## Usage | ||
|
||
Run the script inside the virtual environment: | ||
|
||
```bash | ||
python converter.py --cvat-xml </path/to/cvat/xml> --image-dir </path/to/images> --output-dir </path/to/output/directory> | ||
``` | ||
|
||
If you need to download frames from an annotated video file submitted to CVAT: | ||
|
||
```bash | ||
python converter.py --cvat-xml </path/to/cvat/xml> --output-dir </path/to/output/directory> --username <CVAT Username> --password <CVAT Password> | ||
``` | ||
|
||
Please run `python converter.py --help` for more details. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,264 @@ | ||
#!/usr/bin/env python | ||
# | ||
# SPDX-License-Identifier: MIT | ||
""" | ||
Given a CVAT XML, this script reads the CVAT XML and writes the | ||
annotations in YOLO format into a given directory. | ||
This implementation supports both interpolation tracks from video and | ||
annotated images. | ||
""" | ||
|
||
import os | ||
import argparse | ||
import glog as log | ||
from lxml import etree | ||
import requests | ||
|
||
|
||
def parse_args(argv=None):
    """
    Parse arguments of command line

    :param argv: optional list of argument strings; when None, argparse
        falls back to the process command line (sys.argv[1:])
    :return: argparse.Namespace with attributes cvat_xml, image_dir,
        output_dir, username, password and labels
    """
    parser = argparse.ArgumentParser(
        description='Convert CVAT XML annotations to YOLO format'
    )

    parser.add_argument(
        '--cvat-xml', metavar='FILE', required=True,
        help='input file with CVAT annotation in xml format'
    )

    parser.add_argument(
        '--image-dir', metavar='DIRECTORY', required=False,
        help='directory which contains original images'
    )

    parser.add_argument(
        '--output-dir', metavar='DIRECTORY', required=True,
        help='directory for output annotations in YOLO format'
    )

    parser.add_argument(
        '--username', metavar='USERNAME', required=False,
        help='Username from CVAT Login page, required to download images'
    )

    parser.add_argument(
        '--password', metavar='PASSWORD', required=False,
        help='Password from CVAT Login page, required to download images'
    )

    parser.add_argument(
        '--labels', metavar='ILABELS', required=False,
        help='Labels (separated by comma) to extract. Example: car,truck,motorcycle'
    )

    return parser.parse_args(argv)
|
||
|
||
def _label_id(current_labels, label):
    """
    Return the YOLO class id for a label, registering the label on first use.

    :param current_labels: dict mapping label name to class id (mutated in place)
    :param label: label name read from the CVAT XML or from the label list
    :return: integer class id
    """
    if label not in current_labels:
        # Ids are handed out densely, in order of first appearance.
        current_labels[label] = len(current_labels)
    return current_labels[label]


def _yolo_line(labelid, xmin, ymin, xmax, ymax, width, height):
    """
    Format one box as a YOLO annotation line.

    The line is "<class> <x_center> <y_center> <width> <height>", with the
    four geometry values divided by the image size and printed with six
    decimals.

    :param labelid: integer class id
    :param xmin: left edge of the box in pixels
    :param ymin: top edge of the box in pixels
    :param xmax: right edge of the box in pixels
    :param ymax: bottom edge of the box in pixels
    :param width: image width in pixels (numeric)
    :param height: image height in pixels (numeric)
    :return: annotation line without trailing newline
    """
    box_w = xmax - xmin
    box_h = ymax - ymin
    yolo_x = (xmin + box_w / 2) / width
    yolo_y = (ymin + box_h / 2) / height
    return "{} {:.6f} {:.6f} {:.6f} {:.6f}".format(
        labelid, yolo_x, yolo_y, box_w / width, box_h / height)


def _write_annotation(image_dir, image_name, lines):
    """
    Write the YOLO annotation file that accompanies an image.

    The file is placed in image_dir and named after the image's base name
    with a .txt extension.

    :param image_dir: directory receiving the .txt file
    :param image_name: image file name the annotation belongs to
    :param lines: list of YOLO annotation lines (may be empty)
    """
    anno_name = os.path.basename(os.path.splitext(image_name)[0] + '.txt')
    anno_path = os.path.join(image_dir, anno_name)
    with open(anno_path, "w", newline="\n") as anno_file:
        anno_file.write("\n".join(lines))


def _download_frame(cvat_xml, auth, frameid, image_path):
    """
    Download one frame of the task from the CVAT server and save it.

    The server URL and the task id are read from the XML, and the HTTP
    session is opened, only on the first call; they are cached in auth so
    that converting with local images never touches the network.

    :param cvat_xml: parsed CVAT XML tree
    :param auth: dict holding 'username' and 'password'; also used to cache
        the session, tokens, base URL and task id between calls
    :param frameid: number of the frame to fetch
    :param image_path: destination file for the downloaded image
    """
    if 'session' not in auth:
        urlsegment = cvat_xml.find(".//segments/segment/url").text
        urlbase = urlsegment.split("?")[0]
        taskid = int(cvat_xml.find('.//task/id').text)
        httpclient = requests.session()
        # Initial GET so the server can hand out its CSRF cookie.
        httpclient.get(urlbase)
        auth.update(session=httpclient, urlbase=urlbase, taskid=taskid,
                    csrftoken="none", sessionid="none")

    httpclient = auth['session']
    urlbase = auth['urlbase']
    taskid = auth['taskid']

    log.info('{} image cannot be found. Downloading from task ID {}\n'.format(image_path, taskid))

    # Log in until a session cookie has been obtained; later frames reuse it.
    if auth['sessionid'] == "none":
        if "csrftoken" in httpclient.cookies:
            auth['csrftoken'] = httpclient.cookies["csrftoken"]
        elif "csrf" in httpclient.cookies:
            auth['csrftoken'] = httpclient.cookies["csrf"]

        login_data = dict(username=auth['username'], password=auth['password'],
                          csrfmiddlewaretoken=auth['csrftoken'], next='/dashboard')

        urllogin = urlbase + "/auth/login"
        httpclient.post(urllogin, data=login_data,
                        headers=dict(Referer=urllogin))

        if "sessionid" in httpclient.cookies:
            auth['sessionid'] = httpclient.cookies["sessionid"]

    url = urlbase + "/api/v1/tasks/" + str(taskid) + "/frames/" + str(frameid)

    req = httpclient.get(url, headers=dict(
        csrftoken=auth['csrftoken'], sessionid=auth['sessionid']))

    if not req.ok:
        # Do not store an error page under an image file name.
        log.warning('Failed to download {} (HTTP status {}); image not saved'.format(
            url, req.status_code))
        return

    with open(image_path, 'wb') as fo:
        fo.write(req.content)
    print('Url saved as %s\n' % image_path)


def process_cvat_xml(xml_file, image_dir, output_dir, username, password, ilabels):
    """
    Transforms a single XML in CVAT format to YOLO TXT files and download images when not in IMAGE_DIR

    For XML files that contain tracks (video interpolation), one annotation
    file is written per frame; otherwise one per <image> element. Annotation
    .txt files are written into image_dir, and a train.txt listing the image
    paths is written into output_dir.

    :param xml_file: CVAT format XML
    :param image_dir: image directory of the dataset; when None,
        <output_dir>/data/obj is created and used
    :param output_dir: directory that receives train.txt
    :param username: Username used to login CVAT. Required to download images
    :param password: Password used to login CVAT. Required to download images
    :param ilabels: Comma separated ordered labels; their position defines
        the class id. Labels not listed get the next free id when first seen
    :return:
    """
    KNOWN_TAGS = {'box', 'image', 'attribute'}

    if image_dir is None:
        image_dir = os.path.join(output_dir, "data/obj")
        os.makedirs(image_dir, exist_ok=True)

    os.makedirs(output_dir, exist_ok=True)
    cvat_xml = etree.parse(xml_file)
    basename = os.path.splitext(os.path.basename(xml_file))[0]
    current_labels = {}
    train_images = []

    if ilabels is not None:
        for name in ilabels.split(','):
            name = name.strip()
            # Blank or repeated entries must not consume a class id.
            if name:
                _label_id(current_labels, name)

    tracks = cvat_xml.findall('.//track')

    if tracks:
        # frame number -> {track id -> box}; frames where every box is
        # marked "outside" stay present with an empty dict.
        frames = {}

        for track in tracks:
            trackid = int(track.get("id"))
            label = track.get("label")
            for box in track.findall('./box'):
                frameid = int(box.get('frame'))
                frame = frames.setdefault(frameid, {})
                if int(box.get('outside')) == 0:
                    frame[trackid] = {
                        'xtl': float(box.get('xtl')),
                        'ytl': float(box.get('ytl')),
                        'xbr': float(box.get('xbr')),
                        'ybr': float(box.get('ybr')),
                        'label': label,
                    }

        width = int(cvat_xml.find('.//original_size/width').text)
        height = int(cvat_xml.find('.//original_size/height').text)

        auth = {'username': username, 'password': password}

        # Spit out a list of each object for each frame
        for frameid in sorted(frames):
            image_name = "%s_%08d.jpg" % (basename, frameid)
            image_path = os.path.join(image_dir, image_name)
            if not os.path.exists(image_path):
                if username is None:
                    log.warning('{} image cannot be found. Is `{}` image directory correct?\n'.format(image_path, image_dir))
                else:
                    _download_frame(cvat_xml, auth, frameid, image_path)

            frame = frames[frameid]
            lines = []
            for objid in sorted(frame):
                box = frame[objid]
                labelid = _label_id(current_labels, box['label'])
                lines.append(_yolo_line(labelid, box['xtl'], box['ytl'],
                                        box['xbr'], box['ybr'], width, height))

            _write_annotation(image_dir, image_name, lines)
            train_images.append(image_path)

    else:
        for img_tag in cvat_xml.findall('image'):
            image_name = img_tag.get('name')
            # XML attributes are strings; convert before dividing by them.
            width = float(img_tag.get('width'))
            height = float(img_tag.get('height'))
            image_path = os.path.join(image_dir, image_name)
            if not os.path.exists(image_path):
                log.warning('{} image cannot be found. Is `{}` image directory correct?'.
                            format(image_path, image_dir))

            unknown_tags = {x.tag for x in img_tag.iter()}.difference(KNOWN_TAGS)
            if unknown_tags:
                log.warning('Ignoring tags for image {}: {}'.format(image_path, unknown_tags))

            lines = []
            for box in img_tag.findall('box'):
                labelid = _label_id(current_labels, box.get('label'))
                lines.append(_yolo_line(
                    labelid,
                    float(box.get('xtl')), float(box.get('ytl')),
                    float(box.get('xbr')), float(box.get('ybr')),
                    width, height))

            _write_annotation(image_dir, image_name, lines)
            # List the image in train.txt, as the track branch does.
            train_images.append(image_path)

    with open(os.path.join(output_dir, "train.txt"), "w", newline="\n") as traintxt_file:
        traintxt_file.write("\n".join(train_images))
|
||
|
||
def main():
    """Entry point: read the command line and run the conversion."""
    options = parse_args()
    process_cvat_xml(
        xml_file=options.cvat_xml,
        image_dir=options.image_dir,
        output_dir=options.output_dir,
        username=options.username,
        password=options.password,
        ilabels=options.labels,
    )
|
||
|
||
# Run the converter only when this file is executed as a script.
if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
argparse>=1.1 | ||
lxml>=3.5.0 | ||
glog>=0.3.1 | ||
requests==2.22.0 |