Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add optional dataset.yaml path attribute #3753

Merged
merged 29 commits into from
Jun 24, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
8f94873
Add optional dataset.yaml `path` attribute
glenn-jocher Jun 23, 2021
d0367dc
pass locals to python scripts
glenn-jocher Jun 24, 2021
dc54136
handle lists
glenn-jocher Jun 24, 2021
6370565
update coco128.yaml
glenn-jocher Jun 24, 2021
cd52628
Capitalize first letter
glenn-jocher Jun 24, 2021
a0815a1
add test key
glenn-jocher Jun 24, 2021
451fe11
finalize GlobalWheat2020.yaml
glenn-jocher Jun 24, 2021
ac22a56
finalize objects365.yaml
glenn-jocher Jun 24, 2021
1ad6156
finalize SKU-110K.yaml
glenn-jocher Jun 24, 2021
ec16dd3
finalize SKU-110K.yaml
glenn-jocher Jun 24, 2021
7a989dd
finalize VisDrone.yaml
glenn-jocher Jun 24, 2021
48f51a0
NoneType fix
glenn-jocher Jun 24, 2021
131a0b9
update download comment
glenn-jocher Jun 24, 2021
c260622
voc to VOC
glenn-jocher Jun 24, 2021
eeb64dc
update
glenn-jocher Jun 24, 2021
8e676d3
update VOC.yaml
glenn-jocher Jun 24, 2021
083e0af
update VOC.yaml
glenn-jocher Jun 24, 2021
2467ab4
remove dashes
glenn-jocher Jun 24, 2021
7faca1d
delete get_voc.sh
glenn-jocher Jun 24, 2021
57adf71
force coco and coco128 to ../datasets
glenn-jocher Jun 24, 2021
9f2d764
Capitalize Argoverse_HD.yaml
glenn-jocher Jun 24, 2021
313f04f
Capitalize Objects365.yaml
glenn-jocher Jun 24, 2021
f9217ea
merge master
glenn-jocher Jun 24, 2021
d865219
update Argoverse_HD.yaml
glenn-jocher Jun 24, 2021
5a9b5b1
coco segments fix
glenn-jocher Jun 24, 2021
9bfc09a
VOC single-thread
glenn-jocher Jun 24, 2021
f75ff30
update Argoverse_HD.yaml
glenn-jocher Jun 24, 2021
173cca2
update data_dict in test handling
glenn-jocher Jun 24, 2021
7bcac6b
create root
glenn-jocher Jun 24, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 66 additions & 0 deletions data/Argoverse_HD.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# Argoverse-HD dataset (ring-front-center camera) http://www.cs.cmu.edu/~mengtial/proj/streaming/
# Train command: python train.py --data Argoverse_HD.yaml
# Default dataset location is next to YOLOv5:
# /parent
# /datasets/Argoverse
# /yolov5


# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/Argoverse # dataset root dir
train: Argoverse-1.1/images/train/ # train images (relative to 'path') 39384 images
val: Argoverse-1.1/images/val/ # val images (relative to 'path') 15062 images
test: Argoverse-1.1/images/test/ # test images (optional) https://eval.ai/web/challenges/challenge-page/800/overview

# Classes
nc: 8 # number of classes
names: [ 'person', 'bicycle', 'car', 'motorcycle', 'bus', 'truck', 'traffic_light', 'stop_sign' ] # class names


# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
import json

from tqdm import tqdm
from utils.general import download, Path


def argoverse2yolo(set):
    """Convert one Argoverse-HD annotation JSON file into YOLOv5 label files.

    Args:
        set: pathlib.Path of the annotation JSON file. It must be a Path
            (not a str) because ``set.parents[2]`` is used as the dataset
            root under which the label files are written.

    For every entry in ``a['annotations']`` one line
    ``cls x_center y_center width height`` is collected for the image it
    belongs to. Label files are written to
    ``<root>/Argoverse-1.1/labels/<seq_dir>/`` and are named after the image
    with its last three characters replaced by ``txt``. Existing label files
    are overwritten.
    """
    # Read the annotations and close the file immediately instead of leaking the handle
    with open(set, "rb") as f:
        a = json.load(f)

    labels = {}  # label file path -> list of label lines
    for annot in tqdm(a['annotations'], desc=f"Converting {set} to YOLOv5 format..."):
        img_id = annot['image_id']
        img_name = a['images'][img_id]['name']
        img_label_name = img_name[:-3] + "txt"

        cls = annot['category_id']  # instance class id
        # The first two bbox values are treated as the box origin: half the box size is added to get the center
        x_min, y_min, width, height = annot['bbox']
        # NOTE(review): 1920 x 1200 is presumably the fixed image size in pixels - confirm
        x_center = (x_min + width / 2) / 1920.0  # offset and scale
        y_center = (y_min + height / 2) / 1200.0  # offset and scale
        width /= 1920.0  # scale
        height /= 1200.0  # scale

        img_dir = set.parents[2] / 'Argoverse-1.1' / 'labels' / a['seq_dirs'][a['images'][img_id]['sid']]
        img_dir.mkdir(parents=True, exist_ok=True)  # exist_ok makes a separate exists() check unnecessary

        labels.setdefault(str(img_dir / img_label_name), []).append(f"{cls} {x_center} {y_center} {width} {height}\n")

    # Write each label file once, after all of its lines have been collected
    for k, lines in labels.items():
        with open(k, "w") as f:
            f.writelines(lines)


# Download
dir = Path(yaml['path'])  # dataset root dir, read from this file's 'path' key instead of repeating the literal
urls = ['https://argoverse-hd.s3.us-east-2.amazonaws.com/Argoverse-HD-Full.zip']
download(urls, dir=dir, delete=False)

# Convert
annotations_dir = 'Argoverse-HD/annotations/'
# Rename 'tracking' to 'images' so the folder matches the train/val/test paths declared in this YAML
(dir / 'Argoverse-1.1' / 'tracking').rename(dir / 'Argoverse-1.1' / 'images')
for d in "train.json", "val.json":
    argoverse2yolo(dir / annotations_dir / d)  # convert Argoverse annotations to YOLO labels
55 changes: 26 additions & 29 deletions data/GlobalWheat2020.yaml
Original file line number Diff line number Diff line change
@@ -1,43 +1,40 @@
# Global Wheat 2020 dataset http://www.global-wheat.com/
# Train command: python train.py --data GlobalWheat2020.yaml
# Default dataset location is next to YOLOv5:
# /parent_folder
# /parent
# /datasets/GlobalWheat2020
# /yolov5


# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
train: # 3422 images
- ../datasets/GlobalWheat2020/images/arvalis_1
- ../datasets/GlobalWheat2020/images/arvalis_2
- ../datasets/GlobalWheat2020/images/arvalis_3
- ../datasets/GlobalWheat2020/images/ethz_1
- ../datasets/GlobalWheat2020/images/rres_1
- ../datasets/GlobalWheat2020/images/inrae_1
- ../datasets/GlobalWheat2020/images/usask_1

val: # 748 images (WARNING: train set contains ethz_1)
- ../datasets/GlobalWheat2020/images/ethz_1

test: # 1276 images
- ../datasets/GlobalWheat2020/images/utokyo_1
- ../datasets/GlobalWheat2020/images/utokyo_2
- ../datasets/GlobalWheat2020/images/nau_1
- ../datasets/GlobalWheat2020/images/uq_1

# number of classes
nc: 1

# class names
names: [ 'wheat_head' ]


# download command/URL (optional) --------------------------------------------------------------------------------------
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/GlobalWheat2020 # dataset root dir
train: # train images (relative to 'path') 3422 images
- images/arvalis_1
- images/arvalis_2
- images/arvalis_3
- images/ethz_1
- images/rres_1
- images/inrae_1
- images/usask_1
val: # val images (relative to 'path') 748 images (WARNING: train set contains ethz_1)
- images/ethz_1
test: # test images (optional) 1276 images
- images/utokyo_1
- images/utokyo_2
- images/nau_1
- images/uq_1

# Classes
nc: 1 # number of classes
names: [ 'wheat_head' ] # class names


# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
from utils.general import download, Path

# Download
dir = Path('../datasets/GlobalWheat2020') # dataset directory
dir = Path(yaml['path']) # dataset root dir
urls = ['https://zenodo.org/record/4298502/files/global-wheat-codalab-official.zip',
'https://github.com/ultralytics/yolov5/releases/download/v1.0/GlobalWheat2020_labels.zip']
download(urls, dir=dir)
Expand Down
23 changes: 12 additions & 11 deletions data/objects365.yaml → data/Objects365.yaml
Original file line number Diff line number Diff line change
@@ -1,18 +1,19 @@
# Objects365 dataset https://www.objects365.org/
# Train command: python train.py --data objects365.yaml
# Train command: python train.py --data Objects365.yaml
# Default dataset location is next to YOLOv5:
# /parent_folder
# /datasets/objects365
# /parent
# /datasets/Objects365
# /yolov5

# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
train: ../datasets/objects365/images/train # 1742289 images
val: ../datasets/objects365/images/val # 5570 images

# number of classes
nc: 365
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/Objects365 # dataset root dir
train: images/train # train images (relative to 'path') 1742289 images
val: images/val # val images (relative to 'path') 5570 images
test: # test images (optional)

# class names
# Classes
nc: 365 # number of classes
names: [ 'Person', 'Sneakers', 'Chair', 'Other Shoes', 'Hat', 'Car', 'Lamp', 'Glasses', 'Bottle', 'Desk', 'Cup',
'Street Lights', 'Cabinet/shelf', 'Handbag/Satchel', 'Bracelet', 'Plate', 'Picture/Frame', 'Helmet', 'Book',
'Gloves', 'Storage box', 'Boat', 'Leather Shoes', 'Flower', 'Bench', 'Potted Plant', 'Bowl/Basin', 'Flag',
Expand Down Expand Up @@ -56,15 +57,15 @@ names: [ 'Person', 'Sneakers', 'Chair', 'Other Shoes', 'Hat', 'Car', 'Lamp', 'Gl
'Chainsaw', 'Eraser', 'Lobster', 'Durian', 'Okra', 'Lipstick', 'Cosmetics Mirror', 'Curling', 'Table Tennis' ]


# download command/URL (optional) --------------------------------------------------------------------------------------
# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
from pycocotools.coco import COCO
from tqdm import tqdm

from utils.general import download, Path

# Make Directories
dir = Path('../datasets/objects365') # dataset directory
dir = Path(yaml['path']) # dataset root dir
for p in 'images', 'labels':
(dir / p).mkdir(parents=True, exist_ok=True)
for q in 'train', 'val':
Expand Down
29 changes: 14 additions & 15 deletions data/SKU-110K.yaml
Original file line number Diff line number Diff line change
@@ -1,39 +1,38 @@
# SKU-110K retail items dataset https://github.com/eg4000/SKU110K_CVPR19
# Train command: python train.py --data SKU-110K.yaml
# Default dataset location is next to YOLOv5:
# /parent_folder
# /parent
# /datasets/SKU-110K
# /yolov5


# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
train: ../datasets/SKU-110K/train.txt # 8219 images
val: ../datasets/SKU-110K/val.txt # 588 images
test: ../datasets/SKU-110K/test.txt # 2936 images
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/SKU-110K # dataset root dir
train: train.txt # train images (relative to 'path') 8219 images
val: val.txt # val images (relative to 'path') 588 images
test: test.txt # test images (optional) 2936 images

# number of classes
nc: 1
# Classes
nc: 1 # number of classes
names: [ 'object' ] # class names

# class names
names: [ 'object' ]


# download command/URL (optional) --------------------------------------------------------------------------------------
# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
import shutil
from tqdm import tqdm
from utils.general import np, pd, Path, download, xyxy2xywh

# Download
datasets = Path('../datasets') # download directory
dir = Path(yaml['path']) # dataset root dir
parent = Path(dir.parent) # download dir
urls = ['http://trax-geometry.s3.amazonaws.com/cvpr_challenge/SKU110K_fixed.tar.gz']
download(urls, dir=datasets, delete=False)
download(urls, dir=parent, delete=False)

# Rename directories
dir = (datasets / 'SKU-110K')
if dir.exists():
shutil.rmtree(dir)
(datasets / 'SKU110K_fixed').rename(dir) # rename dir
(parent / 'SKU110K_fixed').rename(dir) # rename dir
(dir / 'labels').mkdir(parents=True, exist_ok=True) # create labels dir

# Convert labels
Expand Down
79 changes: 79 additions & 0 deletions data/VOC.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC/
# Train command: python train.py --data VOC.yaml
# Default dataset location is next to YOLOv5:
# /parent
# /datasets/VOC
# /yolov5


# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/VOC # dataset root dir
train: # train images (relative to 'path') 16551 images
- images/train2012
- images/train2007
- images/val2012
- images/val2007
val: # val images (relative to 'path') 4952 images
- images/test2007
test: # test images (optional)
- images/test2007

# Classes
nc: 20 # number of classes
names: [ 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog',
'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor' ] # class names


# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
import xml.etree.ElementTree as ET

from tqdm import tqdm
from utils.general import download, Path


def convert_label(path, lb_path, year, image_id):
    """Convert one PASCAL VOC XML annotation into a YOLO-format label file.

    Args:
        path: pathlib.Path of the directory that contains ``VOC{year}/Annotations``.
        lb_path: path of the label file to write (created or truncated).
        year: dataset year string used to build the ``VOC{year}`` folder name.
        image_id: image identifier, i.e. the XML file name without extension.

    One line ``cls_id x y w h`` is written per object. Objects whose name is
    not in ``yaml['names']`` or whose ``difficult`` flag equals 1 are skipped.
    """
    def convert_box(size, box):
        # size is (width, height); box is (xmin, xmax, ymin, ymax).
        # Returns (x_center, y_center, w, h) scaled by 1/width and 1/height.
        # NOTE(review): the "- 1" presumably compensates for 1-based VOC pixel coordinates - confirm
        dw, dh = 1. / size[0], 1. / size[1]
        x, y, w, h = (box[0] + box[1]) / 2.0 - 1, (box[2] + box[3]) / 2.0 - 1, box[1] - box[0], box[3] - box[2]
        return x * dw, y * dh, w * dw, h * dh

    # Open both files in a single 'with' so they are always closed (and the labels flushed),
    # while keeping the original order: the label file is created before the XML is parsed
    with open(path / f'VOC{year}/Annotations/{image_id}.xml') as in_file, open(lb_path, 'w') as out_file:
        tree = ET.parse(in_file)
        root = tree.getroot()
        size = root.find('size')
        w = int(size.find('width').text)
        h = int(size.find('height').text)

        for obj in root.iter('object'):
            cls = obj.find('name').text
            if cls in yaml['names'] and int(obj.find('difficult').text) != 1:
                xmlbox = obj.find('bndbox')
                bb = convert_box((w, h), [float(xmlbox.find(x).text) for x in ('xmin', 'xmax', 'ymin', 'ymax')])
                cls_id = yaml['names'].index(cls)  # class id
                out_file.write(" ".join([str(a) for a in (cls_id, *bb)]) + '\n')


# Download
dir = Path(yaml['path'])  # dataset root dir
url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/'
urls = [url + 'VOCtrainval_06-Nov-2007.zip',  # 446MB, 5012 images
        url + 'VOCtest_06-Nov-2007.zip',  # 438MB, 4953 images
        url + 'VOCtrainval_11-May-2012.zip']  # 1.95GB, 17126 images
download(urls, dir=dir / 'images', delete=False)

# Convert
path = dir / 'images/VOCdevkit'  # plain string: the original f-string had no placeholders
for year, image_set in ('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test'):
    imgs_path = dir / 'images' / f'{image_set}{year}'
    lbs_path = dir / 'labels' / f'{image_set}{year}'
    imgs_path.mkdir(exist_ok=True, parents=True)
    lbs_path.mkdir(exist_ok=True, parents=True)

    # Read the image-set list inside 'with' so the file handle is closed instead of leaked
    with open(path / f'VOC{year}/ImageSets/Main/{image_set}.txt') as ids_file:
        image_ids = ids_file.read().strip().split()
    for image_id in tqdm(image_ids, desc=f'{image_set}{year}'):  # 'image_id' avoids shadowing builtin id()
        f = path / f'VOC{year}/JPEGImages/{image_id}.jpg'  # old img path
        lb_path = (lbs_path / f.name).with_suffix('.txt')  # new label path
        f.rename(imgs_path / f.name)  # move image
        convert_label(path, lb_path, year, image_id)  # convert labels to YOLO format
23 changes: 11 additions & 12 deletions data/VisDrone.yaml
Original file line number Diff line number Diff line change
@@ -1,24 +1,23 @@
# VisDrone2019-DET dataset https://github.com/VisDrone/VisDrone-Dataset
# Train command: python train.py --data VisDrone.yaml
# Default dataset location is next to YOLOv5:
# /parent_folder
# /VisDrone
# /parent
# /datasets/VisDrone
# /yolov5


# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
train: ../VisDrone/VisDrone2019-DET-train/images # 6471 images
val: ../VisDrone/VisDrone2019-DET-val/images # 548 images
test: ../VisDrone/VisDrone2019-DET-test-dev/images # 1610 images
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/VisDrone # dataset root dir
train: VisDrone2019-DET-train/images # train images (relative to 'path') 6471 images
val: VisDrone2019-DET-val/images # val images (relative to 'path') 548 images
test: VisDrone2019-DET-test-dev/images # test images (optional) 1610 images

# number of classes
nc: 10

# class names
# Classes
nc: 10 # number of classes
names: [ 'pedestrian', 'people', 'bicycle', 'car', 'van', 'truck', 'tricycle', 'awning-tricycle', 'bus', 'motor' ] # class names


# download command/URL (optional) --------------------------------------------------------------------------------------
# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
from utils.general import download, os, Path

Expand Down Expand Up @@ -49,7 +48,7 @@ download: |


# Download
dir = Path('../VisDrone') # dataset directory
dir = Path(yaml['path']) # dataset root dir
urls = ['https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-train.zip',
'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-val.zip',
'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-dev.zip',
Expand Down
21 changes: 0 additions & 21 deletions data/argoverse_hd.yaml

This file was deleted.

Loading