-
Notifications
You must be signed in to change notification settings - Fork 0
/
datasetMaker.py
118 lines (98 loc) · 4.13 KB
/
datasetMaker.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
"""
Divides the data into train, val and test sets in a 70:20:10 ratio,
stored in the following folders:
custom_dataset
├── images
│   ├── train
│   │   ├── train0.jpg
│   │   └── train1.jpg
│   ├── val
│   │   ├── val0.jpg
│   │   └── val1.jpg
│   └── test
│       ├── test0.jpg
│       └── test1.jpg
└── labels
    ├── train
    │   ├── train0.txt
    │   └── train1.txt
    ├── val
    │   ├── val0.txt
    │   └── val1.txt
    └── test
        ├── test0.txt
        └── test1.txt
"""
import os
import pandas as pd
import numpy as np
import shutil
from sklearn.model_selection import train_test_split
# Output layout, rooted at the directory the script is started from:
#   custom_dataset/{images,labels}/{train,val,test}
current_path = os.getcwd()
dataset_path = os.path.join(current_path, "custom_dataset")

custom_dataset_images_path = os.path.join(dataset_path, "images")
custom_dataset_labels_path = os.path.join(dataset_path, "labels")

custom_dataset_images_train_path = os.path.join(custom_dataset_images_path, "train")
custom_dataset_images_val_path = os.path.join(custom_dataset_images_path, "val")
custom_dataset_images_test_path = os.path.join(custom_dataset_images_path, "test")
custom_dataset_labels_train_path = os.path.join(custom_dataset_labels_path, "train")
custom_dataset_labels_val_path = os.path.join(custom_dataset_labels_path, "val")
custom_dataset_labels_test_path = os.path.join(custom_dataset_labels_path, "test")

# Create every leaf directory independently. os.makedirs also creates the
# missing parents, and exist_ok=True means an already-present directory does
# not prevent the remaining ones from being created (a shared try/except
# around several makedirs calls would stop at the first existing directory).
for _leaf_directory in (
    custom_dataset_images_train_path,
    custom_dataset_images_val_path,
    custom_dataset_images_test_path,
    custom_dataset_labels_train_path,
    custom_dataset_labels_val_path,
    custom_dataset_labels_test_path,
):
    os.makedirs(_leaf_directory, exist_ok=True)
def _export_split(df, picture_ids, image_folder, images_dir, labels_dir, copy_images):
    """Write one label file (and optionally copy one image) per picture ID.

    Args:
        df: Annotation table; must contain the columns ``pictureID`` and
            ``Unnamed: 0`` (both are dropped before the rows are written).
        picture_ids: Iterable of picture IDs belonging to this split.
        image_folder: Directory holding the source ``<pictureID>.jpg`` files.
        images_dir: Destination directory for the copied images.
        labels_dir: Destination directory for the ``<pictureID>.txt`` files.
        copy_images: When true, copy the image before writing its labels.
    """
    for picture_id in picture_ids:
        image_name = "{}.jpg".format(picture_id)
        label_name = "{}.txt".format(picture_id)
        try:
            if copy_images:
                shutil.copyfile(
                    os.path.join(image_folder, image_name),
                    os.path.join(images_dir, image_name),
                )
            rows = df[df["pictureID"] == picture_id].drop(
                columns=["Unnamed: 0", "pictureID"]
            )
            # mode="a" is kept from the original script: running the split
            # again appends to label files that already exist.
            rows.to_csv(
                os.path.join(labels_dir, label_name),
                header=False,
                index=None,
                sep=" ",
                mode="a",
            )
        except FileNotFoundError as error:
            # Best effort: a missing image (or target directory) skips this
            # picture, labels included, but is reported instead of hidden.
            print("Skipping {}: {}".format(picture_id, error))


def splitdata(trainsize=0.7, testsize=0.1, valsize=0.2, splitImages = True):
    """Split the annotated pictures into train / test / val sets on disk.

    Reads ``objectsYolo.xlsx`` from the working directory, splits the unique
    ``pictureID`` values with a fixed random seed, and writes one
    space-separated label file per picture into the matching
    ``custom_dataset/labels/<split>`` directory. With ``splitImages`` set,
    the matching ``.jpg`` is also copied from ``JPEGImage/JPEGImage`` (under
    the working directory) into ``custom_dataset/images/<split>``.

    Args:
        trainsize: Relative share of pictures for the training set.
        testsize: Relative share of pictures for the test set.
        valsize: Relative share of pictures for the validation set.
            The three shares are normalised by their sum, so ``7, 1, 2``
            is equivalent to the default ``0.7, 0.1, 0.2``.
        splitImages: Copy the image files as well as writing the labels.

    Raises:
        ValueError: If any of the three shares is not strictly positive.
    """
    from fractions import Fraction

    # Work in exact rationals so the defaults yield exactly 0.1 and 2/9, the
    # values the split was previously hard-coded to; plain float arithmetic
    # (0.2 / (0.7 + 0.2)) can be off by one ulp and shift a picture between
    # sets.
    train_share, test_share, val_share = (
        Fraction(size).limit_denominator(10 ** 6)
        for size in (trainsize, testsize, valsize)
    )
    if min(train_share, test_share, val_share) <= 0:
        raise ValueError("trainsize, testsize and valsize must all be positive")
    test_fraction = float(test_share / (train_share + test_share + val_share))
    # The validation set is carved out of what remains after the test split.
    val_fraction = float(val_share / (train_share + val_share))

    current_path = os.getcwd()
    # Joined component by component so the path works on every platform.
    image_folder = os.path.join(current_path, "JPEGImage", "JPEGImage")
    df = pd.read_excel("objectsYolo.xlsx", index_col=None)
    X = np.unique(df["pictureID"])
    X_train_val, X_test = train_test_split(X, test_size=test_fraction, random_state=1)
    X_train, X_val = train_test_split(X_train_val, test_size=val_fraction, random_state=1)

    for picture_ids, images_dir, labels_dir in (
        (X_train, custom_dataset_images_train_path, custom_dataset_labels_train_path),
        (X_test, custom_dataset_images_test_path, custom_dataset_labels_test_path),
        (X_val, custom_dataset_images_val_path, custom_dataset_labels_val_path),
    ):
        _export_split(df, picture_ids, image_folder, images_dir, labels_dir, splitImages)
# Script entry: run the split with image copying switched off.
splitdata(splitImages=False)