Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Useful suggestion #97

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
<h1> Forked repository and added conversion python script </h1>
My added script is:
<strong>convert_annotations.py</strong>

Use the toolkit as usual to gather images from the Open Images Dataset. After gathering the images, run the following from the repository's root directory:
```bash
python convert_annotations.py
```
This will generate .txt annotation files in the proper format for custom object detection with YOLOv3. The text files are written to the same folder as the images.

<h1 align="center"> ~ OIDv4 ToolKit ~ </h1>

Do you want to build your personal object detector but you don't have enough images to train your model? Do you want to train your personal image classifier, but you are tired of the deadly slowness of ImageNet? Have you already discovered [Open Images Dataset v4](https://storage.googleapis.com/openimages/web/index.html) that has [600](https://storage.googleapis.com/openimages/2018_04/bbox_labels_600_hierarchy_visualizer/circle.html) classes and more than 1,700,000 images with related bounding boxes ready to use? Do you want to exploit it for your projects but you don't want to download gigabytes and gigabytes of data!?
Expand Down
79 changes: 79 additions & 0 deletions convert_annotations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import os
import cv2
import numpy as np
from tqdm import tqdm
import argparse
import fileinput

def convert(filename_str, coords):
    """Convert an XMin, YMin, XMax, YMax box to normalized YOLO format.

    The image ``<filename_str>.jpg`` is read from the parent of the current
    working directory to obtain the width and height used for normalization.
    The working directory itself is never changed, so a failure cannot leave
    the process in an unexpected folder.

    Args:
        filename_str: Image file name without the ``.jpg`` extension.
        coords: Mutable sequence ``[XMin, YMin, XMax, YMax]`` expressed in
            the same units as the image dimensions (pixels). It is
            overwritten in place.

    Returns:
        The same ``coords`` object, now holding
        ``[x_center, y_center, width, height]`` where the x values are
        divided by the image width and the y values by the image height.

    Raises:
        OSError: If the image is missing or cannot be decoded.
    """
    image_path = os.path.join(os.pardir, filename_str + ".jpg")
    image = cv2.imread(image_path)
    # cv2.imread signals failure by returning None instead of raising.
    if image is None:
        raise OSError("Could not read image: {}".format(image_path))

    image_height, image_width = image.shape[0], image.shape[1]
    x_min, y_min, x_max, y_max = (float(value) for value in coords[:4])
    box_width = x_max - x_min
    box_height = y_max - y_min

    # Keep the half extents as floats: truncating them to integers shifts
    # the computed center by up to one pixel.
    coords[0] = (x_min + box_width / 2) / image_width
    coords[1] = (y_min + box_height / 2) / image_height
    coords[2] = box_width / image_width
    coords[3] = box_height / image_height
    return coords

ROOT_DIR = os.getcwd()


def load_classes(path="classes.txt"):
    """Map each class name in *path* to its zero-based line index.

    Blank lines are skipped (an empty class name can never match an
    annotation), but they still count towards the index so that every
    remaining name keeps the number of the line it appears on.

    Args:
        path: Text file with one class name per line.

    Returns:
        Dict mapping class name to integer id.
    """
    classes = {}
    with open(path, "r") as class_file:
        for num, line in enumerate(class_file):
            name = line.strip()
            if name:
                classes[name] = num
    return classes


def parse_annotation(line, classes):
    """Split one annotation row into a class label and its four coordinates.

    A row is ``<class name> XMin YMin XMax YMax``. The class name may itself
    contain spaces, so the row is split from the right: the last four fields
    are the coordinates and everything before them is the name. The name is
    then looked up as a whole, which avoids corrupting names that merely
    contain another class name (e.g. "Cat" inside "Caterpillar").

    Args:
        line: One line of an annotation file.
        classes: Dict mapping class name to integer id.

    Returns:
        ``None`` for a blank line, otherwise ``(label, [xmin, ymin, xmax,
        ymax])`` where ``label`` is the class id, or the unchanged class name
        when it is not present in ``classes``.

    Raises:
        ValueError: If a non-blank line does not have a name followed by
            four numeric fields.
    """
    fields = line.rsplit(None, 4)
    if not fields:
        return None
    if len(fields) != 5:
        raise ValueError("Malformed annotation line: {!r}".format(line))
    class_name = fields[0].strip()
    box = [float(value) for value in fields[1:]]
    return classes.get(class_name, class_name), box


def convert_label_dir(label_dir, classes):
    """Convert every ``.txt`` annotation in *label_dir* to YOLO format.

    The converted file is written, under the same name, to the parent of
    *label_dir*; the files inside *label_dir* are left untouched.

    Args:
        label_dir: Path of a ``Label`` folder.
        classes: Dict mapping class name to integer id.
    """
    start_dir = os.getcwd()
    # convert() looks for the image in the parent of the current working
    # directory, so the work is done from inside the Label folder.
    os.chdir(label_dir)
    try:
        for filename in tqdm(os.listdir(os.getcwd())):
            if not filename.endswith(".txt"):
                continue
            filename_str = os.path.splitext(filename)[0]

            annotations = []
            with open(filename) as label_file:
                for line in label_file:
                    parsed = parse_annotation(line, classes)
                    if parsed is None:
                        continue
                    label, box = parsed
                    coords = convert(filename_str, np.asarray(box))
                    fields = [str(label)]
                    fields.extend(str(value) for value in coords[:4])
                    annotations.append(" ".join(fields))

            with open(os.path.join(os.pardir, filename), "w") as outfile:
                for annotation in annotations:
                    outfile.write(annotation + "\n")
    finally:
        # Always restore the caller's directory, even if a file fails.
        os.chdir(start_dir)


def main():
    """Convert the annotations of every class folder under ``OID/Dataset``."""
    classes = load_classes("classes.txt")
    dataset_root = os.path.join("OID", "Dataset")

    # for all train, validation and test folders
    for split_name in os.listdir(dataset_root):
        split_dir = os.path.join(dataset_root, split_name)
        if not os.path.isdir(split_dir):
            continue
        print("Currently in subdirectory:", split_name)

        # for all class folders convert the annotations in their Label folder
        for class_name in os.listdir(split_dir):
            label_dir = os.path.join(split_dir, class_name, "Label")
            # Skips plain files as well as class folders without annotations.
            if not os.path.isdir(label_dir):
                continue
            print("Converting annotations for class: ", class_name)
            convert_label_dir(label_dir, classes)


if __name__ == "__main__":
    main()
2 changes: 1 addition & 1 deletion modules/downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ def get_label(folder, dataset_dir, class_name, class_code, df_val, class_list, a
box[2] *= int(dataset_image.shape[0])
box[3] *= int(dataset_image.shape[0])

# each row in a file is name of the class_name, XMin, YMix, XMax, YMax (left top right bottom)
# each row in a file is name of the class_name, XMin, YMin, XMax, YMax (left top right bottom)
print(class_name, box[0], box[2], box[1], box[3], file=f)

except Exception as e:
Expand Down
5 changes: 1 addition & 4 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
pandas
numpy
awscli

urllib3

tqdm

opencv-python
opencv-python