EscVM · rbell1988 · Jan 4, 2020 · Jan 4, 2020 · Jan 5, 2020 · Jan 5, 2020
diff --git a/README.md b/README.md
@@ -1,3 +1,13 @@
+<h1> Forked repository with an added Python conversion script </h1>
+My added script is: 
+<strong>convert_annotations.py</strong>
+
+Use the toolkit as usual to download images from the Open Images Dataset. Once the images have been downloaded, run the following from the repository root directory:
+```bash
+python convert_annotations.py
+```
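+This generates `.txt` annotation files in the format required for custom object detection with YOLOv3. Each text file is written to the same folder as its image. The script reads the class names from a `classes.txt` file (one class name per line) in the directory it is run from.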
+
 <h1 align="center"> ~ OIDv4 ToolKit ~ </h1>
 
 Do you want to build your personal object detector but you don't have enough images to train your model? Do you want to train your personal image classifier, but you are tired of the deadly slowness of ImageNet? Have you already discovered [Open Images Dataset v4](https://storage.googleapis.com/openimages/web/index.html) that has [600](https://storage.googleapis.com/openimages/2018_04/bbox_labels_600_hierarchy_visualizer/circle.html) classes and more than 1,700,000 images with related bounding boxes ready to use? Do you want to exploit it for your projects but you don't want to download gigabytes and gigabytes of data!?

diff --git a/convert_annotations.py b/convert_annotations.py
@@ -0,0 +1,79 @@
+import os
+import cv2
+import numpy as np
+from tqdm import tqdm
+import argparse
+import fileinput
+
+# function that turns XMin, YMin, XMax, YMax coordinates to normalized yolo format
+def convert(filename_str, coords):
+    os.chdir("..")
+    image = cv2.imread(filename_str + ".jpg")
+    coords[2] -= coords[0]
+    coords[3] -= coords[1]
+    x_diff = int(coords[2]/2)
+    y_diff = int(coords[3]/2)
+    coords[0] = coords[0]+x_diff
+    coords[1] = coords[1]+y_diff
+    coords[0] /= int(image.shape[1])
+    coords[1] /= int(image.shape[0])
+    coords[2] /= int(image.shape[1])
+    coords[3] /= int(image.shape[0])
+    os.chdir("Label")
+    return coords
+
+ROOT_DIR = os.getcwd()
+
+# create dict to map class names to numbers for yolo
+classes = {}
+with open("classes.txt", "r") as myFile:
+    for num, line in enumerate(myFile, 0):
+        line = line.rstrip("\n")
+        classes[line] = num
+    myFile.close()
+# step into dataset directory
+os.chdir(os.path.join("OID", "Dataset"))
+DIRS = os.listdir(os.getcwd())
+
+# for all train, validation and test folders
+for DIR in DIRS:
+    if os.path.isdir(DIR):
+        os.chdir(DIR)
+        print("Currently in subdirectory:", DIR)
+
+        CLASS_DIRS = os.listdir(os.getcwd())
+        # for all class folders step into directory to change annotations
+        for CLASS_DIR in CLASS_DIRS:
+            if os.path.isdir(CLASS_DIR):
+                os.chdir(CLASS_DIR)
+                print("Converting annotations for class: ", CLASS_DIR)
+
+                # Step into Label folder where annotations are generated
+                os.chdir("Label")
+
+                for filename in tqdm(os.listdir(os.getcwd())):
+                    filename_str = str.split(filename, ".")[0]
+                    if filename.endswith(".txt"):
+                        annotations = []
+                        with open(filename) as f:
+                            for line in f:
+                                for class_type in classes:
+                                    line = line.replace(class_type, str(classes.get(class_type)))
+                                labels = line.split()
+                                coords = np.asarray([float(labels[1]), float(labels[2]), float(labels[3]), float(labels[4])])
+                                coords = convert(filename_str, coords)
+                                labels[1], labels[2], labels[3], labels[4] = coords[0], coords[1], coords[2], coords[3]
+                                newline = str(labels[0]) + " " + str(labels[1]) + " " + str(labels[2]) + " " + str(labels[3]) + " " + str(labels[4])
+                                line = line.replace(line, newline)
+                                annotations.append(line)
+                            f.close()
+                        os.chdir("..")
+                        with open(filename, "w") as outfile:
+                            for line in annotations:
+                                outfile.write(line)
+                                outfile.write("\n")
+                            outfile.close()
+                        os.chdir("Label")
+                os.chdir("..")
+                os.chdir("..")
+        os.chdir("..")
diff --git a/modules/downloader.py b/modules/downloader.py
@@ -130,7 +130,7 @@ def get_label(folder, dataset_dir, class_name, class_code, df_val, class_list, a
                     box[2] *= int(dataset_image.shape[0])
                     box[3] *= int(dataset_image.shape[0])
 
-                    # each row in a file is name of the class_name, XMin, YMix, XMax, YMax (left top right bottom)
+                    # each row in a file is name of the class_name, XMin, YMin, XMax, YMax (left top right bottom)
                     print(class_name, box[0], box[2], box[1], box[3], file=f)
 
             except Exception as e:

diff --git a/requirements.txt b/requirements.txt
@@ -1,9 +1,6 @@
 pandas
 numpy
 awscli
-
 urllib3
-
 tqdm
-
-opencv-python
+opencv-python