-
Notifications
You must be signed in to change notification settings - Fork 43
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* add gtsdb and stsd datasets * use object instead of np.object * update tests * add setuptools
- Loading branch information
Showing
5 changed files
with
234 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,3 +18,4 @@ jinja2 | |
pydata-sphinx-theme==0.8.1 | ||
sphinx==4.5.0 | ||
sphinx_remove_toctrees | ||
setuptools |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
import os | ||
import re | ||
|
||
import pandas as pd | ||
|
||
import meerkat as mk | ||
|
||
from ..abstract import DatasetBuilder | ||
from ..info import DatasetInfo | ||
from ..registry import datasets | ||
from ..utils import download_url, extract | ||
|
||
_URL = "https://sid.erda.dk/public/archives/ff17dc924eba88d5d01a807357d6614c/FullIJCNN{version}.zip" # noqa: E501 | ||
|
||
|
||
@datasets.register()
class gtsdb(DatasetBuilder):
    """German Traffic Sign Detection Benchmark GTSDB.

    The dataframe is assembled locally from the extracted
    ``FullIJCNN{version}`` archive: box annotations come from ``gt.txt``,
    category names from ``ReadMe.txt`` and images from the ``.ppm`` files in
    the same folder.
    """

    VERSIONS = ["2013"]

    info = DatasetInfo(
        name="gtsdb",
        full_name="German Traffic Sign Detection Benchmark GTSDB",
        description=("Image data set to detect street signs."),
        homepage="https://sid.erda.dk/public/archives/ff17dc924eba88d5d01a807357d6614c/published-archive.html",  # noqa: E501
        tags=["image", "object recognition"],
        citation=None,
    )

    def build(self):
        """Build the annotation dataframe from the extracted archive.

        Returns:
            A meerkat dataframe with one row per annotated sign. Columns
            produced here: ``filename``, ``x1``, ``y1``, ``x2``, ``y2``,
            ``category_id``, ``category``, ``supercategory``, ``split``,
            ``image`` and ``image_crop``.
        """
        folder = os.path.join(self.dataset_dir, f"FullIJCNN{self.version}")
        gt_ann = pd.read_csv(os.path.join(folder, "gt.txt"), sep=";", header=None)

        # Format categories: ReadMe.txt lists them as
        # "<id> = <name> (<supercategory>)", one per line.
        categories = []
        with open(os.path.join(folder, "ReadMe.txt"), "r") as f:
            for raw_line in f:
                line = raw_line.strip()
                if not re.match(r"^[0-9]* = .*$", line):
                    continue
                # Split on the first " = " only, so a name that itself
                # contains " = " does not break the unpacking.
                category_id, category_full_name = line.split(" = ", 1)
                # The last space-separated token is the parenthesised
                # supercategory; everything before it is the category name.
                category_name, supercategory = category_full_name.strip().rsplit(
                    " ", 1
                )
                categories.append(
                    {
                        "category_id": int(category_id),
                        "category": category_name.strip(),
                        "supercategory": supercategory.strip().strip("(").strip(")"),
                    }
                )
        categories = pd.DataFrame(categories)

        # Format dataframe: gt.txt has no header, so name the positional columns.
        df = gt_ann.rename(
            {0: "filename", 1: "x1", 2: "y1", 3: "x2", 4: "y2", 5: "category_id"},
            axis=1,
        )
        df = df.merge(categories, on="category_id")

        # Split: in sorted filename order, images from position 600 onward are
        # labeled "test" and the ones before are labeled "train".
        images_files = sorted([x for x in os.listdir(folder) if x.endswith(".ppm")])
        image_df = pd.DataFrame({"filename": images_files})
        image_df["split"] = "train"
        image_df.loc[600:, "split"] = "test"
        df = df.merge(image_df, on="filename")

        # NOTE(review): presumably from_pandas adds an "index" column that is
        # not wanted in the final dataframe - confirm against meerkat.
        df = mk.DataFrame.from_pandas(df).drop("index")
        df["image"] = mk.files(df["filename"], base_dir=folder, type="image")
        # NOTE(review): presumably mk.defer feeds the dataframe columns named
        # like crop's parameters into crop lazily - confirm against meerkat.
        df["image_crop"] = mk.defer(df, crop)
        return df

    def download(self):
        """Download the archive for ``self.version`` and extract it.

        Both the downloaded file and the extracted contents go to
        ``self.dataset_dir``.
        """
        downloaded_path = download_url(
            _URL.format(version=self.version), self.dataset_dir
        )
        extract(downloaded_path, self.dataset_dir)

    def is_downloaded(self):
        """Return whether the extracted ``FullIJCNN{version}`` folder exists."""
        # The folder lives inside dataset_dir, so its existence already implies
        # that dataset_dir itself exists.
        return os.path.exists(
            os.path.join(self.dataset_dir, f"FullIJCNN{self.version}")
        )
|
||
|
||
def crop(image, x1, y1, x2, y2):
    """Return the part of ``image`` inside the box ``(x1, y1, x2, y2)``.

    The box is handed to ``image.crop`` as a single 4-tuple.
    """
    box = (x1, y1, x2, y2)
    return image.crop(box)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,130 @@ | ||
# Swedish Traffic Signs Dataset (STSD) | ||
|
||
import os | ||
|
||
import pandas as pd | ||
from tqdm.auto import tqdm | ||
|
||
import meerkat as mk | ||
|
||
from ..abstract import DatasetBuilder | ||
from ..info import DatasetInfo | ||
from ..registry import datasets | ||
from ..utils import download_url, extract | ||
|
||
_SETS_TO_URLS = { | ||
"Set1/annotations.txt": "http://www.isy.liu.se/cvl/research/trafficSigns/swedishSignsSummer/Set1/annotations.txt", # noqa: E501 | ||
"Set2/annotations.txt": "http://www.isy.liu.se/cvl/research/trafficSigns/swedishSignsSummer/Set2/annotations.txt", # noqa: E501 | ||
"Set1/Set1Part0": "http://www.isy.liu.se/cvl/research/trafficSigns/swedishSignsSummer/Set1/Set1Part0.zip", # noqa: E501 | ||
"Set2/Set2Part0": "http://www.isy.liu.se/cvl/research/trafficSigns/swedishSignsSummer/Set2/Set2Part0.zip", # noqa: E501 | ||
} | ||
|
||
|
||
@datasets.register()
class stsd(DatasetBuilder):
    """Swedish Traffic Sign Dataset STSD.

    The dataframe is assembled locally from the downloaded ``Set1`` and
    ``Set2`` annotation files and their ``Part0`` image archives.
    """

    VERSIONS = ["2019"]

    info = DatasetInfo(
        name="stsd",
        full_name="Swedish Traffic Sign Dataset STSD",
        description=("Image data set to detect street signs."),
        homepage="https://www.cvl.isy.liu.se/en/research/datasets/traffic-signs-dataset/download/",  # noqa: E501
        tags=["image", "object recognition"],
        citation=None,
    )

    def build(self):
        """Build the annotation dataframe from the downloaded files.

        Returns:
            A meerkat dataframe with one row per annotation. It holds the
            columns produced by ``_format_annotations`` plus ``path``,
            ``image`` and ``image_crop``.
        """
        per_set = []
        for set_name in ["Set1", "Set2"]:
            ann_file = os.path.join(self.dataset_dir, f"{set_name}/annotations.txt")
            set_df = _format_annotations(ann_file)
            # Images of a set live in <dataset_dir>/<set>/<set>Part0/.
            image_dir = os.path.join(self.dataset_dir, set_name, f"{set_name}Part0")
            set_df["path"] = set_df["filename"].apply(
                lambda x: os.path.join(image_dir, x)
            )
            per_set.append(set_df)
        annotations = pd.concat(per_set).reset_index(drop=True)

        # NOTE(review): presumably from_pandas adds an "index" column that is
        # not wanted in the final dataframe - confirm against meerkat.
        df = mk.DataFrame.from_pandas(annotations).drop("index")
        df["image"] = mk.files(
            df["path"],
            type="image",
        )
        # NOTE(review): presumably mk.defer feeds the dataframe columns named
        # like crop's parameters into crop lazily - confirm against meerkat.
        df["image_crop"] = mk.defer(df, crop)
        return df

    def download(self):
        """Download every entry of ``_SETS_TO_URLS`` into ``self.dataset_dir``.

        Zip archives are extracted into ``<dataset_dir>/<relative_path>``;
        any other file is moved to ``<dataset_dir>/<relative_path>``.
        """
        # tqdm raises on keyword arguments it does not know, so only the
        # iterable is passed.
        for relative_path, url in tqdm(_SETS_TO_URLS.items()):
            downloaded_path = download_url(url, self.dataset_dir)
            path = os.path.join(self.dataset_dir, relative_path)
            if url.endswith(".zip"):
                os.makedirs(path, exist_ok=True)
                extract(downloaded_path, path)
            else:
                os.makedirs(os.path.dirname(path), exist_ok=True)
                # os.replace overwrites an existing target on every platform,
                # so re-running the download does not fail on a stale file.
                os.replace(downloaded_path, path)

    def is_downloaded(self):
        """Return whether every path listed in ``_SETS_TO_URLS`` exists."""
        return os.path.exists(self.dataset_dir) and all(
            os.path.exists(os.path.join(self.dataset_dir, x)) for x in _SETS_TO_URLS
        )
|
||
|
||
def crop(image, x1, y1, x2, y2):
    """Return the part of ``image`` inside the box ``(x1, y1, x2, y2)``.

    When any coordinate equals ``-1`` the box is treated as invalid and a
    copy of the whole image is returned instead.
    """
    box = (x1, y1, x2, y2)
    has_invalid_coordinate = any(coord == -1 for coord in box)
    if has_invalid_coordinate:
        # No usable box: hand back the full image rather than cropping.
        return image.copy()
    return image.crop(box)
|
||
|
||
def _format_annotations(ann_file):
    """Parse an STSD ``annotations.txt`` file into a pandas dataframe.

    Every non-blank line is expected to look like
    ``<filename>:<annotation>;<annotation>;...``. An annotation is either the
    literal ``MISC_SIGNS`` (stored with all box coordinates set to ``-1`` and
    visibility ``"N/A"``) or seven comma-separated fields in the order
    ``visibility, x2, y2, x1, y1, sign_type, name``.

    Args:
        ann_file: Path of the annotation file to read.

    Returns:
        A ``pd.DataFrame`` with one row per annotation and the columns
        ``filename``, ``visibility``, ``x1``, ``y1``, ``x2``, ``y2``,
        ``sign_type`` and ``category``. The columns are present even when
        the file contains no annotations.

    Raises:
        ValueError: If a non-blank line does not split into exactly two
            parts on ``":"``, an annotation does not have seven fields, or
            a coordinate is not a number.
    """
    columns = [
        "filename",
        "visibility",
        "x1",
        "y1",
        "x2",
        "y2",
        "sign_type",
        "category",
    ]
    annotations = []
    with open(ann_file, "r") as f:
        for line in f:
            # Blank lines carry no data and would break the unpacking below.
            if not line.strip():
                continue
            filename, anns = (x.strip() for x in line.split(":"))
            for ann in anns.split(";"):
                ann = ann.strip()
                if not ann:
                    continue
                if ann == "MISC_SIGNS":
                    # No box is given for these signs; -1 marks the
                    # coordinates as invalid.
                    annotations.append(
                        {
                            "filename": filename,
                            "visibility": "N/A",
                            "x1": -1,
                            "y1": -1,
                            "x2": -1,
                            "y2": -1,
                            "sign_type": "MISC_SIGNS",
                            "category": "MISC_SIGNS",
                        }
                    )
                    continue
                visibility, x2, y2, x1, y1, sign_type, name = (
                    x.strip() for x in ann.split(",")
                )
                # The annotation file is malformed for at least one example:
                # a coordinate is followed by text starting with "l". Keep
                # only the part before the first "l".
                x2, y2, x1, y1 = [x.split("l")[0] for x in (x2, y2, x1, y1)]
                annotations.append(
                    {
                        "filename": filename,
                        "visibility": visibility,
                        "x1": float(x1),
                        "y1": float(y1),
                        "x2": float(x2),
                        "y2": float(y2),
                        "sign_type": sign_type,
                        "category": name,
                    }
                )
    # Passing the columns explicitly keeps the schema stable for empty input.
    return pd.DataFrame(annotations, columns=columns)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters