Report errors for COCO (stream) and Datumaro importers #1110

Merged
2 changes: 2 additions & 0 deletions CHANGELOG.md
@@ -73,6 +73,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
(<https://github.com/openvinotoolkit/datumaro/pull/1054>)
- Fix UnionMerge
(<https://github.com/openvinotoolkit/datumaro/pull/1086>)
- Report errors for COCO (stream) and Datumaro importers
(<https://github.com/openvinotoolkit/datumaro/pull/1110>)

## 26/05/2023 - Release 1.3.2
### Enhancements
13 changes: 9 additions & 4 deletions src/datumaro/plugins/data_formats/coco/base.py
@@ -283,10 +283,15 @@ def _stream_items(self) -> Iterator[DatasetItem]:
self._length = length

def _parse_anns(self, img_info, ann_info, item):
if self._task is not CocoTask.panoptic:
self._load_annotations(ann_info, img_info, parsed_annotations=item.annotations)
else:
self._load_panoptic_ann(ann_info, parsed_annotations=item.annotations)
try:
if self._task is not CocoTask.panoptic:
self._load_annotations(ann_info, img_info, parsed_annotations=item.annotations)
else:
self._load_panoptic_ann(ann_info, parsed_annotations=item.annotations)
except Exception as e:
self._ctx.error_policy.report_annotation_error(
e, item_id=(ann_info.get("id", None), self._subset)
)

def _load_items(self, json_data):
pbar = self._ctx.progress_reporter
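Both importers now follow the same pattern: instead of letting one malformed record abort the whole import, the parser catches the exception and hands it to the import context's error policy together with an (id, subset) pair. Below is a minimal sketch of an object that would satisfy the calls made above, assuming a policy that merely collects failures; the class name and storage are illustrative, not datumaro's actual error-policy API.

```python
from typing import Any, List, Tuple


class CollectingErrorPolicy:
    """Illustrative stand-in for the import context's error policy: it records
    each failure together with the (id, subset) pair the importer passes,
    instead of raising and aborting the import."""

    def __init__(self) -> None:
        self.item_errors: List[Tuple[Exception, Any]] = []
        self.annotation_errors: List[Tuple[Exception, Any]] = []

    def report_item_error(self, error: Exception, *, item_id: Any) -> None:
        # Matches the call made from _parse_item() when a whole item fails.
        self.item_errors.append((error, item_id))

    def report_annotation_error(self, error: Exception, *, item_id: Any) -> None:
        # Matches the calls made from _parse_anns() / _load_annotations()
        # when a single annotation fails; the item's other annotations are kept.
        self.annotation_errors.append((error, item_id))
```

A policy wired this way lets the caller inspect what was skipped once the import finishes; a stricter policy could just as well re-raise.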
307 changes: 159 additions & 148 deletions src/datumaro/plugins/data_formats/datumaro/base.py
@@ -123,183 +123,194 @@ def _gen():
_gen(), desc=f"Importing '{self._subset}'", total=len(item_descs)
):
item = self._parse_item(item_desc)
items.append(item)
if item is not None:
items.append(item)

return items

def _parse_item(self, item_desc):
item_id = item_desc["id"]

media = None
image_info = item_desc.get("image")
if image_info:
image_filename = image_info.get("path") or item_id + DatumaroPath.IMAGE_EXT
image_path = osp.join(self._images_dir, self._subset, image_filename)
if not osp.isfile(image_path):
# backward compatibility
old_image_path = osp.join(self._images_dir, image_filename)
if osp.isfile(old_image_path):
image_path = old_image_path

media = Image.from_file(path=image_path, size=image_info.get("size"))
if self.media_type == MediaElement:
self.media_type = Image

pcd_info = item_desc.get("point_cloud")
if media and pcd_info:
raise MediaTypeError("Dataset cannot contain multiple media types")
if pcd_info:
pcd_path = pcd_info.get("path")
point_cloud = osp.join(self._pcd_dir, self._subset, pcd_path)

related_images = None
ri_info = item_desc.get("related_images")
if ri_info:
related_images = [
Image.from_file(
size=ri.get("size"),
path=osp.join(self._images_dir, self._subset, ri.get("path")),
)
for ri in ri_info
]

media = PointCloud.from_file(path=point_cloud, extra_images=related_images)
if self.media_type == MediaElement:
self.media_type = PointCloud

media_desc = item_desc.get("media")
if not media and media_desc and media_desc.get("path"):
media = MediaElement(path=media_desc.get("path"))
def _parse_item(self, item_desc: Dict) -> Optional[DatasetItem]:
try:
item_id = item_desc["id"]

media = None
image_info = item_desc.get("image")
if image_info:
image_filename = image_info.get("path") or item_id + DatumaroPath.IMAGE_EXT
image_path = osp.join(self._images_dir, self._subset, image_filename)
if not osp.isfile(image_path):
# backward compatibility
old_image_path = osp.join(self._images_dir, image_filename)
if osp.isfile(old_image_path):
image_path = old_image_path

media = Image.from_file(path=image_path, size=image_info.get("size"))
if self.media_type == MediaElement:
self.media_type = Image

pcd_info = item_desc.get("point_cloud")
if media and pcd_info:
raise MediaTypeError("Dataset cannot contain multiple media types")
if pcd_info:
pcd_path = pcd_info.get("path")
point_cloud = osp.join(self._pcd_dir, self._subset, pcd_path)

related_images = None
ri_info = item_desc.get("related_images")
if ri_info:
related_images = [
Image.from_file(
size=ri.get("size"),
path=osp.join(self._images_dir, self._subset, ri.get("path")),
)
for ri in ri_info
]

media = PointCloud.from_file(path=point_cloud, extra_images=related_images)
if self.media_type == MediaElement:
self.media_type = PointCloud

media_desc = item_desc.get("media")
if not media and media_desc and media_desc.get("path"):
media = MediaElement(path=media_desc.get("path"))

except Exception as e:
self._ctx.error_policy.report_item_error(
e, item_id=(item_desc.get("id", None), self._subset)
)
return None

annotations = self._load_annotations(item_desc)

item = DatasetItem(
return DatasetItem(
id=item_id,
subset=self._subset,
annotations=annotations,
media=media,
attributes=item_desc.get("attr"),
)

return item

@staticmethod
def _load_annotations(item):
parsed = item["annotations"]
def _load_annotations(self, item: Dict):
loaded = []

for ann in parsed:
ann_id = ann.get("id")
ann_type = AnnotationType[ann["type"]]
attributes = ann.get("attributes")
group = ann.get("group")

label_id = ann.get("label_id")
z_order = ann.get("z_order")
points = ann.get("points")

if ann_type == AnnotationType.label:
loaded.append(Label(label=label_id, id=ann_id, attributes=attributes, group=group))

elif ann_type == AnnotationType.mask:
rle = ann["rle"]
rle["counts"] = rle["counts"].encode("ascii")
loaded.append(
RleMask(
rle=rle,
label=label_id,
id=ann_id,
attributes=attributes,
group=group,
z_order=z_order,
for ann in item.get("annotations", []):
try:
ann_id = ann.get("id")
ann_type = AnnotationType[ann["type"]]
attributes = ann.get("attributes")
group = ann.get("group")

label_id = ann.get("label_id")
z_order = ann.get("z_order")
points = ann.get("points")

if ann_type == AnnotationType.label:
loaded.append(
Label(label=label_id, id=ann_id, attributes=attributes, group=group)
)
)

elif ann_type == AnnotationType.polyline:
loaded.append(
PolyLine(
points,
label=label_id,
id=ann_id,
attributes=attributes,
group=group,
z_order=z_order,
elif ann_type == AnnotationType.mask:
rle = ann["rle"]
rle["counts"] = rle["counts"].encode("ascii")
loaded.append(
RleMask(
rle=rle,
label=label_id,
id=ann_id,
attributes=attributes,
group=group,
z_order=z_order,
)
)
)

elif ann_type == AnnotationType.polygon:
loaded.append(
Polygon(
points,
label=label_id,
id=ann_id,
attributes=attributes,
group=group,
z_order=z_order,
elif ann_type == AnnotationType.polyline:
loaded.append(
PolyLine(
points,
label=label_id,
id=ann_id,
attributes=attributes,
group=group,
z_order=z_order,
)
)
)

elif ann_type == AnnotationType.bbox:
x, y, w, h = ann["bbox"]
loaded.append(
Bbox(
x,
y,
w,
h,
label=label_id,
id=ann_id,
attributes=attributes,
group=group,
z_order=z_order,
elif ann_type == AnnotationType.polygon:
loaded.append(
Polygon(
points,
label=label_id,
id=ann_id,
attributes=attributes,
group=group,
z_order=z_order,
)
)
)

elif ann_type == AnnotationType.points:
loaded.append(
Points(
points,
label=label_id,
id=ann_id,
attributes=attributes,
group=group,
z_order=z_order,
elif ann_type == AnnotationType.bbox:
x, y, w, h = ann["bbox"]
loaded.append(
Bbox(
x,
y,
w,
h,
label=label_id,
id=ann_id,
attributes=attributes,
group=group,
z_order=z_order,
)
)
)

elif ann_type == AnnotationType.caption:
caption = ann.get("caption")
loaded.append(Caption(caption, id=ann_id, attributes=attributes, group=group))

elif ann_type == AnnotationType.cuboid_3d:
loaded.append(
Cuboid3d(
ann.get("position"),
ann.get("rotation"),
ann.get("scale"),
label=label_id,
id=ann_id,
attributes=attributes,
group=group,
elif ann_type == AnnotationType.points:
loaded.append(
Points(
points,
label=label_id,
id=ann_id,
attributes=attributes,
group=group,
z_order=z_order,
)
)
)

elif ann_type == AnnotationType.ellipse:
loaded.append(
Ellipse(
*points,
label=label_id,
id=ann_id,
attributes=attributes,
group=group,
z_order=z_order,
elif ann_type == AnnotationType.caption:
caption = ann.get("caption")
loaded.append(Caption(caption, id=ann_id, attributes=attributes, group=group))

elif ann_type == AnnotationType.cuboid_3d:
loaded.append(
Cuboid3d(
ann.get("position"),
ann.get("rotation"),
ann.get("scale"),
label=label_id,
id=ann_id,
attributes=attributes,
group=group,
)
)

elif ann_type == AnnotationType.ellipse:
loaded.append(
Ellipse(
*points,
label=label_id,
id=ann_id,
attributes=attributes,
group=group,
z_order=z_order,
)
)
)

elif ann_type == AnnotationType.hash_key:
continue
else:
raise NotImplementedError()
elif ann_type == AnnotationType.hash_key:
continue
else:
raise NotImplementedError()
except Exception as e:
self._ctx.error_policy.report_annotation_error(
e, item_id=(ann.get("id", None), self._subset)
)

return loaded
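At the item level the contract is slightly different: _parse_item now reports the failure and returns None, and the caller's new `if item is not None` check drops the broken item while the rest of the subset is still imported. The toy reproduction below uses hypothetical names (parse_item, _RecordingPolicy) rather than datumaro's own, just to show the report-and-skip behaviour end to end.

```python
from typing import List, Optional


class _RecordingPolicy:
    """Hypothetical policy that records item-level failures instead of raising."""

    def __init__(self) -> None:
        self.item_errors: list = []

    def report_item_error(self, error: Exception, *, item_id) -> None:
        self.item_errors.append((error, item_id))


def parse_item(desc: dict, policy: _RecordingPolicy, subset: str = "train") -> Optional[dict]:
    try:
        # A missing "id" key raises here, standing in for any parse failure.
        return {"id": desc["id"], "subset": subset}
    except Exception as e:
        policy.report_item_error(e, item_id=(desc.get("id"), subset))
        return None


policy = _RecordingPolicy()
descs = [{"id": "a"}, {}, {"id": "b"}]  # the second descriptor is broken
items: List[dict] = [it for d in descs if (it := parse_item(d, policy)) is not None]

assert [it["id"] for it in items] == ["a", "b"]
assert len(policy.item_errors) == 1
```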
