diff --git a/datumaro/datumaro/plugins/tf_detection_api_format/converter.py b/datumaro/datumaro/plugins/tf_detection_api_format/converter.py index a88822d3c8d..481e8a2ed14 100644 --- a/datumaro/datumaro/plugins/tf_detection_api_format/converter.py +++ b/datumaro/datumaro/plugins/tf_detection_api_format/converter.py @@ -5,6 +5,7 @@ import codecs from collections import OrderedDict +import hashlib import logging as log import os import os.path as osp @@ -180,15 +181,18 @@ def _make_tf_example(self, item): features.update({ 'image/encoded': bytes_feature(b''), - 'image/format': bytes_feature(b'') + 'image/format': bytes_feature(b''), + 'image/key/sha256': bytes_feature(b''), }) if self._save_images: if item.has_image and item.image.has_data: buffer, fmt = self._save_image(item, filename) + key = hashlib.sha256(buffer).hexdigest() features.update({ 'image/encoded': bytes_feature(buffer), 'image/format': bytes_feature(fmt.encode('utf-8')), + 'image/key/sha256': bytes_feature(key.encode('utf8')), }) else: log.warning("Item '%s' has no image" % item.id) diff --git a/datumaro/datumaro/plugins/tf_detection_api_format/extractor.py b/datumaro/datumaro/plugins/tf_detection_api_format/extractor.py index 7928c78846e..f91c8b72a74 100644 --- a/datumaro/datumaro/plugins/tf_detection_api_format/extractor.py +++ b/datumaro/datumaro/plugins/tf_detection_api_format/extractor.py @@ -85,6 +85,10 @@ def _parse_tfrecord_file(cls, filepath, subset, images_dir): 'image/width': tf.io.FixedLenFeature([], tf.int64), 'image/encoded': tf.io.FixedLenFeature([], tf.string), 'image/format': tf.io.FixedLenFeature([], tf.string), + + # use varlen to avoid errors when this field is missing + 'image/key/sha256': tf.io.VarLenFeature(tf.string), + # Object boxes and classes. 'image/object/bbox/xmin': tf.io.VarLenFeature(tf.float32), 'image/object/bbox/xmax': tf.io.VarLenFeature(tf.float32),