From 84b8a85a2038dce6b31f8bcd011eeb29f71ab14f Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Wed, 7 Oct 2020 12:52:36 +0300 Subject: [PATCH] Raw image copying in dataset export (#2229) * Raw image copying in dataset export * use byteimage * use opencv, switch frame data type for videos * Fix image reading * update dm dependency --- cvat/apps/dataset_manager/bindings.py | 35 +++++++++++++++++------ cvat/apps/dataset_manager/formats/cvat.py | 7 +++-- cvat/apps/engine/frame_provider.py | 27 ++++++++++------- cvat/apps/engine/media_extractors.py | 2 ++ cvat/requirements/base.txt | 2 +- 5 files changed, 50 insertions(+), 23 deletions(-) diff --git a/cvat/apps/dataset_manager/bindings.py b/cvat/apps/dataset_manager/bindings.py index 30ec8bd90786..02d4eaabdf13 100644 --- a/cvat/apps/dataset_manager/bindings.py +++ b/cvat/apps/dataset_manager/bindings.py @@ -13,7 +13,7 @@ from cvat.apps.engine.frame_provider import FrameProvider from cvat.apps.engine.models import AttributeType, ShapeType from datumaro.util import cast -from datumaro.util.image import Image +from datumaro.util.image import ByteImage, Image from .annotation import AnnotationManager, TrackManager @@ -457,18 +457,37 @@ def __init__(self, task_data, include_images=False, include_outside=False): dm_items = [] + is_video = task_data.meta['task']['mode'] == 'interpolation' + ext = '' + if is_video: + ext = FrameProvider.VIDEO_FRAME_EXT if include_images: frame_provider = FrameProvider(task_data.db_task.data) + if is_video: + # optimization for videos: use numpy arrays instead of bytes + # some formats or transforms can require image data + def _make_image(i, **kwargs): + loader = lambda _: frame_provider.get_frame(i, + quality=frame_provider.Quality.ORIGINAL, + out_type=frame_provider.Type.NUMPY_ARRAY)[0] + return Image(loader=loader, **kwargs) + else: + # for images use encoded data to avoid recoding + def _make_image(i, **kwargs): + loader = lambda _: frame_provider.get_frame(i, + 
quality=frame_provider.Quality.ORIGINAL, + out_type=frame_provider.Type.BUFFER)[0].getvalue() + return ByteImage(data=loader, **kwargs) for frame_data in task_data.group_by_frame(include_empty=True): - loader = None + image_args = { + 'path': frame_data.name + ext, + 'size': (frame_data.height, frame_data.width), + } if include_images: - loader = lambda p, i=frame_data.idx: frame_provider.get_frame(i, - quality=frame_provider.Quality.ORIGINAL, - out_type=frame_provider.Type.NUMPY_ARRAY)[0] - dm_image = Image(path=frame_data.name, loader=loader, - size=(frame_data.height, frame_data.width) - ) + dm_image = _make_image(frame_data.idx, **image_args) + else: + dm_image = Image(**image_args) dm_anno = self._read_cvat_anno(frame_data, task_data) dm_item = datumaro.DatasetItem(id=osp.splitext(frame_data.name)[0], annotations=dm_anno, image=dm_image, diff --git a/cvat/apps/dataset_manager/formats/cvat.py b/cvat/apps/dataset_manager/formats/cvat.py index 3c349947b769..632da163de8a 100644 --- a/cvat/apps/dataset_manager/formats/cvat.py +++ b/cvat/apps/dataset_manager/formats/cvat.py @@ -531,6 +531,10 @@ def _export(dst_file, task_data, anno_callback, save_images=False): anno_callback(f, task_data) if save_images: + ext = '' + if task_data.meta['task']['mode'] == 'interpolation': + ext = FrameProvider.VIDEO_FRAME_EXT + img_dir = osp.join(temp_dir, 'images') frame_provider = FrameProvider(task_data.db_task.data) frames = frame_provider.get_frames( @@ -538,9 +542,6 @@ def _export(dst_file, task_data, anno_callback, save_images=False): frame_provider.Type.BUFFER) for frame_id, (frame_data, _) in enumerate(frames): frame_name = task_data.frame_info[frame_id]['path'] - ext = '' - if not '.' 
in osp.basename(frame_name): - ext = '.png' img_path = osp.join(img_dir, frame_name + ext) os.makedirs(osp.dirname(img_path), exist_ok=True) with open(img_path, 'wb') as f: diff --git a/cvat/apps/engine/frame_provider.py b/cvat/apps/engine/frame_provider.py index ed96bf99f3c0..3870ef3d1048 100644 --- a/cvat/apps/engine/frame_provider.py +++ b/cvat/apps/engine/frame_provider.py @@ -6,6 +6,7 @@ from enum import Enum from io import BytesIO +import cv2 import numpy as np from PIL import Image @@ -43,6 +44,9 @@ def reset(self): self.pos = -1 class FrameProvider: + VIDEO_FRAME_EXT = '.PNG' + VIDEO_FRAME_MIME = 'image/png' + class Quality(Enum): COMPRESSED = 0 ORIGINAL = 100 @@ -129,13 +133,14 @@ def _validate_chunk_number(self, chunk_number): return chunk_number_ - @staticmethod - def _av_frame_to_png_bytes(av_frame): - pil_img = av_frame.to_image() - buf = BytesIO() - pil_img.save(buf, format='PNG') - buf.seek(0) - return buf + @classmethod + def _av_frame_to_png_bytes(cls, av_frame): + ext = cls.VIDEO_FRAME_EXT + image = av_frame.to_ndarray(format='bgr24') + success, result = cv2.imencode(ext, image) + if not success: + raise Exception("Failed to encode image to '%s' format" % (ext)) + return BytesIO(result.tobytes()) def _convert_frame(self, frame, reader_class, out_type): if out_type == self.Type.BUFFER: @@ -144,11 +149,11 @@ def _convert_frame(self, frame, reader_class, out_type): return frame.to_image() if reader_class is VideoReader else Image.open(frame) elif out_type == self.Type.NUMPY_ARRAY: if reader_class is VideoReader: - image = np.array(frame.to_image()) + image = frame.to_ndarray(format='bgr24') else: image = np.array(Image.open(frame)) - if len(image.shape) == 3 and image.shape[2] in {3, 4}: - image[:, :, :3] = image[:, :, 2::-1] # RGB to BGR + if len(image.shape) == 3 and image.shape[2] in {3, 4}: + image[:, :, :3] = image[:, :, 2::-1] # RGB to BGR return image else: raise Exception('unsupported output type') @@ -171,7 +176,7 @@ def get_frame(self, 
frame_number, quality=Quality.ORIGINAL, frame = self._convert_frame(frame, loader.reader_class, out_type) if loader.reader_class is VideoReader: - return (frame, 'image/png') + return (frame, self.VIDEO_FRAME_MIME) return (frame, mimetypes.guess_type(frame_name)) def get_frames(self, quality=Quality.ORIGINAL, out_type=Type.BUFFER): diff --git a/cvat/apps/engine/media_extractors.py b/cvat/apps/engine/media_extractors.py index d9eead2b9f7e..21430838661d 100644 --- a/cvat/apps/engine/media_extractors.py +++ b/cvat/apps/engine/media_extractors.py @@ -244,6 +244,8 @@ def get_progress(self, pos): return pos / stream.duration if stream.duration else None def _get_av_container(self): + if isinstance(self._source_path[0], io.BytesIO): + self._source_path[0].seek(0) # required for re-reading return av.open(self._source_path[0]) def get_preview(self): diff --git a/cvat/requirements/base.txt b/cvat/requirements/base.txt index 59406651fec9..60613b4356ca 100644 --- a/cvat/requirements/base.txt +++ b/cvat/requirements/base.txt @@ -44,4 +44,4 @@ tensorflow==2.2.1 # Optional requirement of Datumaro # archives. Don't use as a python module because it has GPL license. patool==1.12 diskcache==5.0.2 -git+https://github.com/openvinotoolkit/datumaro@v0.1.0 \ No newline at end of file +git+https://github.com/openvinotoolkit/datumaro@v0.1.2 \ No newline at end of file