Skip to content

Commit

Permalink
Raw image copying in dataset export (#2229)
Browse files Browse the repository at this point in the history
* Raw image copying in dataset export

* use byteimage

* use opencv, switch frame data type for videos

* Fix image reading

* update dm dependency
  • Loading branch information
Maxim Zhiltsov authored Oct 7, 2020
1 parent 572b5f2 commit 84b8a85
Show file tree
Hide file tree
Showing 5 changed files with 50 additions and 23 deletions.
35 changes: 27 additions & 8 deletions cvat/apps/dataset_manager/bindings.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from cvat.apps.engine.frame_provider import FrameProvider
from cvat.apps.engine.models import AttributeType, ShapeType
from datumaro.util import cast
from datumaro.util.image import Image
from datumaro.util.image import ByteImage, Image

from .annotation import AnnotationManager, TrackManager

Expand Down Expand Up @@ -457,18 +457,37 @@ def __init__(self, task_data, include_images=False, include_outside=False):

dm_items = []

is_video = task_data.meta['task']['mode'] == 'interpolation'
ext = ''
if is_video:
ext = FrameProvider.VIDEO_FRAME_EXT
if include_images:
frame_provider = FrameProvider(task_data.db_task.data)
if is_video:
# optimization for videos: use numpy arrays instead of bytes
# some formats or transforms can require image data
def _make_image(i, **kwargs):
    # Wrap frame ``i`` in an Image whose pixels come from a deferred loader:
    # the loader asks the frame provider for the original-quality frame as a
    # numpy array and keeps the first element of the returned pair.
    def _load_frame(_):
        return frame_provider.get_frame(
            i,
            quality=frame_provider.Quality.ORIGINAL,
            out_type=frame_provider.Type.NUMPY_ARRAY,
        )[0]

    return Image(loader=_load_frame, **kwargs)
else:
# for images use encoded data to avoid recoding
def _make_image(i, **kwargs):
    # Wrap frame ``i`` in a ByteImage backed by a deferred loader: the loader
    # requests the original-quality frame as a buffer and returns the raw
    # bytes held by that buffer.
    def _load_bytes(_):
        frame_buffer = frame_provider.get_frame(
            i,
            quality=frame_provider.Quality.ORIGINAL,
            out_type=frame_provider.Type.BUFFER,
        )[0]
        return frame_buffer.getvalue()

    return ByteImage(data=_load_bytes, **kwargs)

for frame_data in task_data.group_by_frame(include_empty=True):
loader = None
image_args = {
'path': frame_data.name + ext,
'size': (frame_data.height, frame_data.width),
}
if include_images:
loader = lambda p, i=frame_data.idx: frame_provider.get_frame(i,
quality=frame_provider.Quality.ORIGINAL,
out_type=frame_provider.Type.NUMPY_ARRAY)[0]
dm_image = Image(path=frame_data.name, loader=loader,
size=(frame_data.height, frame_data.width)
)
dm_image = _make_image(frame_data.idx, **image_args)
else:
dm_image = Image(**image_args)
dm_anno = self._read_cvat_anno(frame_data, task_data)
dm_item = datumaro.DatasetItem(id=osp.splitext(frame_data.name)[0],
annotations=dm_anno, image=dm_image,
Expand Down
7 changes: 4 additions & 3 deletions cvat/apps/dataset_manager/formats/cvat.py
Original file line number Diff line number Diff line change
Expand Up @@ -531,16 +531,17 @@ def _export(dst_file, task_data, anno_callback, save_images=False):
anno_callback(f, task_data)

if save_images:
ext = ''
if task_data.meta['task']['mode'] == 'interpolation':
ext = FrameProvider.VIDEO_FRAME_EXT

img_dir = osp.join(temp_dir, 'images')
frame_provider = FrameProvider(task_data.db_task.data)
frames = frame_provider.get_frames(
frame_provider.Quality.ORIGINAL,
frame_provider.Type.BUFFER)
for frame_id, (frame_data, _) in enumerate(frames):
frame_name = task_data.frame_info[frame_id]['path']
ext = ''
if not '.' in osp.basename(frame_name):
ext = '.png'
img_path = osp.join(img_dir, frame_name + ext)
os.makedirs(osp.dirname(img_path), exist_ok=True)
with open(img_path, 'wb') as f:
Expand Down
27 changes: 16 additions & 11 deletions cvat/apps/engine/frame_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from enum import Enum
from io import BytesIO

import cv2
import numpy as np
from PIL import Image

Expand Down Expand Up @@ -43,6 +44,9 @@ def reset(self):
self.pos = -1

class FrameProvider:
VIDEO_FRAME_EXT = '.PNG'
VIDEO_FRAME_MIME = 'image/png'

class Quality(Enum):
COMPRESSED = 0
ORIGINAL = 100
Expand Down Expand Up @@ -129,13 +133,14 @@ def _validate_chunk_number(self, chunk_number):

return chunk_number_

@staticmethod
def _av_frame_to_png_bytes(av_frame):
pil_img = av_frame.to_image()
buf = BytesIO()
pil_img.save(buf, format='PNG')
buf.seek(0)
return buf
@classmethod
def _av_frame_to_png_bytes(cls, av_frame):
    """Encode a decoded video frame into an in-memory image buffer.

    The frame is converted to a ``bgr24`` array and encoded with
    ``cv2.imencode``; the output format is selected by
    ``cls.VIDEO_FRAME_EXT``.

    Returns:
        A ``BytesIO`` wrapping the encoded bytes.

    Raises:
        Exception: if ``cv2.imencode`` reports failure.
    """
    frame_ext = cls.VIDEO_FRAME_EXT
    bgr_pixels = av_frame.to_ndarray(format='bgr24')
    encoded_ok, encoded = cv2.imencode(frame_ext, bgr_pixels)
    if encoded_ok:
        return BytesIO(encoded.tobytes())
    raise Exception("Failed to encode image to '%s' format" % (frame_ext))

def _convert_frame(self, frame, reader_class, out_type):
if out_type == self.Type.BUFFER:
Expand All @@ -144,11 +149,11 @@ def _convert_frame(self, frame, reader_class, out_type):
return frame.to_image() if reader_class is VideoReader else Image.open(frame)
elif out_type == self.Type.NUMPY_ARRAY:
if reader_class is VideoReader:
image = np.array(frame.to_image())
image = frame.to_ndarray(format='bgr24')
else:
image = np.array(Image.open(frame))
if len(image.shape) == 3 and image.shape[2] in {3, 4}:
image[:, :, :3] = image[:, :, 2::-1] # RGB to BGR
if len(image.shape) == 3 and image.shape[2] in {3, 4}:
image[:, :, :3] = image[:, :, 2::-1] # RGB to BGR
return image
else:
raise Exception('unsupported output type')
Expand All @@ -171,7 +176,7 @@ def get_frame(self, frame_number, quality=Quality.ORIGINAL,

frame = self._convert_frame(frame, loader.reader_class, out_type)
if loader.reader_class is VideoReader:
return (frame, 'image/png')
return (frame, self.VIDEO_FRAME_MIME)
return (frame, mimetypes.guess_type(frame_name))

def get_frames(self, quality=Quality.ORIGINAL, out_type=Type.BUFFER):
Expand Down
2 changes: 2 additions & 0 deletions cvat/apps/engine/media_extractors.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,8 @@ def get_progress(self, pos):
return pos / stream.duration if stream.duration else None

def _get_av_container(self):
    """Open the first source entry with ``av.open`` and return the result.

    When that entry is an ``io.BytesIO`` it is rewound to position 0
    first, so a buffer that was already consumed can be read again.
    """
    source = self._source_path[0]
    if isinstance(source, io.BytesIO):
        source.seek(0)  # required for re-reading
    return av.open(source)

def get_preview(self):
Expand Down
2 changes: 1 addition & 1 deletion cvat/requirements/base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -44,4 +44,4 @@ tensorflow==2.2.1 # Optional requirement of Datumaro
# archives. Don't use as a python module because it has GPL license.
patool==1.12
diskcache==5.0.2
git+https://github.com/openvinotoolkit/[email protected].0
git+https://github.com/openvinotoolkit/[email protected].2

0 comments on commit 84b8a85

Please sign in to comment.