Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added ability to correct upload video with a rotation record in the metadata #2218

Merged
merged 12 commits into from
Nov 5, 2020
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Ability to upload prepared meta information along with a video when creating a task (<https://github.com/openvinotoolkit/cvat/pull/2217>)
- Optional chaining plugin for cvat-canvas and cvat-ui (<https://github.com/openvinotoolkit/cvat/pull/2249>)
- MOTS png mask format support (<https://github.com/openvinotoolkit/cvat/pull/2198>)
- Ability to correctly upload a video with a rotation record in its metadata (<https://github.com/openvinotoolkit/cvat/pull/2218>)

### Changed
- UI models (like DEXTR) were redesigned to be more interactive (<https://github.com/opencv/cvat/pull/2054>)
Expand Down
21 changes: 20 additions & 1 deletion cvat/apps/engine/media_extractors.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import numpy as np
from pyunpack import Archive
from PIL import Image, ImageFile
from cvat.apps.engine.utils import rotate_image

# fixes: "OSError:broken data stream" when executing line 72 while loading images downloaded from the web
# see: https://stackoverflow.com/questions/42462431/oserror-broken-data-stream-when-reading-image-file
Expand Down Expand Up @@ -228,6 +229,16 @@ def _decode(self, container):
for image in packet.decode():
frame_num += 1
if self._has_frame(frame_num - 1):
if packet.stream.metadata.get('rotate'):
old_image = image
image = av.VideoFrame().from_ndarray(
rotate_image(
image.to_ndarray(format='bgr24'),
360 - int(container.streams.video[0].metadata.get('rotate'))
),
format ='bgr24'
)
image.pts = old_image.pts
yield (image, self._source_path[0], image.pts)

def __iter__(self):
Expand All @@ -252,7 +263,15 @@ def get_preview(self):
container = self._get_av_container()
stream = container.streams.video[0]
preview = next(container.decode(stream))
return self._get_preview(preview.to_image())
return self._get_preview(preview.to_image() if not stream.metadata.get('rotate') \
else av.VideoFrame().from_ndarray(
rotate_image(
preview.to_ndarray(format='bgr24'),
360 - int(container.streams.video[0].metadata.get('rotate'))
),
format ='bgr24'
).to_image()
)

def get_image_size(self, i):
image = (next(iter(self)))[0]
Expand Down
28 changes: 27 additions & 1 deletion cvat/apps/engine/prepare.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from collections import OrderedDict
import hashlib
import os
from cvat.apps.engine.utils import rotate_image

class WorkWithVideo:
def __init__(self, **kwargs):
Expand All @@ -24,7 +25,6 @@ def _get_video_stream(self, container):
video_stream.thread_type = 'AUTO'
return video_stream


class AnalyzeVideo(WorkWithVideo):
def check_type_first_frame(self):
container = self._open_video_container(self.source_path, mode='r')
Expand Down Expand Up @@ -76,7 +76,17 @@ def get_task_size(self):

@property
def frame_sizes(self):
container = self._open_video_container(self.source_path, 'r')
frame = next(iter(self.key_frames.values()))
if container.streams.video[0].metadata.get('rotate'):
frame = av.VideoFrame().from_ndarray(
rotate_image(
frame.to_ndarray(format='bgr24'),
360 - int(container.streams.video[0].metadata.get('rotate'))
),
format ='bgr24'
)
self._close_video_container(container)
return (frame.width, frame.height)

def check_key_frame(self, container, video_stream, key_frame):
Expand Down Expand Up @@ -150,6 +160,14 @@ def decode_needed_frames(self, chunk_number, db_data):
if frame_number < start_chunk_frame_number:
continue
elif frame_number < end_chunk_frame_number and not ((frame_number - start_chunk_frame_number) % step):
if video_stream.metadata.get('rotate'):
frame = av.VideoFrame().from_ndarray(
rotate_image(
frame.to_ndarray(format='bgr24'),
360 - int(container.streams.video[0].metadata.get('rotate'))
),
format ='bgr24'
)
yield frame
elif (frame_number - start_chunk_frame_number) % step:
continue
Expand Down Expand Up @@ -177,6 +195,14 @@ def frame_sizes(self):
container.seek(offset=next(iter(self.key_frames.values())), stream=video_stream)
for packet in container.demux(video_stream):
for frame in packet.decode():
if video_stream.metadata.get('rotate'):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

code duplication?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No

frame = av.VideoFrame().from_ndarray(
rotate_image(
frame.to_ndarray(format='bgr24'),
360 - int(container.streams.video[0].metadata.get('rotate'))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have a couple of questions here:

  • What does video_stream.metadata.get('rotate') return?
  • What is the difference between the previous expression and container.streams.video[0].metadata.get('rotate')?
  • Can container.streams.video[0].metadata.get('rotate') always be converted to int?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what the method returns: video_stream.metadata.get('rotate')?

A string containing the rotation angle.

what is the difference between previous expression and container.streams.video[0].metadata.get('rotate')

They are equivalent.

can container.streams.video[0].metadata.get('rotate') be always converted to int?

I checked some values, such as 60, 90, 180, and 270, and in those cases, yes.

),
format ='bgr24'
)
self._close_video_container(container)
return (frame.width, frame.height)

Expand Down
1 change: 1 addition & 0 deletions cvat/apps/engine/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,7 @@ def update_progress(progress):

if settings.USE_CACHE and db_data.storage_method == StorageMethodChoice.CACHE:
for media_type, media_files in media.items():

if not media_files:
continue

Expand Down
Binary file not shown.
53 changes: 52 additions & 1 deletion cvat/apps/engine/tests/test_rest_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -1556,6 +1556,16 @@ def setUpClass(cls):
video.write(data.read())
cls._image_sizes[filename] = img_sizes

filename = "test_rotated_90_video.mp4"
path = os.path.join(os.path.dirname(__file__), 'assets', 'test_rotated_90_video.mp4')
container = av.open(path, 'r')
for frame in container.decode(video=0):
# pyav ignores rotation record in metadata when decoding frames
img_sizes = [(frame.height, frame.width)] * container.streams.video[0].frames
break
container.close()
cls._image_sizes[filename] = img_sizes

filename = os.path.join("videos", "test_video_1.mp4")
path = os.path.join(settings.SHARE_ROOT, filename)
os.makedirs(os.path.dirname(path))
Expand Down Expand Up @@ -2013,7 +2023,7 @@ def _test_api_v1_tasks_id_data(self, user):
os.path.join(settings.SHARE_ROOT, "videos")
)
task_spec = {
"name": "my video with meta info task #11",
"name": "my video with meta info task #13",
"overlap": 0,
"segment_size": 0,
"labels": [
Expand All @@ -2032,6 +2042,47 @@ def _test_api_v1_tasks_id_data(self, user):
self._test_api_v1_tasks_id_data_spec(user, task_spec, task_data, self.ChunkType.VIDEO,
self.ChunkType.VIDEO, image_sizes, StorageMethodChoice.CACHE)

task_spec = {
"name": "my cached video task #14",
"overlap": 0,
"segment_size": 0,
"labels": [
{"name": "car"},
{"name": "person"},
]
}

task_data = {
"client_files[0]": open(os.path.join(os.path.dirname(__file__), 'assets', 'test_rotated_90_video.mp4'), 'rb'),
"image_quality": 70,
"use_zip_chunks": True
}

image_sizes = self._image_sizes['test_rotated_90_video.mp4']
self._test_api_v1_tasks_id_data_spec(user, task_spec, task_data, self.ChunkType.IMAGESET,
self.ChunkType.VIDEO, image_sizes, StorageMethodChoice.FILE_SYSTEM)

task_spec = {
"name": "my video task #15",
"overlap": 0,
"segment_size": 0,
"labels": [
{"name": "car"},
{"name": "person"},
]
}

task_data = {
"client_files[0]": open(os.path.join(os.path.dirname(__file__), 'assets', 'test_rotated_90_video.mp4'), 'rb'),
"image_quality": 70,
"use_cache": True,
"use_zip_chunks": True
}

image_sizes = self._image_sizes['test_rotated_90_video.mp4']
self._test_api_v1_tasks_id_data_spec(user, task_spec, task_data, self.ChunkType.IMAGESET,
self.ChunkType.VIDEO, image_sizes, StorageMethodChoice.CACHE)

def test_api_v1_tasks_id_data_admin(self):
self._test_api_v1_tasks_id_data(self.admin)

Expand Down
14 changes: 14 additions & 0 deletions cvat/apps/engine/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# SPDX-License-Identifier: MIT

import ast
import cv2 as cv
from collections import namedtuple
import importlib
import sys
Expand Down Expand Up @@ -74,3 +75,16 @@ def av_scan_paths(*paths):
res = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
if res.returncode:
raise ValidationError(res.stdout)

def rotate_image(image, angle):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we support rotation by an angle that isn't an integer multiple of pi/2? Is rotating a frame by, say, 30 degrees a real case? Is it possible at all?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can create such a video artificially by writing the desired value into the metadata.

height, width = image.shape[:2]
image_center = (width/2, height/2)
matrix = cv.getRotationMatrix2D(image_center, angle, 1.)
abs_cos = abs(matrix[0,0])
abs_sin = abs(matrix[0,1])
bound_w = int(height * abs_sin + width * abs_cos)
bound_h = int(height * abs_cos + width * abs_sin)
matrix[0, 2] += bound_w/2 - image_center[0]
matrix[1, 2] += bound_h/2 - image_center[1]
matrix = cv.warpAffine(image, matrix, (bound_w, bound_h))
return matrix