cvat-ai · nmanovic · Nov 5, 2020 · Sep 24, 2020 · Oct 7, 2020 · Oct 7, 2020
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -8,7 +8,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [1.2.0] - Unreleased
 
 ### Added
-
 - Removed Z-Order flag from task creation process
 - Ability to login into CVAT-UI with token from api/v1/auth/login (<https://github.com/openvinotoolkit/cvat/pull/2234>)
 - Added layout grids toggling ('ctrl + alt + Enter')
@@ -29,6 +28,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Ability to upload prepared meta information along with a video when creating a task (<https://github.com/openvinotoolkit/cvat/pull/2217>)
 - Optional chaining plugin for cvat-canvas and cvat-ui (<https://github.com/openvinotoolkit/cvat/pull/2249>)
 - MOTS png mask format support (<https://github.com/openvinotoolkit/cvat/pull/2198>)
+- Ability to correctly upload a video with a rotation record in its metadata (<https://github.com/openvinotoolkit/cvat/pull/2218>)
 
 ### Changed
 

@@ -14,6 +14,7 @@
 import numpy as np
 from pyunpack import Archive
 from PIL import Image, ImageFile
+from cvat.apps.engine.utils import rotate_image
 
 # fixes: "OSError:broken data stream" when executing line 72 while loading images downloaded from the web
 # see: https://stackoverflow.com/questions/42462431/oserror-broken-data-stream-when-reading-image-file
@@ -228,6 +229,16 @@ def _decode(self, container):
                 for image in packet.decode():
                     frame_num += 1
                     if self._has_frame(frame_num - 1):
+                        if packet.stream.metadata.get('rotate'):
+                            old_image = image
+                            image = av.VideoFrame().from_ndarray(
+                                rotate_image(
+                                    image.to_ndarray(format='bgr24'),
+                                    360 - int(container.streams.video[0].metadata.get('rotate'))
+                                ),
+                                format ='bgr24'
+                            )
+                            image.pts = old_image.pts
                         yield (image, self._source_path[0], image.pts)
 
     def __iter__(self):
@@ -252,7 +263,15 @@ def get_preview(self):
         container = self._get_av_container()
         stream = container.streams.video[0]
         preview = next(container.decode(stream))
-        return self._get_preview(preview.to_image())
+        return self._get_preview(preview.to_image() if not stream.metadata.get('rotate') \
+            else av.VideoFrame().from_ndarray(
+                rotate_image(
+                    preview.to_ndarray(format='bgr24'),
+                    360 - int(container.streams.video[0].metadata.get('rotate'))
+                ),
+                format ='bgr24'
+            ).to_image()
+        )
 
     def get_image_size(self, i):
         image = (next(iter(self)))[0]

@@ -6,6 +6,7 @@
 from collections import OrderedDict
 import hashlib
 import os
+from cvat.apps.engine.utils import rotate_image
 
 class WorkWithVideo:
     def __init__(self, **kwargs):
@@ -24,7 +25,6 @@ def _get_video_stream(self, container):
         video_stream.thread_type = 'AUTO'
         return video_stream
 
-
 class AnalyzeVideo(WorkWithVideo):
     def check_type_first_frame(self):
         container = self._open_video_container(self.source_path, mode='r')
@@ -76,7 +76,17 @@ def get_task_size(self):
 
     @property
     def frame_sizes(self):
+        container = self._open_video_container(self.source_path, 'r')  # opened only to read the video stream's 'rotate' metadata; closed below
         frame = next(iter(self.key_frames.values()))
+        if container.streams.video[0].metadata.get('rotate'):  # rotation is skipped when the tag is absent or empty
+            frame = av.VideoFrame().from_ndarray(  # rebuild a frame from the rotated pixels so the size returned below is the rotated size
+                rotate_image(
+                    frame.to_ndarray(format='bgr24'),
+                    360 - int(container.streams.video[0].metadata.get('rotate'))  # NOTE(review): presumably maps the 'rotate' tag onto the angle direction rotate_image expects - confirm
+                ),
+                format ='bgr24'
+            )
+        self._close_video_container(container)  # NOTE(review): not reached if the rotation above raises; no try/finally guards the container
         return (frame.width, frame.height)
 
     def check_key_frame(self, container, video_stream, key_frame):
@@ -150,6 +160,14 @@ def decode_needed_frames(self, chunk_number, db_data):
                 if frame_number < start_chunk_frame_number:
                     continue
                 elif frame_number < end_chunk_frame_number and not ((frame_number - start_chunk_frame_number) % step):
+                    if video_stream.metadata.get('rotate'):
+                        frame = av.VideoFrame().from_ndarray(
+                            rotate_image(
+                                frame.to_ndarray(format='bgr24'),
+                                360 - int(container.streams.video[0].metadata.get('rotate'))
+                            ),
+                            format ='bgr24'
+                        )
                     yield frame
                 elif (frame_number - start_chunk_frame_number) % step:
                     continue
@@ -177,6 +195,14 @@ def frame_sizes(self):
         container.seek(offset=next(iter(self.key_frames.values())), stream=video_stream)
         for packet in container.demux(video_stream):
             for frame in packet.decode():
+                if video_stream.metadata.get('rotate'):
+                    frame = av.VideoFrame().from_ndarray(
+                        rotate_image(
+                            frame.to_ndarray(format='bgr24'),
+                            360 - int(container.streams.video[0].metadata.get('rotate'))
+                        ),
+                        format ='bgr24'
+                    )
                 self._close_video_container(container)
                 return (frame.width, frame.height)
 

@@ -294,6 +294,7 @@ def update_progress(progress):
 
     if settings.USE_CACHE and db_data.storage_method == StorageMethodChoice.CACHE:
        for media_type, media_files in media.items():
+
             if not media_files:
                 continue
 

@@ -1548,6 +1548,16 @@ def setUpClass(cls):
             video.write(data.read())
         cls._image_sizes[filename] = img_sizes
 
+        filename = "test_rotated_90_video.mp4"
+        path = os.path.join(os.path.dirname(__file__), 'assets', 'test_rotated_90_video.mp4')
+        container = av.open(path, 'r')
+        for frame in container.decode(video=0):
+            # PyAV ignores the rotation record in the metadata when decoding frames, so the expected size is recorded as (height, width)
+            img_sizes = [(frame.height, frame.width)] * container.streams.video[0].frames
+            break
+        container.close()
+        cls._image_sizes[filename] = img_sizes
+
         filename = os.path.join("videos", "test_video_1.mp4")
         path = os.path.join(settings.SHARE_ROOT, filename)
         os.makedirs(os.path.dirname(path))
@@ -2003,7 +2013,7 @@ def _test_api_v1_tasks_id_data(self, user):
             os.path.join(settings.SHARE_ROOT, "videos")
         )
         task_spec = {
-            "name": "my video with meta info task #11",
+            "name": "my video with meta info task #13",
             "overlap": 0,
             "segment_size": 0,
             "labels": [
@@ -2022,6 +2032,47 @@ def _test_api_v1_tasks_id_data(self, user):
         self._test_api_v1_tasks_id_data_spec(user, task_spec, task_data, self.ChunkType.VIDEO,
                                             self.ChunkType.VIDEO, image_sizes, StorageMethodChoice.CACHE)
 
+        task_spec = {
+            "name": "my cached video task #14",
+            "overlap": 0,
+            "segment_size": 0,
+            "labels": [
+                {"name": "car"},
+                {"name": "person"},
+            ]
+        }
+
+        task_data = {
+            "client_files[0]": open(os.path.join(os.path.dirname(__file__), 'assets', 'test_rotated_90_video.mp4'), 'rb'),
+            "image_quality": 70,
+            "use_zip_chunks": True
+        }
+
+        image_sizes = self._image_sizes['test_rotated_90_video.mp4']
+        self._test_api_v1_tasks_id_data_spec(user, task_spec, task_data, self.ChunkType.IMAGESET,
+            self.ChunkType.VIDEO, image_sizes, StorageMethodChoice.FILE_SYSTEM)
+
+        task_spec = {
+            "name": "my video task #15",
+            "overlap": 0,
+            "segment_size": 0,
+            "labels": [
+                {"name": "car"},
+                {"name": "person"},
+            ]
+        }
+
+        task_data = {
+            "client_files[0]": open(os.path.join(os.path.dirname(__file__), 'assets', 'test_rotated_90_video.mp4'), 'rb'),
+            "image_quality": 70,
+            "use_cache": True,
+            "use_zip_chunks": True
+        }
+
+        image_sizes = self._image_sizes['test_rotated_90_video.mp4']
+        self._test_api_v1_tasks_id_data_spec(user, task_spec, task_data, self.ChunkType.IMAGESET,
+            self.ChunkType.VIDEO, image_sizes, StorageMethodChoice.CACHE)
+
     def test_api_v1_tasks_id_data_admin(self):
         self._test_api_v1_tasks_id_data(self.admin)
 

@@ -3,6 +3,7 @@
 # SPDX-License-Identifier: MIT
 
 import ast
+import cv2 as cv
 from collections import namedtuple
 import importlib
 import sys
@@ -74,3 +75,16 @@ def av_scan_paths(*paths):
         res = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
         if res.returncode:
             raise ValidationError(res.stdout)
+
+def rotate_image(image, angle):  # rotate `image` (array with shape[:2] == (height, width)) by `angle` on a canvas resized to hold the whole result
+    height, width = image.shape[:2]
+    image_center = (width/2, height/2)  # (x, y) order, as passed to getRotationMatrix2D below
+    matrix = cv.getRotationMatrix2D(image_center, angle, 1.)  # affine rotation about the centre with scale factor 1.0
+    abs_cos = abs(matrix[0,0])  # used as |cos(angle)| in the bounding-box formulas below
+    abs_sin = abs(matrix[0,1])  # used as |sin(angle)| in the bounding-box formulas below
+    bound_w = int(height * abs_sin + width * abs_cos)  # output canvas width; int() truncates toward zero
+    bound_h = int(height * abs_cos + width * abs_sin)  # output canvas height; int() truncates toward zero
+    matrix[0, 2] += bound_w/2 - image_center[0]  # shift x translation from the old centre to the new canvas centre
+    matrix[1, 2] += bound_h/2 - image_center[1]  # shift y translation from the old centre to the new canvas centre
+    matrix = cv.warpAffine(image, matrix, (bound_w, bound_h))  # NOTE: `matrix` is rebound here to the warped output image
+    return matrix