cvat-ai · bsekachev · Jul 20, 2023 · Jul 19, 2023 · Jul 19, 2023 · Jul 19, 2023
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,6 +5,10 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## \[2.5.1\] - 2023-07-19
+### Fixed
+- Memory leak caused by PyAV (`av`) containers that were opened but never closed (<https://github.com/opencv/cvat/pull/6501>)
+
 ## \[2.5.0] - 2023-07-05
 ### Added
 - Now CVAT supports project/task markdown description with additional assets

@@ -4,6 +4,6 @@
 
 from cvat.utils.version import get_version
 
-VERSION = (2, 5, 0, 'final', 0)
+VERSION = (2, 5, 1, 'final', 0)
 
 __version__ = get_version(VERSION)
@@ -429,32 +429,27 @@ def _has_frame(self, i):
 
         return False
 
-    def _decode(self, container):
-        frame_num = 0
-        for packet in container.demux():
-            if packet.stream.type == 'video':
+    def __iter__(self):
+        with self._get_av_container() as container:
+            stream = container.streams.video[0]
+            stream.thread_type = 'AUTO'
+            frame_num = 0
+            for packet in container.demux(stream):
                 for image in packet.decode():
                     frame_num += 1
                     if self._has_frame(frame_num - 1):
                         if packet.stream.metadata.get('rotate'):
-                            old_image = image
+                            pts = image.pts
                             image = av.VideoFrame().from_ndarray(
                                 rotate_image(
                                     image.to_ndarray(format='bgr24'),
-                                    360 - int(container.streams.video[0].metadata.get('rotate'))
+                                    360 - int(stream.metadata.get('rotate'))
                                 ),
                                 format ='bgr24'
                             )
-                            image.pts = old_image.pts
+                            image.pts = pts
                         yield (image, self._source_path[0], image.pts)
 
-    def __iter__(self):
-        container = self._get_av_container()
-        source_video_stream = container.streams.video[0]
-        source_video_stream.thread_type = 'AUTO'
-
-        return self._decode(container)
-
     def get_progress(self, pos):
         duration = self._get_duration()
         return pos / duration if duration else None
@@ -465,38 +460,38 @@ def _get_av_container(self):
         return av.open(self._source_path[0])
 
     def _get_duration(self):
-        container = self._get_av_container()
-        stream = container.streams.video[0]
-        duration = None
-        if stream.duration:
-            duration = stream.duration
-        else:
-            # may have a DURATION in format like "01:16:45.935000000"
-            duration_str = stream.metadata.get("DURATION", None)
-            tb_denominator = stream.time_base.denominator
-            if duration_str and tb_denominator:
-                _hour, _min, _sec = duration_str.split(':')
-                duration_sec = 60*60*float(_hour) + 60*float(_min) + float(_sec)
-                duration = duration_sec * tb_denominator
-        return duration
+        with self._get_av_container() as container:
+            stream = container.streams.video[0]
+            duration = None
+            if stream.duration:
+                duration = stream.duration
+            else:
+                # may have a DURATION in format like "01:16:45.935000000"
+                duration_str = stream.metadata.get("DURATION", None)
+                tb_denominator = stream.time_base.denominator
+                if duration_str and tb_denominator:
+                    _hour, _min, _sec = duration_str.split(':')
+                    duration_sec = 60*60*float(_hour) + 60*float(_min) + float(_sec)
+                    duration = duration_sec * tb_denominator
+            return duration
 
     def get_preview(self, frame):
-        container = self._get_av_container()
-        stream = container.streams.video[0]
-        tb_denominator = stream.time_base.denominator
-        needed_time = int((frame / stream.guessed_rate) * tb_denominator)
-        container.seek(offset=needed_time, stream=stream)
-        for packet in container.demux(stream):
-            for frame in packet.decode():
-                return self._get_preview(frame.to_image() if not stream.metadata.get('rotate') \
-                    else av.VideoFrame().from_ndarray(
-                        rotate_image(
-                            frame.to_ndarray(format='bgr24'),
-                            360 - int(container.streams.video[0].metadata.get('rotate'))
-                        ),
-                        format ='bgr24'
-                    ).to_image()
-                )
+        with self._get_av_container() as container:
+            stream = container.streams.video[0]
+            tb_denominator = stream.time_base.denominator
+            needed_time = int((frame / stream.guessed_rate) * tb_denominator)
+            container.seek(offset=needed_time, stream=stream)
+            for packet in container.demux(stream):
+                for frame in packet.decode():
+                    return self._get_preview(frame.to_image() if not stream.metadata.get('rotate') \
+                        else av.VideoFrame().from_ndarray(
+                            rotate_image(
+                                frame.to_ndarray(format='bgr24'),
+                                360 - int(container.streams.video[0].metadata.get('rotate'))
+                            ),
+                            format ='bgr24'
+                        ).to_image()
+                    )
 
     def get_image_size(self, i):
         image = (next(iter(self)))[0]
@@ -700,6 +695,8 @@ def save_as_chunk(
         return image_sizes
 
 class Mpeg4ChunkWriter(IChunkWriter):
+    FORMAT = 'mp4'
+
     def __init__(self, quality=67):
         # translate inversed range [1:100] to [0:51]
         quality = round(51 * (100 - quality) / 99)
@@ -722,21 +719,20 @@ def __init__(self, quality=67):
                 "preset": "ultrafast",
             }
 
-    def _create_av_container(self, path, w, h, rate, options, f='mp4'):
+    def _add_video_stream(self, container, w, h, rate, options):
         # x264 requires width and height must be divisible by 2 for yuv420p
         if h % 2:
             h += 1
         if w % 2:
             w += 1
 
-        container = av.open(path, 'w',format=f)
         video_stream = container.add_stream(self._codec_name, rate=rate)
         video_stream.pix_fmt = "yuv420p"
         video_stream.width = w
         video_stream.height = h
         video_stream.options = options
 
-        return container, video_stream
+        return video_stream
 
     def save_as_chunk(self, images, chunk_path):
         if not images:
@@ -745,16 +741,16 @@ def save_as_chunk(self, images, chunk_path):
         input_w = images[0][0].width
         input_h = images[0][0].height
 
-        output_container, output_v_stream = self._create_av_container(
-            path=chunk_path,
-            w=input_w,
-            h=input_h,
-            rate=self._output_fps,
-            options=self._codec_opts,
-        )
+        with av.open(chunk_path, 'w', format=self.FORMAT) as output_container:
+            output_v_stream = self._add_video_stream(
+                container=output_container,
+                w=input_w,
+                h=input_h,
+                rate=self._output_fps,
+                options=self._codec_opts,
+            )
 
-        self._encode_images(images, output_container, output_v_stream)
-        output_container.close()
+            self._encode_images(images, output_container, output_v_stream)
         return [(input_w, input_h)]
 
     @staticmethod
@@ -797,16 +793,16 @@ def save_as_chunk(self, images, chunk_path):
         output_h = input_h // downscale_factor
         output_w = input_w // downscale_factor
 
-        output_container, output_v_stream = self._create_av_container(
-            path=chunk_path,
-            w=output_w,
-            h=output_h,
-            rate=self._output_fps,
-            options=self._codec_opts,
-        )
+        with av.open(chunk_path, 'w', format=self.FORMAT) as output_container:
+            output_v_stream = self._add_video_stream(
+                container=output_container,
+                w=output_w,
+                h=output_h,
+                rate=self._output_fps,
+                options=self._codec_opts,
+            )
 
-        self._encode_images(images, output_container, output_v_stream)
-        output_container.close()
+            self._encode_images(images, output_container, output_v_stream)
         return [(input_w, input_h)]
 
 def _is_archive(path):

@@ -1,7 +1,7 @@
 openapi: 3.0.3
 info:
   title: CVAT REST API
-  version: '2.5'
+  version: 2.5.1
   description: REST API for Computer Vision Annotation Tool (CVAT)
   termsOfService: https://www.google.com/policies/terms/
   contact:

@@ -25,7 +25,7 @@ services:
 
   cvat_server:
     container_name: cvat_server
-    image: cvat/server:${CVAT_VERSION:-v2.5.0}
+    image: cvat/server:${CVAT_VERSION:-v2.5.1}
     restart: always
     depends_on:
       - cvat_redis
@@ -64,7 +64,7 @@ services:
 
   cvat_utils:
     container_name: cvat_utils
-    image: cvat/server:${CVAT_VERSION:-v2.5.0}
+    image: cvat/server:${CVAT_VERSION:-v2.5.1}
     restart: always
     depends_on:
       - cvat_redis
@@ -89,7 +89,7 @@ services:
 
   cvat_worker_import:
     container_name: cvat_worker_import
-    image: cvat/server:${CVAT_VERSION:-v2.5.0}
+    image: cvat/server:${CVAT_VERSION:-v2.5.1}
     restart: always
     depends_on:
       - cvat_redis
@@ -112,7 +112,7 @@ services:
 
   cvat_worker_export:
     container_name: cvat_worker_export
-    image: cvat/server:${CVAT_VERSION:-v2.5.0}
+    image: cvat/server:${CVAT_VERSION:-v2.5.1}
     restart: always
     depends_on:
       - cvat_redis
@@ -135,7 +135,7 @@ services:
 
   cvat_worker_annotation:
     container_name: cvat_worker_annotation
-    image: cvat/server:${CVAT_VERSION:-v2.5.0}
+    image: cvat/server:${CVAT_VERSION:-v2.5.1}
     restart: always
     depends_on:
       - cvat_redis
@@ -158,7 +158,7 @@ services:
 
   cvat_worker_webhooks:
     container_name: cvat_worker_webhooks
-    image: cvat/server:${CVAT_VERSION:-v2.5.0}
+    image: cvat/server:${CVAT_VERSION:-v2.5.1}
     restart: always
     depends_on:
       - cvat_redis
@@ -182,7 +182,7 @@ services:
 
   cvat_worker_quality_reports:
     container_name: cvat_worker_quality_reports
-    image: cvat/server:${CVAT_VERSION:-v2.5.0}
+    image: cvat/server:${CVAT_VERSION:-v2.5.1}
     restart: always
     depends_on:
       - cvat_redis
@@ -204,7 +204,7 @@ services:
 
   cvat_ui:
     container_name: cvat_ui
-    image: cvat/ui:${CVAT_VERSION:-v2.5.0}
+    image: cvat/ui:${CVAT_VERSION:-v2.5.1}
     restart: always
     depends_on:
       - cvat_server

@@ -85,7 +85,7 @@ cvat:
       additionalVolumeMounts: []
     replicas: 1
     image: cvat/server
-    tag: v2.5.0
+    tag: v2.5.1
     imagePullPolicy: Always
     permissionFix:
       enabled: true
@@ -105,7 +105,7 @@ cvat:
   frontend:
     replicas: 1
     image: cvat/ui
-    tag: v2.5.0
+    tag: v2.5.1
     imagePullPolicy: Always
     labels: {}
     #  test: test