Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix task creation with video file when there are no valid keyframes #7838

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
### Fixed

- Task creation from a video file without keyframes allowing for random iteration
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider rephrasing for clarity and grammatical correctness.

- Task creation from a video file without keyframes allowing for random iteration
+ Task creation from a video file without keyframes now allows for random iteration

Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation.

Suggested change
- Task creation from a video file without keyframes allowing for random iteration
Task creation from a video file without keyframes now allows for random iteration

(<https://github.com/cvat-ai/cvat/pull/7838>)
101 changes: 67 additions & 34 deletions utils/dataset_manifest/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# SPDX-License-Identifier: MIT

from enum import Enum
from io import StringIO
from io import StringIO, BytesIO
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider removing unused imports to clean up the code.

- from io import StringIO, BytesIO
+ from io import BytesIO

Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation.

Suggested change
from io import StringIO, BytesIO
from io import BytesIO

import av
import json
import os
Expand All @@ -13,12 +13,11 @@
from contextlib import closing
from PIL import Image
from json.decoder import JSONDecodeError
from io import BytesIO

from .errors import InvalidManifestError, InvalidVideoFrameError
from .errors import InvalidManifestError, InvalidVideoError
from .utils import SortingMethod, md5_hash, rotate_image, sort

from typing import Dict, List, Union, Optional
from typing import Dict, List, Union, Optional, Iterator, Tuple

class VideoStreamReader:
def __init__(self, source_path, chunk_size, force):
Expand All @@ -33,7 +32,7 @@ def __init__(self, source_path, chunk_size, force):
for frame in packet.decode():
# check type of first frame
if not frame.pict_type.name == 'I':
raise InvalidVideoFrameError('First frame is not key frame')
raise InvalidVideoError('The first frame is not a key frame')

# get video resolution
if video_stream.metadata.get('rotate'):
Expand Down Expand Up @@ -75,40 +74,66 @@ def validate_key_frame(self, container, video_stream, key_frame):
return False
return True

def __iter__(self):
with closing(av.open(self.source_path, mode='r')) as container:
video_stream = self._get_video_stream(container)
frame_pts, frame_dts = -1, -1
index, key_frame_number = 0, 0
for packet in container.demux(video_stream):
def __iter__(self) -> Iterator[Union[int, Tuple[int, int, str]]]:
"""
Iterate over video frames and yield key frames or indexes.

Yields:
Union[Tuple[int, int, str], int]: (frame index, frame timestamp, frame MD5) or frame index.
"""
# Open containers for reading frames and checking movement on them
with (
closing(av.open(self.source_path, mode='r')) as reading_container,
closing(av.open(self.source_path, mode='r')) as checking_container
):
reading_v_stream = self._get_video_stream(reading_container)
checking_v_stream = self._get_video_stream(checking_container)
prev_pts: Optional[int] = None
prev_dts: Optional[int] = None
index, key_frame_count = 0, 0

for packet in reading_container.demux(reading_v_stream):
for frame in packet.decode():
if None not in {frame.pts, frame_pts} and frame.pts <= frame_pts:
raise InvalidVideoFrameError('Invalid pts sequences')
if None not in {frame.dts, frame_dts} and frame.dts <= frame_dts:
raise InvalidVideoFrameError('Invalid dts sequences')
frame_pts, frame_dts = frame.pts, frame.dts
# Check PTS and DTS sequences for validity
if None not in {frame.pts, prev_pts} and frame.pts <= prev_pts:
raise InvalidVideoError('Detected non-increasing PTS sequence in the video')
if None not in {frame.dts, prev_dts} and frame.dts <= prev_dts:
raise InvalidVideoError('Detected non-increasing DTS sequence in the video')
prev_pts, prev_dts = frame.pts, frame.dts

if frame.key_frame:
key_frame_number += 1
ratio = (index + 1) // key_frame_number

if ratio >= self._upper_bound and not self._force:
raise AssertionError('Too few keyframes')

key_frame = {
'index': index,
key_frame_data = {
'pts': frame.pts,
'md5': md5_hash(frame)
'md5': md5_hash(frame),
}

with closing(av.open(self.source_path, mode='r')) as checked_container:
checked_container.seek(offset=key_frame['pts'], stream=video_stream)
isValid = self.validate_key_frame(checked_container, video_stream, key_frame)
if isValid:
yield (index, key_frame['pts'], key_frame['md5'])
# Check that it is possible to seek to this key frame using frame.pts
checking_container.seek(
offset=key_frame_data['pts'],
stream=checking_v_stream,
)
is_valid_key_frame = self.validate_key_frame(
checking_container,
checking_v_stream,
key_frame_data,
)

if is_valid_key_frame:
key_frame_count += 1
yield (index, key_frame_data['pts'], key_frame_data['md5'])
else:
yield index
else:
yield index

index += 1
key_frame_ratio = index // (key_frame_count or 1)

# Check if the number of key frames meets the upper bound
if key_frame_ratio >= self._upper_bound and not self._force:
raise InvalidVideoError('The number of keyframes is not enough for smooth iteration over the video')
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider handling the scenario where there are too few keyframes more gracefully, possibly by logging a warning and continuing with a fallback mechanism.

- raise InvalidVideoError('The number of keyframes is not enough for smooth iteration over the video')
+ logging.warning('Detected insufficient keyframes for smooth iteration. Proceeding with reduced functionality.')

Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation.

Suggested change
raise InvalidVideoError('The number of keyframes is not enough for smooth iteration over the video')
logging.warning('Detected insufficient keyframes for smooth iteration. Proceeding with reduced functionality.')


# Update frames number if not already set
Comment on lines +77 to +136
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The iterator method is complex and handles multiple scenarios. Consider refactoring to improve readability and maintainability.

if not self._frames_number:
self._frames_number = index

Expand Down Expand Up @@ -317,6 +342,9 @@ def __getitem__(self, number):
def __len__(self):
return len(self._index)

def is_empty(self) -> bool:
return not len(self)

Comment on lines +345 to +347
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The is_empty method in _Index class should use a more efficient check for emptiness.

- return not len(self)
+ return not bool(self._index)

Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation.

Suggested change
def is_empty(self) -> bool:
return not len(self)
def is_empty(self) -> bool:
return not bool(self._index)

class _ManifestManager(ABC):
BASE_INFORMATION = {
'version' : 1,
Expand Down Expand Up @@ -405,10 +433,12 @@ def manifest(self):
return self._manifest

def __len__(self):
if hasattr(self, '_index'):
return len(self._index)
else:
return None
return len(self._index)

def is_empty(self) -> bool:
if self._index.is_empty():
self._index.load()
return self._index.is_empty()

def __getitem__(self, item):
if isinstance(item, slice):
Expand Down Expand Up @@ -482,6 +512,9 @@ def create(self, *, _tqdm=None): # pylint: disable=arguments-differ

self.set_index()

if self.is_empty() and not self._reader._force:
raise InvalidManifestError('Empty manifest file has been created')

Comment on lines +515 to +517
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Raising an error for an empty manifest creation could be handled more gracefully. Consider allowing the user to decide how to proceed.

- raise InvalidManifestError('Empty manifest file has been created')
+ if self._user_prompt:
+     user_decision = input('Empty manifest file has been created. Would you like to proceed? (y/n): ')
+     if user_decision.lower() != 'y':
+         raise InvalidManifestError('Operation cancelled by the user.')

Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation.

Suggested change
if self.is_empty() and not self._reader._force:
raise InvalidManifestError('Empty manifest file has been created')
if self.is_empty() and not self._reader._force:
if self._user_prompt:
user_decision = input('Empty manifest file has been created. Would you like to proceed? (y/n): ')
if user_decision.lower() != 'y':
raise InvalidManifestError('Operation cancelled by the user.')

def partial_update(self, number, properties):
pass

Expand Down
2 changes: 1 addition & 1 deletion utils/dataset_manifest/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ class BasicError(Exception):
The basic exception type for all exceptions in the library
"""

class InvalidVideoFrameError(BasicError):
class InvalidVideoError(BasicError):
"""
Indicates an invalid video frame
"""
Expand Down
Loading