Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Manifest #2763

Merged
merged 37 commits into from
Mar 24, 2021
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
5335bfe
Added support for manifest file
Marishka17 Feb 2, 2021
417e82f
Added data migration
Marishka17 Feb 2, 2021
eca3465
Updated tests
Marishka17 Feb 2, 2021
23792f1
Changed script for manually preparing
Marishka17 Feb 2, 2021
971ebfe
Fixs
Marishka17 Feb 5, 2021
d8afb6e
Fixed paths
Marishka17 Feb 5, 2021
6ead5c7
some fix & licence headers
Marishka17 Feb 5, 2021
2cd2972
Fixes
Marishka17 Feb 17, 2021
79d7a36
Fix stop_frame saving
Marishka17 Feb 19, 2021
64f9ec9
Merge branch 'upstream/develop' into mk/manifest
Marishka17 Feb 20, 2021
97c1746
Update migration
Marishka17 Feb 20, 2021
cbe1066
Fix codacy
Marishka17 Feb 20, 2021
01c940d
Bandit issue & json instead marshal
Marishka17 Feb 24, 2021
56ea59d
Merge branch 'develop' into mk/manifest
Marishka17 Feb 24, 2021
7081a96
f
Marishka17 Feb 24, 2021
83c01ed
pylint fixes
Marishka17 Feb 25, 2021
e54ce6f
Merge branch 'upstream/develop' into mk/manifest
Marishka17 Feb 25, 2021
934c63e
Update CHANGELOG
Marishka17 Feb 25, 2021
7b83517
Some fixes
Marishka17 Mar 2, 2021
d8c8606
Update manifest documentation
Marishka17 Mar 3, 2021
94d5d68
Merge branch 'upstream/develop' into mk/manifest
Marishka17 Mar 3, 2021
a643853
Fix create.py
Marishka17 Mar 3, 2021
eb88ed0
Fix case with 3d data
Marishka17 Mar 4, 2021
4284cac
Modify migration
Marishka17 Mar 9, 2021
d31510f
Fixed migration
Marishka17 Mar 9, 2021
65e4b12
Refactored script to manually prepare manifest
Marishka17 Mar 16, 2021
2d4f456
Update documentation
Marishka17 Mar 17, 2021
3afa428
Merge branch 'upstream/develop' into mk/manifest
Marishka17 Mar 17, 2021
38b0d0f
Fix some comments
Marishka17 Mar 19, 2021
83ca74d
Merge branch 'upstream/develop' into mk/manifest
Marishka17 Mar 22, 2021
c7bbd47
Fix
Marishka17 Mar 22, 2021
9c3a81d
One more fix
Marishka17 Mar 23, 2021
7e9389b
Merge branch 'develop' into mk/manifest
Mar 23, 2021
d16022a
Update README
Marishka17 Mar 23, 2021
3e58fa2
Revert prettier changes
Marishka17 Mar 23, 2021
c0b8a53
Merge branch 'mk/manifest' of https://github.com/opencv/cvat into mk/…
Marishka17 Mar 23, 2021
71d1b9a
Update utils/dataset_manifest/README.md
Mar 24, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 18 additions & 11 deletions cvat/apps/engine/cache.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (C) 2020 Intel Corporation
# Copyright (C) 2021 Intel Corporation
#
# SPDX-License-Identifier: MIT

Expand All @@ -9,9 +9,9 @@
from django.conf import settings

from cvat.apps.engine.media_extractors import (Mpeg4ChunkWriter,
Mpeg4CompressedChunkWriter, ZipChunkWriter, ZipCompressedChunkWriter)
Mpeg4CompressedChunkWriter, ZipChunkWriter, ZipCompressedChunkWriter,
IDatasetManifestReader, VDatasetManifestReader)
zhiltsov-max marked this conversation as resolved.
Show resolved Hide resolved
from cvat.apps.engine.models import DataChoice, StorageChoice
from cvat.apps.engine.prepare import PrepareInfo
from cvat.apps.engine.models import DimensionType

class CacheInteraction:
Expand Down Expand Up @@ -51,17 +51,24 @@ def prepare_chunk_buff(self, db_data, quality, chunk_number):
StorageChoice.LOCAL: db_data.get_upload_dirname(),
StorageChoice.SHARE: settings.SHARE_ROOT
}[db_data.storage]
if os.path.exists(db_data.get_meta_path()):
if hasattr(db_data, 'video'):
source_path = os.path.join(upload_dir, db_data.video.path)
meta = PrepareInfo(source_path=source_path, meta_path=db_data.get_meta_path())
for frame in meta.decode_needed_frames(chunk_number, db_data):
images.append(frame)
writer.save_as_chunk([(image, source_path, None) for image in images], buff)
reader = VDatasetManifestReader(manifest_path=db_data.get_manifest_path(),
source_path=source_path, chunk_number=chunk_number,
chunk_size=db_data.chunk_size, start=db_data.start_frame,
stop=db_data.stop_frame,step=db_data.get_frame_step())
zhiltsov-max marked this conversation as resolved.
Show resolved Hide resolved
for frame in reader:
images.append((frame, source_path, None))
else:
with open(db_data.get_dummy_chunk_path(chunk_number), 'r') as dummy_file:
images = [os.path.join(upload_dir, line.strip()) for line in dummy_file]
writer.save_as_chunk([(image, image, None) for image in images], buff)
reader = IDatasetManifestReader(manifest_path=db_data.get_manifest_path(),
chunk_number=chunk_number,chunk_size=db_data.chunk_size,
start=db_data.start_frame, stop=db_data.stop_frame,
step=db_data.get_frame_step())
for item in reader:
source_path = os.path.join(upload_dir, f"{item['name']}{item['extension']}")
images.append((source_path, source_path, None))

writer.save_as_chunk(images, buff)
buff.seek(0)
return buff, mime_type

Expand Down
90 changes: 90 additions & 0 deletions cvat/apps/engine/media_extractors.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
ImageFile.LOAD_TRUNCATED_IMAGES = True

from cvat.apps.engine.mime_types import mimetypes
from utils.dataset_manifest import VManifestManager, IManifestManager
from utils.dataset_manifest.core import WorkWithVideo
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suggest renaming this class to something more descriptive.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Marishka17 , a class name shouldn't start with a verb. It should describe a type of entity. Could you please rename?


def get_mime(name):
for type_name, type_def in MEDIA_TYPES.items():
Expand Down Expand Up @@ -121,6 +123,10 @@ def get_image_size(self, i):
img = Image.open(self._source_path[i])
return img.width, img.height

@property
def absolute_source_paths(self):
    """List the result of ``get_path`` for every index of ``_source_path``."""
    resolved = []
    for position, _ in enumerate(self._source_path):
        resolved.append(self.get_path(position))
    return resolved

class DirectoryReader(ImageListReader):
def __init__(self, source_path, step=1, start=0, stop=None):
image_paths = []
Expand Down Expand Up @@ -311,6 +317,90 @@ def get_image_size(self, i):
image = (next(iter(self)))[0]
return image.width, image.height

class FragmentMediaReader:
    """Work out which frame numbers of a media source belong to one chunk.

    Frames are sampled from ``start`` to ``stop`` (inclusive) every ``step``
    frames. Chunk ``chunk_number`` covers up to ``chunk_size`` consecutive
    sampled frames; the last chunk may be shorter.

    Args:
        chunk_number: Zero-based index of the requested chunk.
        chunk_size: Maximum number of sampled frames per chunk.
        start: First frame number of the sampled sequence.
        stop: Last frame number of the sampled sequence (inclusive).
        step: Distance between two sampled frames.
        *args, **kwargs: Accepted and ignored, so that a subclass can forward
            one shared set of keyword arguments to several bases.
    """

    def __init__(self, chunk_number, chunk_size, start, stop, step=1, *args, **kwargs):
        self._start = start
        self._stop = stop + 1  # exclusive bound, so that ``stop`` itself is included
        self._step = step
        self._chunk_number = chunk_number
        self._chunk_size = chunk_size
        # First frame of the chunk: skip ``chunk_number`` full chunks of sampled frames.
        self._start_chunk_frame_number = (
            self._start + self._chunk_number * self._chunk_size * self._step
        )
        # Exclusive end of the chunk, clipped to the end of the sequence.
        self._end_chunk_frame_number = min(
            self._start_chunk_frame_number + (self._chunk_size - 1) * self._step + 1,
            self._stop,
        )
        self._frame_range = self._get_frame_range()

    @property
    def frame_range(self):
        """Frame numbers of the requested chunk, as an ascending list."""
        return self._frame_range

    def _get_frame_range(self):
        """Return the list of frame numbers that make up the chunk.

        The chunk start always lies on the sampling stride (it is ``start``
        plus a multiple of ``step``), so the chunk is a plain ``range`` and
        there is no need to walk every sampled frame from ``start``.
        """
        if self._step < 0:
            # Negative strides select no frames.
            return []
        # ``max`` only matters for a negative chunk number, where the chunk
        # start falls before the beginning of the sequence.
        first = max(self._start, self._start_chunk_frame_number)
        return list(range(first, self._end_chunk_frame_number, self._step))

class IDatasetManifestReader(FragmentMediaReader):
    """Iterate over the image manifest entries that belong to one chunk."""

    def __init__(self, manifest_path, **kwargs):
        # The chunk geometry (chunk_number, chunk_size, start, stop, step)
        # is handled by FragmentMediaReader.
        super().__init__(**kwargs)
        manifest = IManifestManager(manifest_path)
        self._manifest = manifest
        manifest.init_index()

    def __iter__(self):
        """Yield ``self._manifest[n]`` for every frame number ``n`` of the chunk."""
        manifest = self._manifest
        for frame_number in self._frame_range:
            yield manifest[frame_number]

class VDatasetManifestReader(WorkWithVideo, FragmentMediaReader):
    """Decode the video frames that belong to one chunk.

    The video manifest is used to find a frame at or before the start of the
    chunk from which decoding can begin, so the video does not have to be
    decoded from its first frame.
    """

    def __init__(self, manifest_path, **kwargs):
        # The same keyword arguments are handed to both bases;
        # FragmentMediaReader ignores the ones it does not know.
        # NOTE(review): presumably WorkWithVideo consumes ``source_path`` and
        # exposes it as ``self.source_path`` -- confirm against its definition.
        WorkWithVideo.__init__(self, **kwargs)
        FragmentMediaReader.__init__(self, **kwargs)
        self._manifest = VManifestManager(manifest_path)
        self._manifest.init_index()

    def _get_nearest_left_key_frame(self):
        """Return ``(frame_number, pts)`` of the last manifest entry that is
        not after the first frame of the chunk, or ``(0, 0)`` if there is none.
        """
        start_decode_frame_number = 0
        start_decode_timestamp = 0
        # NOTE(review): the early ``break`` assumes manifest entries are
        # ordered by ascending frame number -- confirm.
        for _, frame in self._manifest:
            frame_number, timestamp = frame.get('number'), frame.get('pts')
            if int(frame_number) <= self._start_chunk_frame_number:
                start_decode_frame_number = frame_number
                start_decode_timestamp = timestamp
            else:
                break
        return int(start_decode_frame_number), int(start_decode_timestamp)

    def __iter__(self):
        """Yield the decoded frames of the chunk in ascending order.

        The container is closed when iteration finishes, when the consumer
        stops early, and when decoding raises.
        """
        if not self._frame_range:
            # Nothing to decode; avoids opening the video at all.
            return

        wanted_frames = set(self._frame_range)  # O(1) membership per decoded frame
        last_wanted_frame = self._frame_range[-1]
        start_decode_frame_number, start_decode_timestamp = self._get_nearest_left_key_frame()

        container = self._open_video_container(self.source_path, mode='r')
        try:
            video_stream = self._get_video_stream(container)
            container.seek(offset=start_decode_timestamp, stream=video_stream)
            needs_rotation = video_stream.metadata.get('rotate')

            # Decoding restarts at the seek target, so numbering does as well.
            frame_number = start_decode_frame_number - 1
            for packet in container.demux(video_stream):
                for frame in packet.decode():
                    frame_number += 1
                    if frame_number in wanted_frames:
                        if needs_rotation:
                            # Apply the rotation recorded in the stream metadata.
                            frame = av.VideoFrame().from_ndarray(
                                rotate_image(
                                    frame.to_ndarray(format='bgr24'),
                                    360 - int(container.streams.video[0].metadata.get('rotate'))
                                ),
                                format='bgr24'
                            )
                        yield frame
                    elif frame_number > last_wanted_frame:
                        # Past the end of the chunk: stop decoding.
                        return
        finally:
            self._close_video_container(container)

class IChunkWriter(ABC):
def __init__(self, quality, dimension=DimensionType.DIM_2D):
self._image_quality = quality
Expand Down
46 changes: 46 additions & 0 deletions cvat/apps/engine/migrations/0037_auto_20210127_1354.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Generated by Django 3.1.1 on 2021-01-29 14:39

from django.db import migrations
from cvat.apps.engine.models import StorageMethodChoice, StorageChoice
from django.conf import settings
from utils.dataset_manifest import prepare_meta, VManifestManager, IManifestManager
import glob
import os

def migrate_data(apps, shema_editor):
    """Create manifest files for every ``Data`` row stored with the CACHE
    method and delete the ``meta_info.txt`` / ``dummy_*.txt`` files they replace.
    """
    data_model = apps.get_model("engine", "Data")
    cached_data = data_model.objects.filter(storage_method=StorageMethodChoice.CACHE)
    for db_data in cached_data:
        upload_dir = '{}/{}/raw'.format(settings.MEDIA_DATA_ROOT, db_data.id)
        # Media files live either in the upload directory or on the share.
        if db_data.storage == StorageChoice.LOCAL:
            data_dir = upload_dir
        else:
            data_dir = settings.SHARE_ROOT

        if hasattr(db_data, 'video'):
            meta_info, _ = prepare_meta(
                data_type='video',
                media_file=os.path.join(data_dir, db_data.video.path),
            )
            manifest = VManifestManager(manifest_path=upload_dir)
            manifest.create(meta_info)
            manifest.init_index()

            legacy_meta_path = os.path.join(upload_dir, 'meta_info.txt')
            if os.path.exists(legacy_meta_path):
                os.remove(legacy_meta_path)
            continue

        # Image data: collect the image paths in frame order.
        sources = []
        for db_image in db_data.images.all().order_by('frame'):
            sources.append(os.path.join(data_dir, db_image.path))
        # or better to get all possible needed info from db?
        meta_info = prepare_meta(data_type='images', sources=sources, data_dir=data_dir)
        manifest = IManifestManager(manifest_path=upload_dir)
        manifest.create(meta_info.content)
        manifest.init_index()

        for dummy_chunk_path in glob.glob(f'{upload_dir}/dummy_*.txt'):
            os.remove(dummy_chunk_path)


class Migration(migrations.Migration):
    """Run ``migrate_data`` after engine migration 0036."""

    dependencies = [('engine', '0036_auto_20201216_0943')]

    operations = [migrations.RunPython(migrate_data)]
9 changes: 4 additions & 5 deletions cvat/apps/engine/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,11 +138,10 @@ def get_compressed_chunk_path(self, chunk_number):
def get_preview_path(self):
    """Return the path of ``preview.jpeg`` inside the data directory."""
    data_dir = self.get_data_dirname()
    return os.path.join(data_dir, 'preview.jpeg')

def get_meta_path(self):
    """Return the path of ``meta_info.txt`` inside the upload directory."""
    upload_dir = self.get_upload_dirname()
    return os.path.join(upload_dir, 'meta_info.txt')

def get_dummy_chunk_path(self, chunk_number):
    """Return the path of the ``dummy_<chunk_number>.txt`` file inside the upload directory."""
    file_name = 'dummy_{}.txt'.format(chunk_number)
    return os.path.join(self.get_upload_dirname(), file_name)
def get_manifest_path(self):
    """Return the path of ``manifest.jsonl`` inside the upload directory."""
    upload_dir = self.get_upload_dirname()
    return os.path.join(upload_dir, 'manifest.jsonl')
def get_index_path(self):
    """Return the path of the ``index`` entry inside the upload directory."""
    upload_dir = self.get_upload_dirname()
    return os.path.join(upload_dir, 'index')

class Video(models.Model):
data = models.OneToOneField(Data, on_delete=models.CASCADE, related_name="video", null=True)
Expand Down
Loading