Skip to content

Commit

Permalink
Support cloud storage on server (#3326)
Browse files: browse the repository at this point in the history
Co-authored-by: Maya <[email protected]>
  • Loading branch information
Nikita Manovich and Marishka17 authored Jun 16, 2021
1 parent 8dfb21d commit b18482b
Show file tree
Hide file tree
Showing 16 changed files with 905 additions and 38 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added

- Support of context images for 2D image tasks (<https://github.com/openvinotoolkit/cvat/pull/3122>)
- Support of cloud storage without copying data into CVAT: server part (<https://github.com/openvinotoolkit/cvat/pull/2620>)
- Filter `is_active` for user list (<https://github.com/openvinotoolkit/cvat/pull/3235>)
- Ability to export/import tasks (<https://github.com/openvinotoolkit/cvat/pull/3056>)

Expand Down
25 changes: 25 additions & 0 deletions cvat/apps/authentication/auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,10 @@ def is_comment_author(db_user, db_comment):
has_rights = (db_comment.author == db_user)
return has_rights

@rules.predicate
def is_cloud_storage_owner(db_user, db_storage):
    """Rules predicate: tell whether ``db_user`` owns ``db_storage``.

    Returns the result of comparing ``db_storage.owner`` with ``db_user``.
    """
    return db_storage.owner == db_user

# AUTH PERMISSIONS RULES
rules.add_perm('engine.role.user', has_user_role)
rules.add_perm('engine.role.admin', has_admin_role)
Expand Down Expand Up @@ -190,6 +194,9 @@ def is_comment_author(db_user, db_comment):
rules.add_perm('engine.comment.change', has_admin_role | is_comment_author)


# Cloud storage permissions: creating is allowed for the admin and user roles,
# changing only for an admin or the owner of the storage.
rules.add_perm('engine.cloudstorage.create', has_admin_role | has_user_role)
rules.add_perm('engine.cloudstorage.change', has_admin_role | is_cloud_storage_owner)

class AdminRolePermission(BasePermission):
# pylint: disable=no-self-use
def has_permission(self, request, view):
Expand Down Expand Up @@ -329,3 +336,21 @@ class CommentChangePermission(BasePermission):
def has_object_permission(self, request, view, obj):
    """Return whether ``request.user`` holds 'engine.comment.change' on ``obj``.

    ``view`` is accepted for the permission-class interface but is not used.
    """
    return request.user.has_perm('engine.comment.change', obj)

class CloudStorageAccessPermission(BasePermission):
    """Object-level permission used for accessing a cloud storage.

    Delegates to the 'engine.cloudstorage.change' rule, the same rule that
    CloudStorageChangePermission checks.
    """

    # pylint: disable=no-self-use
    def has_object_permission(self, request, view, obj):
        """Return whether ``request.user`` holds the change rule on ``obj``."""
        return request.user.has_perm("engine.cloudstorage.change", obj)

class CloudStorageChangePermission(BasePermission):
    """Object-level permission used for modifying a cloud storage.

    Delegates to the 'engine.cloudstorage.change' rule.
    """

    # pylint: disable=no-self-use
    def has_object_permission(self, request, view, obj):
        """Return whether ``request.user`` holds the change rule on ``obj``."""
        return request.user.has_perm("engine.cloudstorage.change", obj)

class CloudStorageGetQuerySetMixin:
    """View mixin that narrows the queryset to the requesting user's storages.

    Admins, and any request for which ``self.detail`` is truthy, receive the
    parent queryset unchanged; every other request only sees rows whose
    ``owner`` is the requesting user.
    """

    def get_queryset(self):
        """Return the parent queryset, filtered by owner when required."""
        queryset = super().get_queryset()
        user = self.request.user
        # NOTE(review): detail requests are not filtered here — presumably
        # the object-level permission classes guard them; confirm in the view.
        if has_admin_role(user) or self.detail:
            return queryset
        return queryset.filter(owner=user)
18 changes: 15 additions & 3 deletions cvat/apps/engine/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@
# SPDX-License-Identifier: MIT

from django.contrib import admin
from .models import Task, Segment, Job, Label, AttributeSpec, Project
from .models import Task, Segment, Job, Label, AttributeSpec, Project, CloudStorage

class JobInline(admin.TabularInline):
    """Tabular inline for Job rows; rows can be neither added nor deleted."""
    model = Job
    can_delete = False

    # Don't show extra lines to add an object
    def has_add_permission(self, request, obj=None):
        # `obj` replaces the former `object=None` parameter, which shadowed
        # the builtin; the default keeps one-argument calls working too.
        return False

class SegmentInline(admin.TabularInline):
Expand All @@ -21,7 +21,7 @@ class SegmentInline(admin.TabularInline):
can_delete = False

# Don't show extra lines to add an object
def has_add_permission(self, request, object=None):
def has_add_permission(self, request, obj):
return False


Expand Down Expand Up @@ -84,8 +84,20 @@ class TaskAdmin(admin.ModelAdmin):
def has_add_permission(self, request):
    """Always deny adding objects through this admin, whatever the request."""
    return False

class CloudStorageAdmin(admin.ModelAdmin):
    """Admin configuration for cloud storages.

    Dates and the provider type are read-only, and new storages cannot be
    added from the admin site.
    """
    date_hierarchy = 'updated_date'
    readonly_fields = ('created_date', 'updated_date', 'provider_type')
    list_display = ('__str__', 'resource', 'owner', 'created_date', 'updated_date')
    search_fields = (
        'provider_type',
        'display_name',
        'resource',
        'owner__username',
        'owner__first_name',
        'owner__last_name',
        'owner__email',
    )

    # Placeholder displayed for empty field values.
    empty_value_display = 'unknown'

    def has_add_permission(self, request):
        """Always deny adding cloud storages through the admin site."""
        return False

# Register each engine model with the admin site together with its
# ModelAdmin configuration class.
admin.site.register(Task, TaskAdmin)
admin.site.register(Segment, SegmentAdmin)
admin.site.register(Label, LabelAdmin)
admin.site.register(Project, ProjectAdmin)
admin.site.register(CloudStorage, CloudStorageAdmin)
48 changes: 42 additions & 6 deletions cvat/apps/engine/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,16 @@

from diskcache import Cache
from django.conf import settings
from tempfile import NamedTemporaryFile

from cvat.apps.engine.log import slogger
from cvat.apps.engine.media_extractors import (Mpeg4ChunkWriter,
Mpeg4CompressedChunkWriter, ZipChunkWriter, ZipCompressedChunkWriter,
ImageDatasetManifestReader, VideoDatasetManifestReader)
from cvat.apps.engine.models import DataChoice, StorageChoice
from cvat.apps.engine.models import DimensionType

from cvat.apps.engine.cloud_provider import get_cloud_storage_instance, Credentials
from cvat.apps.engine.utils import md5_hash
class CacheInteraction:
def __init__(self, dimension=DimensionType.DIM_2D):
self._cache = Cache(settings.CACHE_ROOT)
Expand Down Expand Up @@ -49,10 +52,12 @@ def prepare_chunk_buff(self, db_data, quality, chunk_number):
buff = BytesIO()
upload_dir = {
StorageChoice.LOCAL: db_data.get_upload_dirname(),
StorageChoice.SHARE: settings.SHARE_ROOT
StorageChoice.SHARE: settings.SHARE_ROOT,
StorageChoice.CLOUD_STORAGE: db_data.get_upload_dirname(),
}[db_data.storage]
if hasattr(db_data, 'video'):
source_path = os.path.join(upload_dir, db_data.video.path)

reader = VideoDatasetManifestReader(manifest_path=db_data.get_manifest_path(),
source_path=source_path, chunk_number=chunk_number,
chunk_size=db_data.chunk_size, start=db_data.start_frame,
Expand All @@ -64,12 +69,43 @@ def prepare_chunk_buff(self, db_data, quality, chunk_number):
chunk_number=chunk_number, chunk_size=db_data.chunk_size,
start=db_data.start_frame, stop=db_data.stop_frame,
step=db_data.get_frame_step())
for item in reader:
source_path = os.path.join(upload_dir, f"{item['name']}{item['extension']}")
images.append((source_path, source_path, None))

if db_data.storage == StorageChoice.CLOUD_STORAGE:
db_cloud_storage = db_data.cloud_storage
credentials = Credentials()
credentials.convert_from_db({
'type': db_cloud_storage.credentials_type,
'value': db_cloud_storage.credentials,
})
details = {
'resource': db_cloud_storage.resource,
'credentials': credentials,
'specific_attributes': db_cloud_storage.get_specific_attributes()
}
cloud_storage_instance = get_cloud_storage_instance(cloud_provider=db_cloud_storage.provider_type, **details)
cloud_storage_instance.initialize_content()
for item in reader:
name = f"{item['name']}{item['extension']}"
if name not in cloud_storage_instance:
raise Exception('{} file was not found on a {} storage'.format(name, cloud_storage_instance.name))
with NamedTemporaryFile(mode='w+b', prefix='cvat', suffix=name, delete=False) as temp_file:
source_path = temp_file.name
buf = cloud_storage_instance.download_fileobj(name)
temp_file.write(buf.getvalue())
if not (checksum := item.get('checksum', None)):
slogger.glob.warning('A manifest file does not contain checksum for image {}'.format(item.get('name')))
if checksum and not md5_hash(source_path) == checksum:
slogger.glob.warning('Hash sums of files {} do not match'.format(name))
images.append((source_path, source_path, None))
else:
for item in reader:
source_path = os.path.join(upload_dir, f"{item['name']}{item['extension']}")
images.append((source_path, source_path, None))
writer.save_as_chunk(images, buff)
buff.seek(0)
if db_data.storage == StorageChoice.CLOUD_STORAGE:
images = [image_path for image in images if os.path.exists((image_path := image[0]))]
for image_path in images:
os.remove(image_path)
return buff, mime_type

def save_chunk(self, db_data_id, chunk_number, quality, buff, mime_type):
Expand Down
Loading

0 comments on commit b18482b

Please sign in to comment.