Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support cloud storage #3326

Merged
merged 36 commits into from
Jun 16, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
6ddd6c1
Add: simple base server part to support cloud storages
Marishka17 Dec 21, 2020
63c12f1
Fix: save only necessary info, credentials after code redesign
Marishka17 Dec 23, 2020
627398d
Deleted unnecessary & some changes & some fixes
Marishka17 Dec 25, 2020
9324ad1
Fix
Marishka17 Dec 25, 2020
a7399f3
Add(cache): support files on cloud storage
Marishka17 Dec 25, 2020
3cb547e
tmp
Marishka17 Dec 25, 2020
61398f8
Merge branch 'develop' into mk/support_cloud_storage
Marishka17 Dec 25, 2020
b177bed
Fix
Marishka17 Jan 12, 2021
3be1fc7
Revert prettier changes
Marishka17 Mar 1, 2021
da8b583
Merge branch 'upstream/develop' into mk/support_cloud_storage
Marishka17 Mar 1, 2021
9cecd39
Merge branch 'develop' into mk/support_cloud_storage
Marishka17 Mar 25, 2021
82adc0b
tmp
Marishka17 Apr 15, 2021
089d6a8
Merge branch 'upstream/develop' into mk/support_cloud_storage
Marishka17 Apr 15, 2021
8af9a39
Fix bucket public access
Marishka17 Apr 15, 2021
9caff79
Update migration dependency
Marishka17 Apr 15, 2021
6413906
Fix pylint issues
Marishka17 Apr 15, 2021
91c0e42
Some fixes & bandit & add specific attr
Marishka17 Apr 22, 2021
8c39675
Some fixes
Marishka17 May 4, 2021
04388a3
Fix returned response after re-requesting storage creation
Marishka17 May 5, 2021
a12aaa7
Some fixes & size restriction fixes
Marishka17 May 6, 2021
39ab59c
Merge branch 'upstream/develop' into mk/support_cloud_storage
Marishka17 May 6, 2021
2d1073e
Add display name
Marishka17 May 7, 2021
92a044d
Merge branch 'upstream/develop' into mk/support_cloud_storage
Marishka17 May 7, 2021
c6e7b23
Merge branch 'develop' into mk/support_cloud_storage
Marishka17 May 11, 2021
b5386e8
Revert changes
Marishka17 May 12, 2021
b7f6fec
Merge branch 'upstream/develop' into mk/support_cloud_storage
Marishka17 May 14, 2021
a81b0da
Fix comments
Marishka17 May 19, 2021
4b3b2e9
Merge branch 'upstream/develop' into mk/support_cloud_storage
Marishka17 May 19, 2021
1ac7b51
Add validator for specific attributes
Marishka17 May 20, 2021
a93dbab
Allow blank for description field
Marishka17 May 20, 2021
c910c3c
Update CHANGELOG
Marishka17 May 20, 2021
2f73319
Change error display
Marishka17 May 21, 2021
4ac72cc
Redirect error in case when storage doesn't exist
Marishka17 May 21, 2021
faaefb9
fix
Marishka17 May 31, 2021
e68d733
Merge remote-tracking branch 'origin/develop' into nm/cloud-storage-s…
Jun 15, 2021
511eb51
Fixed pylint warnings about unused imports
Jun 15, 2021
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added

- Support of context images for 2D image tasks (<https://github.com/openvinotoolkit/cvat/pull/3122>)
- Support of cloud storage without copying data into CVAT: server part (<https://github.com/openvinotoolkit/cvat/pull/2620>)
- Filter `is_active` for user list (<https://github.com/openvinotoolkit/cvat/pull/3235>)
- Ability to export/import tasks (<https://github.com/openvinotoolkit/cvat/pull/3056>)

Expand Down
25 changes: 25 additions & 0 deletions cvat/apps/authentication/auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,10 @@ def is_comment_author(db_user, db_comment):
has_rights = (db_comment.author == db_user)
return has_rights

@rules.predicate
def is_cloud_storage_owner(db_user, db_storage):
    """Tell whether ``db_user`` is the owner recorded on ``db_storage``.

    Returns the result of comparing ``db_storage.owner`` with ``db_user``.
    """
    storage_owner = db_storage.owner
    return storage_owner == db_user

# AUTH PERMISSIONS RULES
rules.add_perm('engine.role.user', has_user_role)
rules.add_perm('engine.role.admin', has_admin_role)
Expand Down Expand Up @@ -190,6 +194,9 @@ def is_comment_author(db_user, db_comment):
rules.add_perm('engine.comment.change', has_admin_role | is_comment_author)


# Cloud storage permissions:
#   - create: granted when has_admin_role or has_user_role holds;
#   - change: granted when has_admin_role or is_cloud_storage_owner holds.
rules.add_perm('engine.cloudstorage.create', has_admin_role | has_user_role)
rules.add_perm('engine.cloudstorage.change', has_admin_role | is_cloud_storage_owner)

class AdminRolePermission(BasePermission):
# pylint: disable=no-self-use
def has_permission(self, request, view):
Expand Down Expand Up @@ -329,3 +336,21 @@ class CommentChangePermission(BasePermission):
def has_object_permission(self, request, view, obj):
return request.user.has_perm('engine.comment.change', obj)

class CloudStorageAccessPermission(BasePermission):
    """Object-level permission used when accessing a cloud storage.

    The decision is delegated to the ``engine.cloudstorage.change`` rule.
    NOTE(review): access is checked against the "change" permission rather
    than a dedicated access rule - confirm this is intended.
    """

    # pylint: disable=no-self-use
    def has_object_permission(self, request, view, obj):
        """Return whether ``request.user`` passes the change rule for ``obj``."""
        requester = request.user
        return requester.has_perm("engine.cloudstorage.change", obj)

class CloudStorageChangePermission(BasePermission):
    """Object-level permission used when modifying a cloud storage.

    The decision is delegated to the ``engine.cloudstorage.change`` rule.
    """

    # pylint: disable=no-self-use
    def has_object_permission(self, request, view, obj):
        """Return whether ``request.user`` passes the change rule for ``obj``."""
        requester = request.user
        return requester.has_perm("engine.cloudstorage.change", obj)

class CloudStorageGetQuerySetMixin:
    """View mixin that narrows the cloud storage queryset per requester.

    The parent queryset is returned untouched when the requesting user
    satisfies ``has_admin_role`` or when ``self.detail`` is truthy
    (presumably a single-object route - confirm against the view class).
    Otherwise only storages whose ``owner`` is the requesting user remain.
    """

    def get_queryset(self):
        """Return the parent queryset, filtered by owner when required."""
        storages = super().get_queryset()
        requester = self.request.user
        # The admin check is evaluated first, then the detail flag.
        if not (has_admin_role(requester) or self.detail):
            return storages.filter(owner=requester)
        return storages
18 changes: 15 additions & 3 deletions cvat/apps/engine/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@
# SPDX-License-Identifier: MIT

from django.contrib import admin
from .models import Task, Segment, Job, Label, AttributeSpec, Project
from .models import Task, Segment, Job, Label, AttributeSpec, Project, CloudStorage

class JobInline(admin.TabularInline):
model = Job
can_delete = False

# Don't show extra lines to add an object
def has_add_permission(self, request, object=None):
def has_add_permission(self, request, obj):
return False

class SegmentInline(admin.TabularInline):
Expand All @@ -21,7 +21,7 @@ class SegmentInline(admin.TabularInline):
can_delete = False

# Don't show extra lines to add an object
def has_add_permission(self, request, object=None):
def has_add_permission(self, request, obj):
return False


Expand Down Expand Up @@ -84,8 +84,20 @@ class TaskAdmin(admin.ModelAdmin):
def has_add_permission(self, request):
    """Always deny the add permission, whatever the request is."""
    allowed = False
    return allowed

class CloudStorageAdmin(admin.ModelAdmin):
    """Admin site configuration for cloud storage records."""

    # Columns shown in the change list.
    list_display = (
        '__str__',
        'resource',
        'owner',
        'created_date',
        'updated_date',
    )
    # Fields searched by the admin search box (storage fields and owner details).
    search_fields = (
        'provider_type',
        'display_name',
        'resource',
        'owner__username',
        'owner__first_name',
        'owner__last_name',
        'owner__email',
    )
    # Fields displayed but not editable in the admin form.
    readonly_fields = (
        'created_date',
        'updated_date',
        'provider_type',
    )
    date_hierarchy = 'updated_date'
    empty_value_display = 'unknown'

    def has_add_permission(self, request):
        """Always deny the add permission for this admin class."""
        return False

# Register each engine model with the admin site together with its
# ModelAdmin configuration class.
admin.site.register(Task, TaskAdmin)
admin.site.register(Segment, SegmentAdmin)
admin.site.register(Label, LabelAdmin)
admin.site.register(Project, ProjectAdmin)
admin.site.register(CloudStorage, CloudStorageAdmin)
48 changes: 42 additions & 6 deletions cvat/apps/engine/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,16 @@

from diskcache import Cache
from django.conf import settings
from tempfile import NamedTemporaryFile

from cvat.apps.engine.log import slogger
from cvat.apps.engine.media_extractors import (Mpeg4ChunkWriter,
Mpeg4CompressedChunkWriter, ZipChunkWriter, ZipCompressedChunkWriter,
ImageDatasetManifestReader, VideoDatasetManifestReader)
from cvat.apps.engine.models import DataChoice, StorageChoice
from cvat.apps.engine.models import DimensionType

from cvat.apps.engine.cloud_provider import get_cloud_storage_instance, Credentials
from cvat.apps.engine.utils import md5_hash
class CacheInteraction:
def __init__(self, dimension=DimensionType.DIM_2D):
self._cache = Cache(settings.CACHE_ROOT)
Expand Down Expand Up @@ -49,10 +52,12 @@ def prepare_chunk_buff(self, db_data, quality, chunk_number):
buff = BytesIO()
upload_dir = {
StorageChoice.LOCAL: db_data.get_upload_dirname(),
StorageChoice.SHARE: settings.SHARE_ROOT
StorageChoice.SHARE: settings.SHARE_ROOT,
StorageChoice.CLOUD_STORAGE: db_data.get_upload_dirname(),
}[db_data.storage]
if hasattr(db_data, 'video'):
source_path = os.path.join(upload_dir, db_data.video.path)

reader = VideoDatasetManifestReader(manifest_path=db_data.get_manifest_path(),
source_path=source_path, chunk_number=chunk_number,
chunk_size=db_data.chunk_size, start=db_data.start_frame,
Expand All @@ -64,12 +69,43 @@ def prepare_chunk_buff(self, db_data, quality, chunk_number):
chunk_number=chunk_number, chunk_size=db_data.chunk_size,
start=db_data.start_frame, stop=db_data.stop_frame,
step=db_data.get_frame_step())
for item in reader:
source_path = os.path.join(upload_dir, f"{item['name']}{item['extension']}")
images.append((source_path, source_path, None))

if db_data.storage == StorageChoice.CLOUD_STORAGE:
db_cloud_storage = db_data.cloud_storage
credentials = Credentials()
credentials.convert_from_db({
'type': db_cloud_storage.credentials_type,
'value': db_cloud_storage.credentials,
})
details = {
'resource': db_cloud_storage.resource,
'credentials': credentials,
'specific_attributes': db_cloud_storage.get_specific_attributes()
}
cloud_storage_instance = get_cloud_storage_instance(cloud_provider=db_cloud_storage.provider_type, **details)
cloud_storage_instance.initialize_content()
for item in reader:
name = f"{item['name']}{item['extension']}"
if name not in cloud_storage_instance:
raise Exception('{} file was not found on a {} storage'.format(name, cloud_storage_instance.name))
with NamedTemporaryFile(mode='w+b', prefix='cvat', suffix=name, delete=False) as temp_file:
source_path = temp_file.name
buf = cloud_storage_instance.download_fileobj(name)
temp_file.write(buf.getvalue())
if not (checksum := item.get('checksum', None)):
slogger.glob.warning('A manifest file does not contain checksum for image {}'.format(item.get('name')))
if checksum and not md5_hash(source_path) == checksum:
slogger.glob.warning('Hash sums of files {} do not match'.format(name))
images.append((source_path, source_path, None))
else:
for item in reader:
source_path = os.path.join(upload_dir, f"{item['name']}{item['extension']}")
images.append((source_path, source_path, None))
writer.save_as_chunk(images, buff)
buff.seek(0)
if db_data.storage == StorageChoice.CLOUD_STORAGE:
images = [image_path for image in images if os.path.exists((image_path := image[0]))]
for image_path in images:
os.remove(image_path)
return buff, mime_type

def save_chunk(self, db_data_id, chunk_number, quality, buff, mime_type):
Expand Down
Loading