From 6ddd6c1317cff6236b86dc8281cd5cce75aefcda Mon Sep 17 00:00:00 2001 From: Maya Date: Tue, 22 Dec 2020 00:15:51 +0300 Subject: [PATCH 01/26] Add: simple base server part to support cloud storages --- cvat/apps/authentication/auth.py | 38 +++ cvat/apps/engine/admin.py | 14 +- cvat/apps/engine/cloud_provider.py | 306 ++++++++++++++++++ .../migrations/0036_auto_20201218_1751.py | 44 +++ cvat/apps/engine/models.py | 71 +++- cvat/apps/engine/serializers.py | 78 ++++- cvat/apps/engine/task.py | 32 +- cvat/apps/engine/urls.py | 1 + cvat/apps/engine/views.py | 159 ++++++++- cvat/requirements/base.txt | 4 +- cvat/settings/base.py | 3 + 11 files changed, 740 insertions(+), 10 deletions(-) create mode 100644 cvat/apps/engine/cloud_provider.py create mode 100644 cvat/apps/engine/migrations/0036_auto_20201218_1751.py diff --git a/cvat/apps/authentication/auth.py b/cvat/apps/authentication/auth.py index 5e19efb7609c..d00df10f2341 100644 --- a/cvat/apps/authentication/auth.py +++ b/cvat/apps/authentication/auth.py @@ -159,6 +159,10 @@ def is_comment_author(db_user, db_comment): has_rights = (db_comment.author == db_user) return has_rights +@rules.predicate +def is_cloud_storage_owner(db_user, db_storage): + return db_storage.owner == db_user + # AUTH PERMISSIONS RULES rules.add_perm('engine.role.user', has_user_role) rules.add_perm('engine.role.admin', has_admin_role) @@ -190,6 +194,12 @@ def is_comment_author(db_user, db_comment): rules.add_perm('engine.comment.change', has_admin_role | is_comment_author) +rules.add_perm('engine.cloudstorage.create', has_admin_role | has_user_role) +rules.add_perm('engine.cloudstorage.change', has_admin_role | is_cloud_storage_owner) +# dry +rules.add_perm('engine.cloudstorage.access', has_admin_role | is_cloud_storage_owner) +rules.add_perm('engine.cloudstorage.delete', has_admin_role | is_cloud_storage_owner) + class AdminRolePermission(BasePermission): # pylint: disable=no-self-use def has_permission(self, request, view): @@ -329,3 +339,31 
@@ class CommentChangePermission(BasePermission): def has_object_permission(self, request, view, obj): return request.user.has_perm('engine.comment.change', obj) +class CloudStorageCreatePermission(BasePermission): + # pylint: disable=no-self-use + def has_permission(self, request, view): + return request.user.has_perm("engine.cloudstorage.create") + +class CloudStorageAccessPermission(BasePermission): + # pylint: disable=no-self-use + def has_object_permission(self, request, view, obj): + return request.user.has_perm("engine.cloudstorage.change", obj) + +class CloudStorageChangePermission(BasePermission): + # pylint: disable=no-self-use + def has_object_permission(self, request, view, obj): + return request.user.has_perm("engine.cloudstorage.change", obj) + +class CloudStorageDeletePermission(BasePermission): + # pylint: disable=no-self-use + def has_object_permission(self, request, view, obj): + return request.user.has_perm("engine.cloudstorage.change", obj) + +class CloudStorageGetQuerySetMixin(object): + def get_queryset(self): + queryset = super().get_queryset() + user = self.request.user + if has_admin_role(user) or self.detail: + return queryset + else: + return queryset.filter(owner=user) diff --git a/cvat/apps/engine/admin.py b/cvat/apps/engine/admin.py index ddacf69ab027..ddebe5020684 100644 --- a/cvat/apps/engine/admin.py +++ b/cvat/apps/engine/admin.py @@ -4,7 +4,7 @@ # SPDX-License-Identifier: MIT from django.contrib import admin -from .models import Task, Segment, Job, Label, AttributeSpec, Project +from .models import Task, Segment, Job, Label, AttributeSpec, Project, CloudStorage class JobInline(admin.TabularInline): model = Job @@ -84,8 +84,20 @@ class TaskAdmin(admin.ModelAdmin): def has_add_permission(self, request): return False +class CloudStorageAdmin(admin.ModelAdmin): + date_hierarchy = 'updated_date' + readonly_fields = ('created_date', 'updated_date', 'provider_type') + list_display = ('__str__', 'owner', 'created_date', 'updated_date') + 
search_fields = ('provider_type', 'resource_name', 'owner__username', 'owner__first_name', + 'owner__last_name', 'owner__email',) + + empty_value_display = 'unknown' + + def has_add_permission(self, request): + return False admin.site.register(Task, TaskAdmin) admin.site.register(Segment, SegmentAdmin) admin.site.register(Label, LabelAdmin) admin.site.register(Project, ProjectAdmin) +admin.site.register(CloudStorage, CloudStorageAdmin) diff --git a/cvat/apps/engine/cloud_provider.py b/cvat/apps/engine/cloud_provider.py new file mode 100644 index 000000000000..677b0603aeb8 --- /dev/null +++ b/cvat/apps/engine/cloud_provider.py @@ -0,0 +1,306 @@ +#from dataclasses import dataclass +from abc import ABC, abstractmethod, abstractproperty +from io import BytesIO + +import boto3 +from boto3.s3.transfer import TransferConfig +from botocore.exceptions import WaiterError + +from azure.storage.blob import BlobServiceClient +from azure.core.exceptions import ResourceExistsError +from azure.storage.blob import PublicAccess + +from cvat.apps.engine.log import slogger +from cvat.apps.engine.models import CredentialsTypeChoice, CloudProviderChoice + +class CloudStorage(ABC): + + @abstractmethod + def create(self): + pass + + @abstractmethod + def is_exist(self): + pass + + # @abstractmethod + # def head(self): + # pass + + # @abstractproperty + # def supported_files(self): + # pass + + @abstractproperty + def content(self): + pass + + @abstractmethod + def initialize_content(self): + pass + + @abstractmethod + def download_file(self, key): + pass + + @abstractmethod + def upload_file(self, file_obj, file_name): + pass + +def get_cloud_storage_instance(cloud_provider, **details): + instance = None + if cloud_provider == str(CloudProviderChoice.AWS_S3): + instance = AWS_S3( + bucket=details.get('resource_name'), + session_token=details.get('session_token'), + key_id=details.get('key'), + secret_key=details.get('secret_key') + ) + elif cloud_provider == 
str(CloudProviderChoice.AZURE_CONTAINER): + instance = AzureBlobContainer( + container_name=details.get('resource_name'), + sas_token=details.get('session_token'), + accounr_name=details.get('key'), + account_access_key=details.get('secret_key') + ) + return instance + +class AWS_S3(CloudStorage): + def __init__(self, **kwargs): + assert (bucket_name := kwargs.get('bucket')), 'Bucket name was not found' + self._bucket_name = bucket_name + + key_id, secret_key = None, None + if (session_token := kwargs.get('session_token')): + assert (key_id := kwargs.get('key_id')), 'Key id was not found' + assert (secret_key := kwargs.get('secret_key')), 'Secret key was not found' + + self._client_s3 = boto3.client( + 's3', + aws_access_key_id=key_id, + aws_secret_access_key=secret_key, + aws_session_token=session_token + ) + + self._s3 = boto3.resource('s3') + self._bucket = self._s3.Bucket(bucket_name) + self._files = [] + + @property + def bucket(self): + return self._bucket + + @property + def bucket_name(self): + return self._bucket_name + + @property + def content(self): + return map(lambda x: x.key ,self._files) + + # def is_object_exist(self, verifiable='bucket_exist', config=None): + # waiter = self._client_s3.get_waiter(verifiable) + # waiter.wait(**config) + + def is_exist(self): + waiter = self._client_s3.get_waiter('bucket_exists') + try: + waiter.wait( + Bucket=self._bucket_name, + WaiterConfig={ + 'Delay': 10, # The amount of time in seconds to wait between attempts. Default: 5 + 'MaxAttempts': 10 # The maximum number of attempts to be made. 
Default: 20 + } + ) + except WaiterError: + raise Exception('A resource {} unavailable'.format(self._bucket_name)) + + def is_object_exist(self, key_object): + waiter = self._client_s3.get_waiter('object_exists') + try: + waiter.wait( + Bucket=self._bucket, + Key=key_object, + WaiterConfig={ + 'Delay': 10, + 'MaxAttempts': 10, + }, + ) + except WaiterError: + raise Exception('A file {} unavailable'.format(key_object)) + + + def __len__(self): + return len(self._files) + + def __contains__(self, file_name): + return file_name in (item.key for item in self._files.values()) + + def head(self): + pass + + # @property + # def supported_files(self): + # pass + + def upload_file(self, file_obj, file_name): + self._bucket.upload_fileobj( + Fileobj=file_obj, + Key=file_name, + Config=TransferConfig(max_io_queue=10) + ) + + def initialize_content(self): + #TODO: оставить только нужную информацию :D + self._files = list(self._bucket.objects.all()) + + def download_file(self, key): + buf = BytesIO() + with open(buf,'wb') as file_buf: + self.bucket.download_fileobj( + Key=key, + Fileobj=file_buf, + Config=TransferConfig(max_io_queue=10) + )# https://boto3.amazonaws.com/v1/documentation/api/latest/reference/customizations/s3.html#boto3.s3.transfer.TransferConfig + return buf + + def create(self): + try: + _ = self._bucket.create( + ACL='private', + CreateBucketConfiguration={ + 'LocationConstraint': 'us-east-2',#TODO + }, + ObjectLockEnabledForBucket=False + ) + except Exception as ex:#botocore.errorfactory.BucketAlreadyExists + msg = str(ex) + slogger.glob.info(msg) + raise Exception(str(ex)) + +class AzureBlobContainer(CloudStorage): + + def __init__(self, **kwargs): + assert (container_name := kwargs.get('container_name')), 'Container name was not found' + assert (account_name := kwargs.get('account_name')), 'Account name was not found' + assert (credentials := kwargs.get('sas_token') if kwargs.get('sas_token') else kwargs.get('account_access_key')), 'Credentials were not 
granted' + + self._blob_service_client = BlobServiceClient(account_url=self.account_url, credential=credentials) + self._container_client = self._blob_service_client.get_container_client(container_name) + + self._account_name = account_name + self._files = [] + + @property + def container(self): + return self._container + + @property + def account_url(self): + return "{}.blob.core.windows.net".format(self._account_name) + + def create(self): + try: + self._container_client.create_container( + metadata={ + 'type' : 'created by CVAT', + }, + public_access=PublicAccess.OFF + ) + except ResourceExistsError: + msg = f"{self._container_client.container_name} alredy exists" + slogger.glob.info(msg) + raise Exception(msg) + + def is_exist(self): + try: + self._container_client.create_container() + self._container_client.delete_container() + return False + except ResourceExistsError: + return True + + def is_object_exist(self, file_name): + blob_client = self._container_client.get_blob_client(file_name) + return blob_client.exists() + + def head(self): + pass + + # @property + # def supported_files(self): + # pass + + def upload_file(self, file_obj, file_name): + self._container_client.upload_blob(name=file_name, data=file_obj) + + + # def multipart_upload(self, file_obj): + # pass + + def initialize_content(self): + self._files = self._container_client.list_blobs() + + @property + def content(self): + return self._files + + def download_file(self, key): + MAX_CONCURRENCY = 3 + storage_stream_downloader = self._container_client.download_blob( + blob=key, + offset=None, + length=None, + ) + return storage_stream_downloader.content_as_bytes(max_concurrency=MAX_CONCURRENCY) + + +class GOOGLE_DRIVE(CloudStorage): + pass + +class Credentials: + __slots__ = ('key', 'secret_key', 'session_token', 'credentials_type') + + def __init__(self, **credentials): + self.key = credentials.get('key', None) + self.secret_key = credentials.get('secret_key', None) + self.session_token = 
credentials.get('session_token', None) + self.credentials_type = credentials.get('credentials_type', None) + + def convert_to_db(self): + converted_credentials = { + CredentialsTypeChoice.TOKEN : self.session_token, + CredentialsTypeChoice.KEY_TOKEN_PAIR : " ".join([self.key, self.session_token]), + CredentialsTypeChoice.KEY_SECRET_KEY_PAIR : " ".join([self.key, self.secret_key]) + } + return converted_credentials[self.credentials_type] + + def convert_from_db(self, credentials): + self.credentials_type = credentials.get('type') + if self.credentials_type == CredentialsTypeChoice.TOKEN: + self.session_token = credentials.get('value') + else: + self.key, second = credentials.get('value').split() + if self.credentials_type == CredentialsTypeChoice.KEY_TOKEN_PAIR: + self.session_token = second + else: self.secret_key = second + + def mapping_with_new_values(self, credentials): + # credentials = { + # 'type' : string, optional + # 'key' : string, optional + # 'secret_key': string, optional + # 'session_token': string, optional + # } + + if hasattr(credentials, 'type'): + self.credentials_type = credentials.get('type') + if hasattr(credentials, 'key'): + self.key = credentials.get('key') + elif hasattr(credentials, 'secret_key'): + self.secret_key = credentials.get('secret_key') + elif hasattr(credentials, 'session_token'): + self.session_token = credentials.get('session_token') + + def values(self): + return [self.key, self.secret_key, self.session_token] diff --git a/cvat/apps/engine/migrations/0036_auto_20201218_1751.py b/cvat/apps/engine/migrations/0036_auto_20201218_1751.py new file mode 100644 index 000000000000..a7c05ae012ce --- /dev/null +++ b/cvat/apps/engine/migrations/0036_auto_20201218_1751.py @@ -0,0 +1,44 @@ +# Generated by Django 3.1.1 on 2020-12-18 17:51 + +import cvat.apps.engine.models +from django.conf import settings +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + 
dependencies = [ + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ('engine', '0035_data_storage'), + ] + + operations = [ + migrations.AlterField( + model_name='data', + name='storage', + field=models.CharField(choices=[('cloud_storage', 'CLOUD_STORAGE'), ('local', 'LOCAL'), ('share', 'SHARE')], default=cvat.apps.engine.models.StorageChoice['LOCAL'], max_length=15), + ), + migrations.CreateModel( + name='CloudStorage', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('provider_type', models.CharField(choices=[('AWS_S3_BUCKET', 'AWS_S3'), ('AZURE_CONTAINER', 'AZURE_CONTAINER'), ('GOOGLE_DRIVE', 'GOOGLE_DRIVE')], max_length=20)), + ('resource_name', models.CharField(max_length=50)), + ('created_date', models.DateTimeField(auto_now_add=True)), + ('updated_date', models.DateTimeField(auto_now=True)), + ('credentials', models.CharField(max_length=100, unique=True)), + ('credentials_type', models.CharField(choices=[('TOKEN', 'TOKEN'), ('KEY_TOKEN_PAIR', 'KEY_TOKEN_PAIR'), ('KEY_SECRET_KEY_PAIR', 'KEY_SECRET_KEY_PAIR')], max_length=20)), + ('owner', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='cloud_storages', to=settings.AUTH_USER_MODEL)), + ], + options={ + 'default_permissions': (), + 'unique_together': {('provider_type', 'resource_name', 'credentials')}, + }, + ), + migrations.AddField( + model_name='data', + name='cloud_storage', + field=models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='data', to='engine.cloudstorage'), + ), + ] diff --git a/cvat/apps/engine/models.py b/cvat/apps/engine/models.py index da8ca1bf3571..0c59f06a5f64 100644 --- a/cvat/apps/engine/models.py +++ b/cvat/apps/engine/models.py @@ -55,7 +55,7 @@ def __str__(self): return self.value class StorageChoice(str, Enum): - #AWS_S3 = 'aws_s3_bucket' + CLOUD_STORAGE = 'cloud_storage' LOCAL = 'local' SHARE = 'share' @@ -79,6 +79,7 @@ 
class Data(models.Model): default=DataChoice.IMAGESET) storage_method = models.CharField(max_length=15, choices=StorageMethodChoice.choices(), default=StorageMethodChoice.FILE_SYSTEM) storage = models.CharField(max_length=15, choices=StorageChoice.choices(), default=StorageChoice.LOCAL) + cloud_storage = models.ForeignKey('CloudStorage', on_delete=models.SET_NULL, null=True, related_name='data') class Meta: default_permissions = () @@ -257,7 +258,7 @@ class ServerFile(models.Model): class Meta: default_permissions = () -# For URLs +# For URLs and files on remote cloud storages class RemoteFile(models.Model): data = models.ForeignKey(Data, on_delete=models.CASCADE, null=True, related_name='remote_files') file = models.CharField(max_length=1024) @@ -481,3 +482,69 @@ class Comment(models.Model): message = models.TextField(default='') created_date = models.DateTimeField(auto_now_add=True) updated_date = models.DateTimeField(auto_now=True) + +class CloudProviderChoice(str, Enum): + AWS_S3 = 'AWS_S3_BUCKET' + AZURE_CONTAINER = 'AZURE_CONTAINER' + GOOGLE_DRIVE = 'GOOGLE_DRIVE' + + @classmethod + def choices(cls): + return tuple((x.value, x.name) for x in cls) + + @classmethod + def list(cls): + return list(map(lambda x: x.value, cls)) + + def __str__(self): + return self.value + +class CredentialsTypeChoice(str, Enum): + TOKEN = 'TOKEN' + KEY_TOKEN_PAIR = 'KEY_TOKEN_PAIR' + KEY_SECRET_KEY_PAIR = 'KEY_SECRET_KEY_PAIR' + + @classmethod + def choices(cls): + return tuple((x.value, x.name) for x in cls) + + @classmethod + def list(cls): + return list(map(lambda x: x.value, cls)) + + def __str__(self): + return self.value + +class CloudStorage(models.Model): + provider_type = models.CharField(max_length=20, choices=CloudProviderChoice.choices()) + resource_name = models.CharField(max_length=50) + owner = models.ForeignKey(User, null=True, blank=True, + on_delete=models.SET_NULL, related_name="cloud_storages") + created_date = models.DateTimeField(auto_now_add=True) + 
updated_date = models.DateTimeField(auto_now=True) + credentials = models.CharField(max_length=100, unique=True) + credentials_type = models.CharField(max_length=20, choices=CredentialsTypeChoice.choices())#auth_type + + class Meta: + default_permissions = () + unique_together = (('provider_type', 'resource_name', 'credentials'),) + + # def __str__(self): + # template = "{} {} {}".format(self.provider_type, self.resource_name, self.id) + # return template + + # def get_url_resource(self): + # urls_templates = { + # CLOUD_PROVIDERS.AWS_S3 : '{resource}.s3.{region}.amazoneaws.com', + # CLOUD_PROVIDERS.AZURE_CONTAINER : '', + # } + # return urls_templates[self.provider_type].format(resource=self.resource_name) + + def get_storage_dirname(self): + return os.path.join(settings.CLOUD_STORAGE_ROOT, str(self.id)) + + def get_storage_logs_dirname(self): + return os.path.join(self.get_storage_dirname(), 'logs') + + def get_log_path(self): + return os.path.join(self.get_storage_dirname(), "storage.log") \ No newline at end of file diff --git a/cvat/apps/engine/serializers.py b/cvat/apps/engine/serializers.py index 660c36afb57a..225508278843 100644 --- a/cvat/apps/engine/serializers.py +++ b/cvat/apps/engine/serializers.py @@ -12,6 +12,7 @@ from cvat.apps.engine import models from cvat.apps.engine.log import slogger from cvat.apps.dataset_manager.formats.utils import get_label_color +from cvat.apps.engine.cloud_provider import Credentials, get_cloud_storage_instance class BasicUserSerializer(serializers.ModelSerializer): def validate(self, data): @@ -255,12 +256,13 @@ class DataSerializer(serializers.ModelSerializer): remote_files = RemoteFileSerializer(many=True, default=[]) use_cache = serializers.BooleanField(default=False) copy_data = serializers.BooleanField(default=False) + cloud_storage_id = serializers.IntegerField(write_only=True, allow_null=True, required=False) class Meta: model = models.Data fields = ('chunk_size', 'size', 'image_quality', 'start_frame', 
'stop_frame', 'frame_filter', 'compressed_chunk_type', 'original_chunk_type', 'client_files', 'server_files', 'remote_files', 'use_zip_chunks', - 'use_cache', 'copy_data') + 'use_cache', 'copy_data', 'cloud_storage_id',) # pylint: disable=no-self-use def validate_frame_filter(self, value): @@ -648,3 +650,77 @@ def create(self, validated_data): models.Comment.objects.create(**comment) return db_review + +class CloudStorageSerializer(serializers.ModelSerializer): + owner = BasicUserSerializer(required=False) + session_token = serializers.CharField(max_length=50, allow_blank=True, required=False) + key = serializers.CharField(max_length=30, allow_blank=True, required=False) + secret_key = serializers.CharField(max_length=50, allow_blank=True, required=False) + + class Meta: + model = models.CloudStorage + fields = ( + 'provider_type', 'resource_name', 'session_token', 'owner', + 'key', 'secret_key', 'credentials_type', 'created_date', 'updated_date', + ) + read_only_fields = ('provider_type', 'created_date', 'updated_date', 'owner') + + def validate(self, attrs): + credentials = Credentials( + key = attrs.get('key'), + secret_key = attrs.get('secret_key'), + session_token = attrs.get('session_token'), + ) + if any(credentials.values()): + if attrs.get('provider_type') in (str(models.CloudProviderChoice.AZURE_CONTAINER)) and not credentials.key: + raise serializers.NotAuthenticated() + else: + # no access rights granted + raise serializers.NotAuthenticated() + return attrs + + def create(self, validated_data): + provider_type = validated_data.get('provider_type') + should_be_created = validated_data.pop('should_be_created') + credentials = Credentials( + key = validated_data.pop('key'), + secret_key = validated_data.pop('secret_key'), + session_token = validated_data.pop('session_token'), + credentials_type = validated_data.get('credentials_type') + ) + if should_be_created: + details = { + 'resource_name': validated_data.get('resource_name'), + 'session_token': 
credentials.session_token, + 'key': credentials.key, + 'secret_key': credentials.secret_key, + } + cloud_storage_instance = get_cloud_storage_instance(cloud_provider=provider_type, **details) + + try: + cloud_storage_instance.create() + except Exception: + pass + + db_storage = models.CloudStorage.objects.create( + credentials=credentials.convert_to_db(), + **validated_data + ) + db_storage.save() + return db_storage + + # pylint: disable=no-self-use + def update(self, instance, validated_data): + credentials = Credentials() + credentials.convert_from_db({ + 'type': instance.credentials_type, + 'value': instance.credentials, + }) + tmp = {k:v for k,v in validated_data.items() if k in ('key', 'secret_key', 'session_token', 'credentials_type')} + credentials.mapping_with_new_values(tmp) + instance.credentials = credentials.convert_to_db() + instance.credentials_type = validated_data.get('credentials_type', instance.credentials_type) + instance.resource_name = validated_data.get('resource_name', instance.resource_name) + + instance.save() + return instance \ No newline at end of file diff --git a/cvat/apps/engine/task.py b/cvat/apps/engine/task.py index 9d89d1d0e850..39e1a48b2de0 100644 --- a/cvat/apps/engine/task.py +++ b/cvat/apps/engine/task.py @@ -222,7 +222,8 @@ def _create_thread(tid, data): upload_dir = db_data.get_upload_dirname() if data['remote_files']: - data['remote_files'] = _download_data(data['remote_files'], upload_dir) + if db_data.storage != StorageChoice.CLOUD_STORAGE: + data['remote_files'] = _download_data(data['remote_files'], upload_dir) meta_info_file = [] media = _count_files(data, meta_info_file) @@ -368,6 +369,35 @@ def update_progress(progress): for (path, frame), (w, h) in zip(chunk_paths, img_sizes) ]) + # def processing_files_on_cloud_storage(): + # from .cloud_provider import Credentials, get_cloud_storage_instance + # from cvat.apps.engine.models import CloudProviderChoice + + # #TODO: only on first iteration of implementation + # if 
media_type != 'images': + # raise NotImplementedError() + + # if not meta_info_file: + # raise Exception('A meta information was not found') + + # db_cloud_storage = db_data.cloud_storage + # credentials = Credentials() + # credentials.convert_from_db({ + # 'type': db_cloud_storage.credentials_type, + # 'value': db_cloud_storage.value, + # }) + + # details = { + # 'resource_name': db_cloud_storage.resource_name, + # 'session_token': credentials.session_token, + # 'key': credentials.key, + # 'secret_key': credentials.secret_key, + # } + # cloud_storage_instance = get_cloud_storage_instance(cloud_provider=provider_type, **details) + # meta = cloud_storage_instance.download_file(meta_info_file[0]) + # #TODO + + if db_data.storage_method == StorageMethodChoice.FILE_SYSTEM or not settings.USE_CACHE: counter = itertools.count() generator = itertools.groupby(extractor, lambda x: next(counter) // db_data.chunk_size) diff --git a/cvat/apps/engine/urls.py b/cvat/apps/engine/urls.py index da0c1f2e6bbe..0d682054ad5a 100644 --- a/cvat/apps/engine/urls.py +++ b/cvat/apps/engine/urls.py @@ -53,6 +53,7 @@ def _map_format_to_schema(request, scheme=None): router.register('issues', views.IssueViewSet) router.register('comments', views.CommentViewSet) router.register('restrictions', RestrictionsViewSet, basename='restrictions') +router.register('cloudstorages', views.CloudStorageViewSet) urlpatterns = [ # Entry point for a client diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index 972f96ffb68d..1b1b6363ddb6 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -16,7 +16,7 @@ from django.conf import settings from django.contrib.auth.models import User from django.db import IntegrityError -from django.http import HttpResponse +from django.http import HttpResponse, HttpResponseNotFound from django.utils import timezone from django.utils.decorators import method_decorator from django_filters import rest_framework as filters @@ -39,7 +39,8 @@ from 
cvat.apps.engine.frame_provider import FrameProvider from cvat.apps.engine.models import ( Job, StatusChoice, Task, Project, Review, Issue, - Comment, StorageMethodChoice, ReviewStatus, StorageChoice + Comment, StorageMethodChoice, ReviewStatus, StorageChoice, + CloudStorage, CredentialsTypeChoice, CloudProviderChoice ) from cvat.apps.engine.serializers import ( AboutSerializer, AnnotationFileSerializer, BasicUserSerializer, @@ -47,13 +48,15 @@ FileInfoSerializer, JobSerializer, LabeledDataSerializer, LogEventSerializer, ProjectSerializer, ProjectSearchSerializer, RqStatusSerializer, TaskSerializer, UserSerializer, PluginsSerializer, ReviewSerializer, - CombinedReviewSerializer, IssueSerializer, CombinedIssueSerializer, CommentSerializer + CombinedReviewSerializer, IssueSerializer, CombinedIssueSerializer, CommentSerializer, + CloudStorageSerializer ) from cvat.apps.engine.utils import av_scan_paths from . import models, task from .log import clogger, slogger +from cvat.apps.engine.cloud_provider import Credentials, get_cloud_storage_instance class ServerViewSet(viewsets.ViewSet): serializer_class = None @@ -416,9 +419,12 @@ def data(self, request, pk): if data['use_cache']: db_task.data.storage_method = StorageMethodChoice.CACHE db_task.data.save(update_fields=['storage_method']) - if data['server_files'] and data.get('copy_data') == False: + if data['server_files'] and not data.get('copy_data'): db_task.data.storage = StorageChoice.SHARE db_task.data.save(update_fields=['storage']) + if db_data.cloud_storage: + db_task.data.storage = StorageChoice.CLOUD_STORAGE + db_task.data.save(update_fields=['storage']) # if the value of stop_frame is 0, then inside the function we cannot know # the value specified by the user or it's default value from the database if 'stop_frame' not in serializer.validated_data: @@ -950,6 +956,151 @@ def self(self, request): serializer = serializer_class(request.user, context={ "request": request }) return Response(serializer.data) + 
+@method_decorator(name='list', decorator=swagger_auto_schema( + operation_summary='Returns a paginated list of storages according to query parameters', + manual_parameters=[ + openapi.Parameter('provider_type', openapi.IN_QUERY, description="A supported provider of cloud storages", + type=openapi.TYPE_STRING, enum=CloudProviderChoice.list()), + openapi.Parameter('resource_name', openapi.IN_QUERY, description="A name of buket or container", type=openapi.TYPE_STRING), + #openapi.Parameter('key', openapi.IN_QUERY, description="Access key id for AWS S3 or Account Name for Azure container", type=openapi.TYPE_STRING), + #openapi.Parameter('secret_key', openapi.IN_QUERY, description="Secret key", type=openapi.TYPE_STRING), + #openapi.Parameter('token', openapi.IN_QUERY, description="A session token for s3 or sas token for azure", type=openapi.TYPE_STRING), + openapi.Parameter('owner', openapi.IN_QUERY, description="A resource owner ", type=openapi.TYPE_STRING), + openapi.Parameter('credentials_type', openapi.IN_QUERY, description="A type of a granting access", type=openapi.TYPE_STRING, enum=CredentialsTypeChoice.list()), + ], + responses={'200': CloudStorageSerializer(many=True)} + ) +) +@method_decorator(name='retrieve', decorator=swagger_auto_schema(operation_summary='Method returns details of a specific cloud storage')) +@method_decorator(name='create', decorator=swagger_auto_schema( + operation_summary='Method creates a cloud storage with a specified characteristics', + responses={'201': openapi.Response(description='A storage has beed created')} + ) +) +@method_decorator(name='destroy', decorator=swagger_auto_schema(operation_summary='Method deletes a specific cloud storage')) +@method_decorator(name='partial_update', decorator=swagger_auto_schema(operation_summary='Methods does a partial update of chosen fields in a cloud storage instance')) +class CloudStorageViewSet(auth.CloudStorageGetQuerySetMixin, viewsets.ModelViewSet): + http_method_names = ['get', 'post', 
'patch', 'delete'] #'head' + queryset = CloudStorage.objects.all().prefetch_related('data').order_by('-id') + serializer_class = CloudStorageSerializer + search_fields = ("provider_type", "resource_name", "owner__username") + + def get_permissions(self): + http_method = self.request.method + permissions = [IsAuthenticated] + + if http_method in SAFE_METHODS: # GET, HEAD, OPTIONS + permissions.append(auth.CloudStorageAccessPermission) + elif http_method in ("POST"): + permissions.append(auth.CloudStorageCreatePermission) + elif http_method in ("PATCH"): + permissions.append(auth.CloudStorageChangePermission) + elif http_method in ("DELETE"): + permissions.append(auth.CloudStorageDeletePermission) + else: + permissions.append(auth.AdminRolePermission) + + return [perm() for perm in permissions] + + def perform_create(self, serializer): + # check that instance of cloud storage exists + provider_type = serializer.validated_data.get('provider_type') + details = { + 'resource_name': serializer.validated_data.get('resource_name'), + 'session_token': serializer.validated_data.get('session_token'), + 'key': serializer.validated_data.get('key'), + 'secret_key': serializer.validated_data.get('secret_key'), + } + cloud_storage_instance = get_cloud_storage_instance(cloud_provider=provider_type, **details) + + try: + cloud_storage_instance.is_exist() + except Exception as ex: + message = str(ex) + slogger.glob.error(message) + raise serializers.ValidationError(message) + + owner = self.request.data.get('owner') + if owner: + serializer.save() + else: + serializer.save(owner=self.request.user) + + def perform_destroy(self, instance): + cloud_storage_dirname = instance.get_storage_dirname() + super().perform_destroy(instance) + shutil.rmtree(cloud_storage_dirname, ignore_errors=True) + + # def list(self, request): + # provider_type = request.query_params.get('provider_type') + # if provider_type: + # try: + # assert provider_type in CloudProviderChoice.list(), (msg:='Unsupported 
type of a cloud storage provider') + # except AssertionError: + # return Response(data=msg, status=HTTP_400_BAD_REQUEST) + # queryset = self.get_queryset() + # if provider_type: + # queryset = queryset.filter(provider_type=provider_type) + # serializer = CloudStorageSerializer(queryset, many=True) + # return Response(serializer.data) + + def retrieve(self, request, pk): + from django.forms.models import model_to_dict + try: + db_storage = CloudStorage.objects.get(pk=pk) + credentials = Credentials() + credentials.convert_from_db({ + 'type': db_storage.credentials_type, + 'value': db_storage.credentials, + }) + serializer = self.get_serializer(model_to_dict(db_storage), context={ + 'key': credentials.key, + 'secret_key': credentials.secret_key, + 'session_token': credentials.session_token + }) + return Response(serializer.data) + except CloudStorage.DoesNotExist: + message = f"Storage {pk} does not exist" + slogger.glob.error(message) + return HttpResponseNotFound(message) + except Exception: + pass + + @method_decorator( + name='retrieve', + decorator=swagger_auto_schema( + operation_summary='Method returns list of available files', + responses={'200': openapi.Response(description='A list of a storage content')} + ) + ) + @action(detail=True, methods=['GET']) + def content(self, request, pk): + try: + db_storage = CloudStorage.objects.get(pk=pk) + + credentials = Credentials() + credentials.convert_from_db({ + 'type': db_storage.credentials_type, + 'value': db_storage.credentials, + }) + details = { + 'resource_name': db_storage.resource_name, + 'session_token': credentials.session_token, + 'key': credentials.key, + 'secret_key': credentials.secret_key, + } + cloud_storage_instance = get_cloud_storage_instance(cloud_provider=db_storage.provider_type, **details) + + + cloud_storage_instance.initialize_content() + return Response(data=cloud_storage_instance.content, content_type="text/plain") + + except CloudStorage.DoesNotExist: + message = f"Storage {pk} does not 
exist" + slogger.glob.error(message) + return HttpResponseNotFound(message) + def rq_handler(job, exc_type, exc_value, tb): job.exc_info = "".join( traceback.format_exception_only(exc_type, exc_value)) diff --git a/cvat/requirements/base.txt b/cvat/requirements/base.txt index 4d9dabf1e336..d178666766da 100644 --- a/cvat/requirements/base.txt +++ b/cvat/requirements/base.txt @@ -44,4 +44,6 @@ tensorflow==2.2.1 # Optional requirement of Datumaro # archives. Don't use as a python module because it has GPL license. patool==1.12 diskcache==5.0.2 -git+https://github.com/openvinotoolkit/datumaro@v0.1.3 \ No newline at end of file +git+https://github.com/openvinotoolkit/datumaro@v0.1.3 +boto3==1.16.26 +azure-storage-blob==12.6.0 \ No newline at end of file diff --git a/cvat/settings/base.py b/cvat/settings/base.py index 8eb2a86097d1..47f3df133b3c 100644 --- a/cvat/settings/base.py +++ b/cvat/settings/base.py @@ -361,6 +361,9 @@ def add_ssh_keys(): MIGRATIONS_LOGS_ROOT = os.path.join(LOGS_ROOT, 'migrations') os.makedirs(MIGRATIONS_LOGS_ROOT, exist_ok=True) +CLOUD_STORAGE_ROOT = os.path.join(BASE_DIR, 'storages') +os.makedirs(CLOUD_STORAGE_ROOT, exist_ok=True) + LOGGING = { 'version': 1, 'disable_existing_loggers': False, From 63c12f15acaec111efdea0435b24901eccc8eb6c Mon Sep 17 00:00:00 2001 From: Maya Date: Wed, 23 Dec 2020 10:59:53 +0300 Subject: [PATCH 02/26] Fix: save only necessary info, credentials after code redesign --- cvat/apps/engine/cloud_provider.py | 84 ++++++++++++++---------------- 1 file changed, 40 insertions(+), 44 deletions(-) diff --git a/cvat/apps/engine/cloud_provider.py b/cvat/apps/engine/cloud_provider.py index 677b0603aeb8..7ba968881be8 100644 --- a/cvat/apps/engine/cloud_provider.py +++ b/cvat/apps/engine/cloud_provider.py @@ -15,6 +15,13 @@ class CloudStorage(ABC): + def __init__(self): + self._files = [] + + @abstractproperty + def name(self): + pass + @abstractmethod def create(self): pass @@ -31,10 +38,6 @@ def is_exist(self): # def 
supported_files(self): # pass - @abstractproperty - def content(self): - pass - @abstractmethod def initialize_content(self): pass @@ -47,6 +50,16 @@ def download_file(self, key): def upload_file(self, file_obj, file_name): pass + def __contains__(self, file_name): + return file_name in (item['name'] for item in self._files.values()) + + def __len__(self): + return len(self._files) + + @property + def content(self): + return map(lambda x: x['name'] , self._files) + def get_cloud_storage_instance(cloud_provider, **details): instance = None if cloud_provider == str(CloudProviderChoice.AWS_S3): @@ -67,6 +80,7 @@ def get_cloud_storage_instance(cloud_provider, **details): class AWS_S3(CloudStorage): def __init__(self, **kwargs): + super().__init__() assert (bucket_name := kwargs.get('bucket')), 'Bucket name was not found' self._bucket_name = bucket_name @@ -84,20 +98,15 @@ def __init__(self, **kwargs): self._s3 = boto3.resource('s3') self._bucket = self._s3.Bucket(bucket_name) - self._files = [] @property def bucket(self): return self._bucket @property - def bucket_name(self): + def name(self): return self._bucket_name - @property - def content(self): - return map(lambda x: x.key ,self._files) - # def is_object_exist(self, verifiable='bucket_exist', config=None): # waiter = self._client_s3.get_waiter(verifiable) # waiter.wait(**config) @@ -129,13 +138,6 @@ def is_object_exist(self, key_object): except WaiterError: raise Exception('A file {} unavailable'.format(key_object)) - - def __len__(self): - return len(self._files) - - def __contains__(self, file_name): - return file_name in (item.key for item in self._files.values()) - def head(self): pass @@ -151,8 +153,10 @@ def upload_file(self, file_obj, file_name): ) def initialize_content(self): - #TODO: оставить только нужную информацию :D - self._files = list(self._bucket.objects.all()) + files = self._bucket.objects.all() + self._files = [{ + 'name': item.key, + } for item in files] def download_file(self, key): buf = 
BytesIO() @@ -181,6 +185,7 @@ def create(self): class AzureBlobContainer(CloudStorage): def __init__(self, **kwargs): + super().__init__() assert (container_name := kwargs.get('container_name')), 'Container name was not found' assert (account_name := kwargs.get('account_name')), 'Account name was not found' assert (credentials := kwargs.get('sas_token') if kwargs.get('sas_token') else kwargs.get('account_access_key')), 'Credentials were not granted' @@ -189,11 +194,14 @@ def __init__(self, **kwargs): self._container_client = self._blob_service_client.get_container_client(container_name) self._account_name = account_name - self._files = [] @property def container(self): - return self._container + return self._container_client + + @property + def name(self): + return self._container_client.container_name @property def account_url(self): @@ -239,11 +247,10 @@ def upload_file(self, file_obj, file_name): # pass def initialize_content(self): - self._files = self._container_client.list_blobs() - - @property - def content(self): - return self._files + files = self._container_client.list_blobs() + self._files = [{ + 'name': item.name + } for item in files] def download_file(self, key): MAX_CONCURRENCY = 3 @@ -262,9 +269,9 @@ class Credentials: __slots__ = ('key', 'secret_key', 'session_token', 'credentials_type') def __init__(self, **credentials): - self.key = credentials.get('key', None) - self.secret_key = credentials.get('secret_key', None) - self.session_token = credentials.get('session_token', None) + self.key = credentials.get('key', '') + self.secret_key = credentials.get('secret_key', '') + self.session_token = credentials.get('session_token', '') self.credentials_type = credentials.get('credentials_type', None) def convert_to_db(self): @@ -286,21 +293,10 @@ def convert_from_db(self, credentials): else: self.secret_key = second def mapping_with_new_values(self, credentials): - # credentials = { - # 'type' : string, optional - # 'key' : string, optional - # 
'secret_key': string, optional - # 'session_token': string, optional - # } - - if hasattr(credentials, 'type'): - self.credentials_type = credentials.get('type') - if hasattr(credentials, 'key'): - self.key = credentials.get('key') - elif hasattr(credentials, 'secret_key'): - self.secret_key = credentials.get('secret_key') - elif hasattr(credentials, 'session_token'): - self.session_token = credentials.get('session_token') + self.credentials_type = credentials.get('credentials_type', self.credentials_type) + self.key = credentials.get('key', self.key) + self.secret_key = credentials.get('secret_key', self.secret_key) + self.session_token = credentials.get('session_token', self.session_token) def values(self): return [self.key, self.secret_key, self.session_token] From 627398d48d8f0028691728d9b706945fdcb3613d Mon Sep 17 00:00:00 2001 From: Maya Date: Fri, 25 Dec 2020 19:44:42 +0300 Subject: [PATCH 03/26] Deleted unnecessary & some changes & some fixes --- cvat/apps/authentication/auth.py | 13 ------ cvat/apps/engine/models.py | 12 +----- cvat/apps/engine/serializers.py | 21 +++++---- cvat/apps/engine/views.py | 74 +++++++++----------------------- 4 files changed, 36 insertions(+), 84 deletions(-) diff --git a/cvat/apps/authentication/auth.py b/cvat/apps/authentication/auth.py index d00df10f2341..5c1f8ea3c81c 100644 --- a/cvat/apps/authentication/auth.py +++ b/cvat/apps/authentication/auth.py @@ -196,9 +196,6 @@ def is_cloud_storage_owner(db_user, db_storage): rules.add_perm('engine.cloudstorage.create', has_admin_role | has_user_role) rules.add_perm('engine.cloudstorage.change', has_admin_role | is_cloud_storage_owner) -# dry -rules.add_perm('engine.cloudstorage.access', has_admin_role | is_cloud_storage_owner) -rules.add_perm('engine.cloudstorage.delete', has_admin_role | is_cloud_storage_owner) class AdminRolePermission(BasePermission): # pylint: disable=no-self-use @@ -339,11 +336,6 @@ class CommentChangePermission(BasePermission): def has_object_permission(self, 
request, view, obj): return request.user.has_perm('engine.comment.change', obj) -class CloudStorageCreatePermission(BasePermission): - # pylint: disable=no-self-use - def has_permission(self, request, view): - return request.user.has_perm("engine.cloudstorage.create") - class CloudStorageAccessPermission(BasePermission): # pylint: disable=no-self-use def has_object_permission(self, request, view, obj): @@ -354,11 +346,6 @@ class CloudStorageChangePermission(BasePermission): def has_object_permission(self, request, view, obj): return request.user.has_perm("engine.cloudstorage.change", obj) -class CloudStorageDeletePermission(BasePermission): - # pylint: disable=no-self-use - def has_object_permission(self, request, view, obj): - return request.user.has_perm("engine.cloudstorage.change", obj) - class CloudStorageGetQuerySetMixin(object): def get_queryset(self): queryset = super().get_queryset() diff --git a/cvat/apps/engine/models.py b/cvat/apps/engine/models.py index 0c59f06a5f64..6dd3740522a9 100644 --- a/cvat/apps/engine/models.py +++ b/cvat/apps/engine/models.py @@ -529,16 +529,8 @@ class Meta: default_permissions = () unique_together = (('provider_type', 'resource_name', 'credentials'),) - # def __str__(self): - # template = "{} {} {}".format(self.provider_type, self.resource_name, self.id) - # return template - - # def get_url_resource(self): - # urls_templates = { - # CLOUD_PROVIDERS.AWS_S3 : '{resource}.s3.{region}.amazoneaws.com', - # CLOUD_PROVIDERS.AZURE_CONTAINER : '', - # } - # return urls_templates[self.provider_type].format(resource=self.resource_name) + def __str__(self): + return "{} {} {}".format(self.provider_type, self.resource_name, self.id) def get_storage_dirname(self): return os.path.join(settings.CLOUD_STORAGE_ROOT, str(self.id)) diff --git a/cvat/apps/engine/serializers.py b/cvat/apps/engine/serializers.py index 225508278843..48dc44ddb28c 100644 --- a/cvat/apps/engine/serializers.py +++ b/cvat/apps/engine/serializers.py @@ -651,6 +651,11 @@ 
def create(self, validated_data): return db_review +class BaseCloudStorageSerializer(serializers.ModelSerializer): + class Meta: + model = models.CloudStorage + fields = '__all__' + class CloudStorageSerializer(serializers.ModelSerializer): owner = BasicUserSerializer(required=False) session_token = serializers.CharField(max_length=50, allow_blank=True, required=False) @@ -663,7 +668,7 @@ class Meta: 'provider_type', 'resource_name', 'session_token', 'owner', 'key', 'secret_key', 'credentials_type', 'created_date', 'updated_date', ) - read_only_fields = ('provider_type', 'created_date', 'updated_date', 'owner') + read_only_fields = ('created_date', 'updated_date', 'owner') def validate(self, attrs): credentials = Credentials( @@ -672,20 +677,20 @@ def validate(self, attrs): session_token = attrs.get('session_token'), ) if any(credentials.values()): - if attrs.get('provider_type') in (str(models.CloudProviderChoice.AZURE_CONTAINER)) and not credentials.key: - raise serializers.NotAuthenticated() + if attrs.get('provider_type') == models.CloudProviderChoice.AZURE_CONTAINER and not credentials.key: + raise serializers.ValidationError('A credentials were not found') else: # no access rights granted - raise serializers.NotAuthenticated() + raise serializers.ValidationError('A credentials were not found') return attrs def create(self, validated_data): provider_type = validated_data.get('provider_type') - should_be_created = validated_data.pop('should_be_created') + should_be_created = validated_data.pop('should_be_created', None) credentials = Credentials( - key = validated_data.pop('key'), - secret_key = validated_data.pop('secret_key'), - session_token = validated_data.pop('session_token'), + key = validated_data.pop('key', ''), + secret_key = validated_data.pop('secret_key', ''), + session_token = validated_data.pop('session_token', ''), credentials_type = validated_data.get('credentials_type') ) if should_be_created: diff --git a/cvat/apps/engine/views.py 
b/cvat/apps/engine/views.py index 1b1b6363ddb6..3bdda3850003 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -49,7 +49,7 @@ LogEventSerializer, ProjectSerializer, ProjectSearchSerializer, RqStatusSerializer, TaskSerializer, UserSerializer, PluginsSerializer, ReviewSerializer, CombinedReviewSerializer, IssueSerializer, CombinedIssueSerializer, CommentSerializer, - CloudStorageSerializer + CloudStorageSerializer, BaseCloudStorageSerializer ) from cvat.apps.engine.utils import av_scan_paths @@ -963,10 +963,7 @@ def self(self, request): openapi.Parameter('provider_type', openapi.IN_QUERY, description="A supported provider of cloud storages", type=openapi.TYPE_STRING, enum=CloudProviderChoice.list()), openapi.Parameter('resource_name', openapi.IN_QUERY, description="A name of buket or container", type=openapi.TYPE_STRING), - #openapi.Parameter('key', openapi.IN_QUERY, description="Access key id for AWS S3 or Account Name for Azure container", type=openapi.TYPE_STRING), - #openapi.Parameter('secret_key', openapi.IN_QUERY, description="Secret key", type=openapi.TYPE_STRING), - #openapi.Parameter('token', openapi.IN_QUERY, description="A session token for s3 or sas token for azure", type=openapi.TYPE_STRING), - openapi.Parameter('owner', openapi.IN_QUERY, description="A resource owner ", type=openapi.TYPE_STRING), + openapi.Parameter('owner', openapi.IN_QUERY, description="A resource owner", type=openapi.TYPE_STRING), openapi.Parameter('credentials_type', openapi.IN_QUERY, description="A type of a granting access", type=openapi.TYPE_STRING, enum=CredentialsTypeChoice.list()), ], responses={'200': CloudStorageSerializer(many=True)} @@ -981,28 +978,38 @@ def self(self, request): @method_decorator(name='destroy', decorator=swagger_auto_schema(operation_summary='Method deletes a specific cloud storage')) @method_decorator(name='partial_update', decorator=swagger_auto_schema(operation_summary='Methods does a partial update of chosen fields in a cloud 
storage instance')) class CloudStorageViewSet(auth.CloudStorageGetQuerySetMixin, viewsets.ModelViewSet): - http_method_names = ['get', 'post', 'patch', 'delete'] #'head' + http_method_names = ['get', 'post', 'patch', 'delete'] queryset = CloudStorage.objects.all().prefetch_related('data').order_by('-id') - serializer_class = CloudStorageSerializer search_fields = ("provider_type", "resource_name", "owner__username") + filterset_fields = ['provider_type', 'resource_name', 'credentials_type'] def get_permissions(self): http_method = self.request.method permissions = [IsAuthenticated] - if http_method in SAFE_METHODS: # GET, HEAD, OPTIONS + if http_method in SAFE_METHODS: permissions.append(auth.CloudStorageAccessPermission) - elif http_method in ("POST"): - permissions.append(auth.CloudStorageCreatePermission) - elif http_method in ("PATCH"): + elif http_method in ("POST", "PATCH", "DELETE"): permissions.append(auth.CloudStorageChangePermission) - elif http_method in ("DELETE"): - permissions.append(auth.CloudStorageDeletePermission) else: permissions.append(auth.AdminRolePermission) - return [perm() for perm in permissions] + def get_serializer_class(self): + if self.request.method in ("POST", "PATCH"): + return CloudStorageSerializer + else: + return BaseCloudStorageSerializer + + def get_queryset(self): + queryset = super().get_queryset() + if (provider_type := self.request.query_params.get('provider_type', None)): + if provider_type in CloudProviderChoice.list(): + queryset = queryset.filter(provider_type=provider_type) + else: + raise ValidationError('Unsupported type of cloud provider') + return queryset + def perform_create(self, serializer): # check that instance of cloud storage exists provider_type = serializer.validated_data.get('provider_type') @@ -1013,7 +1020,6 @@ def perform_create(self, serializer): 'secret_key': serializer.validated_data.get('secret_key'), } cloud_storage_instance = get_cloud_storage_instance(cloud_provider=provider_type, **details) 
- try: cloud_storage_instance.is_exist() except Exception as ex: @@ -1032,41 +1038,6 @@ def perform_destroy(self, instance): super().perform_destroy(instance) shutil.rmtree(cloud_storage_dirname, ignore_errors=True) - # def list(self, request): - # provider_type = request.query_params.get('provider_type') - # if provider_type: - # try: - # assert provider_type in CloudProviderChoice.list(), (msg:='Unsupported type of a cloud storage provider') - # except AssertionError: - # return Response(data=msg, status=HTTP_400_BAD_REQUEST) - # queryset = self.get_queryset() - # if provider_type: - # queryset = queryset.filter(provider_type=provider_type) - # serializer = CloudStorageSerializer(queryset, many=True) - # return Response(serializer.data) - - def retrieve(self, request, pk): - from django.forms.models import model_to_dict - try: - db_storage = CloudStorage.objects.get(pk=pk) - credentials = Credentials() - credentials.convert_from_db({ - 'type': db_storage.credentials_type, - 'value': db_storage.credentials, - }) - serializer = self.get_serializer(model_to_dict(db_storage), context={ - 'key': credentials.key, - 'secret_key': credentials.secret_key, - 'session_token': credentials.session_token - }) - return Response(serializer.data) - except CloudStorage.DoesNotExist: - message = f"Storage {pk} does not exist" - slogger.glob.error(message) - return HttpResponseNotFound(message) - except Exception: - pass - @method_decorator( name='retrieve', decorator=swagger_auto_schema( @@ -1078,7 +1049,6 @@ def retrieve(self, request, pk): def content(self, request, pk): try: db_storage = CloudStorage.objects.get(pk=pk) - credentials = Credentials() credentials.convert_from_db({ 'type': db_storage.credentials_type, @@ -1091,8 +1061,6 @@ def content(self, request, pk): 'secret_key': credentials.secret_key, } cloud_storage_instance = get_cloud_storage_instance(cloud_provider=db_storage.provider_type, **details) - - cloud_storage_instance.initialize_content() return 
Response(data=cloud_storage_instance.content, content_type="text/plain") From 9324ad1fb2cf9071144df0de6aae88340781f7dc Mon Sep 17 00:00:00 2001 From: Maya Date: Fri, 25 Dec 2020 20:24:39 +0300 Subject: [PATCH 04/26] Fix --- cvat/apps/engine/cloud_provider.py | 42 ++++++++++++++++++------------ 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/cvat/apps/engine/cloud_provider.py b/cvat/apps/engine/cloud_provider.py index 7ba968881be8..38f7b4d98770 100644 --- a/cvat/apps/engine/cloud_provider.py +++ b/cvat/apps/engine/cloud_provider.py @@ -43,9 +43,15 @@ def initialize_content(self): pass @abstractmethod - def download_file(self, key): + def download_fileobj(self, key): pass + def download_file(self, key, path): + file_obj = self.download_fileobj(key) + if isinstance(file_obj, BytesIO): + with open(path, 'wb') as f: + f.write(file_obj.getvalue()) + @abstractmethod def upload_file(self, file_obj, file_name): pass @@ -62,14 +68,14 @@ def content(self): def get_cloud_storage_instance(cloud_provider, **details): instance = None - if cloud_provider == str(CloudProviderChoice.AWS_S3): + if cloud_provider == CloudProviderChoice.AWS_S3: instance = AWS_S3( bucket=details.get('resource_name'), session_token=details.get('session_token'), key_id=details.get('key'), secret_key=details.get('secret_key') ) - elif cloud_provider == str(CloudProviderChoice.AZURE_CONTAINER): + elif cloud_provider == CloudProviderChoice.AZURE_CONTAINER: instance = AzureBlobContainer( container_name=details.get('resource_name'), sas_token=details.get('session_token'), @@ -117,8 +123,8 @@ def is_exist(self): waiter.wait( Bucket=self._bucket_name, WaiterConfig={ - 'Delay': 10, # The amount of time in seconds to wait between attempts. Default: 5 - 'MaxAttempts': 10 # The maximum number of attempts to be made. Default: 20 + 'Delay': 5, # The amount of time in seconds to wait between attempts. Default: 5 + 'MaxAttempts': 3 # The maximum number of attempts to be made. 
Default: 20 } ) except WaiterError: @@ -131,8 +137,8 @@ def is_object_exist(self, key_object): Bucket=self._bucket, Key=key_object, WaiterConfig={ - 'Delay': 10, - 'MaxAttempts': 10, + 'Delay': 5, + 'MaxAttempts': 3, }, ) except WaiterError: @@ -158,14 +164,14 @@ def initialize_content(self): 'name': item.key, } for item in files] - def download_file(self, key): + def download_fileobj(self, key): buf = BytesIO() - with open(buf,'wb') as file_buf: - self.bucket.download_fileobj( - Key=key, - Fileobj=file_buf, - Config=TransferConfig(max_io_queue=10) - )# https://boto3.amazonaws.com/v1/documentation/api/latest/reference/customizations/s3.html#boto3.s3.transfer.TransferConfig + self.bucket.download_fileobj( + Key=key, + Fileobj=buf, + Config=TransferConfig(max_io_queue=10) + ) + buf.seek(0) return buf def create(self): @@ -252,15 +258,17 @@ def initialize_content(self): 'name': item.name } for item in files] - def download_file(self, key): + def download_fileobj(self, key): MAX_CONCURRENCY = 3 + buf = BytesIO() storage_stream_downloader = self._container_client.download_blob( blob=key, offset=None, length=None, ) - return storage_stream_downloader.content_as_bytes(max_concurrency=MAX_CONCURRENCY) - + storage_stream_downloader.download_to_stream(buf, max_concurrency=MAX_CONCURRENCY) + buf.seek(0) + return buf class GOOGLE_DRIVE(CloudStorage): pass From a7399f392ddd3ffbd092498d41ad1dd3c8f83966 Mon Sep 17 00:00:00 2001 From: Maya Date: Fri, 25 Dec 2020 20:42:44 +0300 Subject: [PATCH 05/26] Add(cache): support files on cloud storage --- cvat/apps/engine/cache.py | 37 ++++++++++++++++++++++++++++++++----- 1 file changed, 32 insertions(+), 5 deletions(-) diff --git a/cvat/apps/engine/cache.py b/cvat/apps/engine/cache.py index 2e270bec760d..218a23d8cef3 100644 --- a/cvat/apps/engine/cache.py +++ b/cvat/apps/engine/cache.py @@ -11,8 +11,9 @@ from cvat.apps.engine.media_extractors import (Mpeg4ChunkWriter, Mpeg4CompressedChunkWriter, ZipChunkWriter, ZipCompressedChunkWriter) 
from cvat.apps.engine.models import DataChoice, StorageChoice -from cvat.apps.engine.prepare import PrepareInfo +from cvat.apps.engine.prepare import PrepareInfo, md5_hash, ParsingMeta +from cvat.apps.engine.cloud_provider import get_cloud_storage_instance, Credentials class CacheInteraction: def __init__(self): self._cache = Cache(settings.CACHE_ROOT) @@ -46,16 +47,42 @@ def prepare_chunk_buff(self, db_data, quality, chunk_number): StorageChoice.LOCAL: db_data.get_upload_dirname(), StorageChoice.SHARE: settings.SHARE_ROOT }[db_data.storage] - if os.path.exists(db_data.get_meta_path()): + if hasattr(db_data, 'video'): source_path = os.path.join(upload_dir, db_data.video.path) meta = PrepareInfo(source_path=source_path, meta_path=db_data.get_meta_path()) for frame in meta.decode_needed_frames(chunk_number, db_data): images.append(frame) writer.save_as_chunk([(image, source_path, None) for image in images], buff) else: - with open(db_data.get_dummy_chunk_path(chunk_number), 'r') as dummy_file: - images = [os.path.join(upload_dir, line.strip()) for line in dummy_file] - writer.save_as_chunk([(image, image, None) for image in images], buff) + if db_data.storage == StorageChoice.CLOUD_STORAGE: + db_cloud_storage = db_data.cloud_storage + credentials = Credentials() + credentials.convert_from_db({ + 'type': db_cloud_storage.credentials_type, + 'value': db_cloud_storage.credentials, + }) + details = { + 'resource_name': db_cloud_storage.resource_name, + 'session_token': credentials.session_token, + 'key': credentials.key, + 'secret_key': credentials.secret_key, + } + cloud_storage_instance = get_cloud_storage_instance(cloud_provider=db_cloud_storage.provider_type, **details) + cloud_storage_instance.initialize_content() + meta_parser = ParsingMeta(db_data.get_meta_path()) + for img_name, _ , img_hash in meta_parser.parsing(start=db_data.start_frame, step=db_data.get_frame_step(), \ + stop=db_data.start_frame, chunk_size=db_data.chunk_size, chunk_number=chunk_number): + if 
img_name not in cloud_storage_instance: + # or need to generate dummy image? + raise Exception('{} file was not found on a {} storage'.format(img_name, cloud_storage_instance.name)) + image = cloud_storage_instance.download_fileobj(img_name) + assert md5_hash(image) != img_hash, "Image '{}' does not match with origin image".format(img_name) + images.append((img_name, image, None)) + writer.save_as_chunk(images, buff) + else: + with open(db_data.get_dummy_chunk_path(chunk_number), 'r') as dummy_file: + images = [os.path.join(upload_dir, line.strip()) for line in dummy_file] + writer.save_as_chunk([(image, image, None) for image in images], buff) buff.seek(0) return buff, mime_type From 3cb547e84d4dbd1bf4d603517170ef20b444fb42 Mon Sep 17 00:00:00 2001 From: Maya Date: Fri, 25 Dec 2020 20:43:19 +0300 Subject: [PATCH 06/26] tmp --- cvat/apps/engine/prepare.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/cvat/apps/engine/prepare.py b/cvat/apps/engine/prepare.py index a4fef43e96d1..63806f49ac73 100644 --- a/cvat/apps/engine/prepare.py +++ b/cvat/apps/engine/prepare.py @@ -4,6 +4,7 @@ import av from collections import OrderedDict +from io import BytesIO import hashlib import os from cvat.apps.engine.utils import rotate_image @@ -57,7 +58,10 @@ def check_video_timestamps_sequences(self): self._close_video_container(container) def md5_hash(frame): - return hashlib.md5(frame.to_image().tobytes()).hexdigest() + if isinstance(frame, av.VideoFrame): + return hashlib.md5(frame.to_image().tobytes()).hexdigest() + elif isinstance(frame, BytesIO): + return hashlib.md5(frame.getvalue()).hexdigest() class PrepareInfo(WorkWithVideo): @@ -239,6 +243,13 @@ def check_frames_numbers(self): return self._close_video_container(container) +class ParsingMeta: + def __init__(self, **kwargs): + pass + + def parsing(self, start, step, stop, chunk_size, chunk_number): + raise NotImplementedError() + def prepare_meta(media_file, upload_dir=None, meta_dir=None, 
chunk_size=None): paths = { 'source_path': os.path.join(upload_dir, media_file) if upload_dir else media_file, From b177bed2007d96e4b63428d5744a5a24b7a133e0 Mon Sep 17 00:00:00 2001 From: Maya Date: Tue, 12 Jan 2021 10:38:37 +0300 Subject: [PATCH 07/26] Fix --- cvat/apps/engine/cache.py | 9 +++------ cvat/apps/engine/serializers.py | 2 +- cvat/settings/base.py | 2 +- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/cvat/apps/engine/cache.py b/cvat/apps/engine/cache.py index 218a23d8cef3..a2f037ea5bdf 100644 --- a/cvat/apps/engine/cache.py +++ b/cvat/apps/engine/cache.py @@ -51,8 +51,7 @@ def prepare_chunk_buff(self, db_data, quality, chunk_number): source_path = os.path.join(upload_dir, db_data.video.path) meta = PrepareInfo(source_path=source_path, meta_path=db_data.get_meta_path()) for frame in meta.decode_needed_frames(chunk_number, db_data): - images.append(frame) - writer.save_as_chunk([(image, source_path, None) for image in images], buff) + images.append((frame, source_path, None)) else: if db_data.storage == StorageChoice.CLOUD_STORAGE: db_cloud_storage = db_data.cloud_storage @@ -78,12 +77,10 @@ def prepare_chunk_buff(self, db_data, quality, chunk_number): image = cloud_storage_instance.download_fileobj(img_name) assert md5_hash(image) != img_hash, "Image '{}' does not match with origin image".format(img_name) images.append((img_name, image, None)) - writer.save_as_chunk(images, buff) else: with open(db_data.get_dummy_chunk_path(chunk_number), 'r') as dummy_file: - images = [os.path.join(upload_dir, line.strip()) for line in dummy_file] - writer.save_as_chunk([(image, image, None) for image in images], buff) - + images = [((image := os.path.join(upload_dir, line.strip())), image, None) for line in dummy_file] + writer.save_as_chunk(images, buff) buff.seek(0) return buff, mime_type diff --git a/cvat/apps/engine/serializers.py b/cvat/apps/engine/serializers.py index 48dc44ddb28c..d4742ef285e1 100644 --- a/cvat/apps/engine/serializers.py +++ 
b/cvat/apps/engine/serializers.py @@ -654,7 +654,7 @@ def create(self, validated_data): class BaseCloudStorageSerializer(serializers.ModelSerializer): class Meta: model = models.CloudStorage - fields = '__all__' + exclude = ['credentials'] class CloudStorageSerializer(serializers.ModelSerializer): owner = BasicUserSerializer(required=False) diff --git a/cvat/settings/base.py b/cvat/settings/base.py index 47f3df133b3c..c213eb42f40f 100644 --- a/cvat/settings/base.py +++ b/cvat/settings/base.py @@ -361,7 +361,7 @@ def add_ssh_keys(): MIGRATIONS_LOGS_ROOT = os.path.join(LOGS_ROOT, 'migrations') os.makedirs(MIGRATIONS_LOGS_ROOT, exist_ok=True) -CLOUD_STORAGE_ROOT = os.path.join(BASE_DIR, 'storages') +CLOUD_STORAGE_ROOT = os.path.join(DATA_ROOT, 'storages') os.makedirs(CLOUD_STORAGE_ROOT, exist_ok=True) LOGGING = { From 3be1fc760701d2a30d504296d136449ce4cf0006 Mon Sep 17 00:00:00 2001 From: Maya Date: Mon, 1 Mar 2021 14:28:27 +0300 Subject: [PATCH 08/26] Revert prettier changes --- .../annotation-page/appearance-block.tsx | 9 +-------- .../attribute-switcher.tsx | 12 ++--------- .../object-switcher.tsx | 12 ++--------- .../installation_automatic_annotation.md | 20 ++++++------------- .../actions_users/issue_1810_login_logout.js | 10 +++++----- tests/cypress/support/commands.js | 10 ++++------ 6 files changed, 20 insertions(+), 53 deletions(-) diff --git a/cvat-ui/src/components/annotation-page/appearance-block.tsx b/cvat-ui/src/components/annotation-page/appearance-block.tsx index b07dc93ef218..8ea48cf3e786 100644 --- a/cvat-ui/src/components/annotation-page/appearance-block.tsx +++ b/cvat-ui/src/components/annotation-page/appearance-block.tsx @@ -152,14 +152,7 @@ function AppearanceBlock(props: Props): JSX.Element { activeKey={appearanceCollapsed ? [] : ['appearance']} className='cvat-objects-appearance-collapse' > - - Appearance - - )} - key='appearance' - > + Appearance} key='appearance'>
Color by - @@ -38,11 +34,7 @@ function AttributeSwitcher(props: Props): JSX.Element { {` [${currentIndex + 1}/${attributesCount}]`} - diff --git a/cvat-ui/src/components/annotation-page/attribute-annotation-workspace/attribute-annotation-sidebar/object-switcher.tsx b/cvat-ui/src/components/annotation-page/attribute-annotation-workspace/attribute-annotation-sidebar/object-switcher.tsx index 504016bfa9b6..f371a287261e 100644 --- a/cvat-ui/src/components/annotation-page/attribute-annotation-workspace/attribute-annotation-sidebar/object-switcher.tsx +++ b/cvat-ui/src/components/annotation-page/attribute-annotation-workspace/attribute-annotation-sidebar/object-switcher.tsx @@ -27,11 +27,7 @@ function ObjectSwitcher(props: Props): JSX.Element { return (
- @@ -41,11 +37,7 @@ function ObjectSwitcher(props: Props): JSX.Element { {`[${currentIndex + 1}/${objectsCount}]`} - diff --git a/cvat/apps/documentation/installation_automatic_annotation.md b/cvat/apps/documentation/installation_automatic_annotation.md index 994a0977a377..e3343211ffd5 100644 --- a/cvat/apps/documentation/installation_automatic_annotation.md +++ b/cvat/apps/documentation/installation_automatic_annotation.md @@ -1,14 +1,13 @@ + ### Semi-automatic and Automatic Annotation -> **⚠ WARNING: Do not use `docker-compose up`** -> If you did, make sure all containers are stopped by `docker-compose down`. +> **⚠ WARNING: Do not use `docker-compose up`** +> If you did, make sure all containers are stopped by `docker-compose down`. - To bring up cvat with auto annotation tool, from cvat root directory, you need to run: - ```bash docker-compose -f docker-compose.yml -f components/serverless/docker-compose.serverless.yml up -d ``` - If you did any changes to the docker-compose files, make sure to add `--build` at the end. To stop the containers, simply run: @@ -22,7 +21,6 @@ It is important that the version you download matches the version in [docker-compose.serverless.yml](/components/serverless/docker-compose.serverless.yml) After downloading the nuclio, give it a proper permission and do a softlink - ``` sudo chmod +x nuctl--linux-amd64 sudo ln -sf $(pwd)/nuctl--linux-amd64 /usr/local/bin/nuctl @@ -47,13 +45,10 @@ --volume `pwd`/serverless/openvino/common:/opt/nuclio/common \ --platform local ``` - **Note:** - - See [deploy_cpu.sh](/serverless/deploy_cpu.sh) for more examples. #### GPU Support - You will need to install Nvidia Container Toolkit and make sure your docker supports GPU. Follow [Nvidia docker instructions](https://www.tensorflow.org/install/docker#gpu_support). Also you will need to add `--resource-limit nvidia.com/gpu=1` to the nuclio deployment command. 
As an example, below will run on the GPU: @@ -68,10 +63,9 @@ ``` **Note:** + - Since the model is loaded during deployment, the number of GPU functions you can deploy will be limited to your GPU memory. - - Since the model is loaded during deployment, the number of GPU functions you can deploy will be limited to your GPU memory. - - - See [deploy_gpu.sh](/serverless/deploy_gpu.sh) script for more examples. + - See [deploy_gpu.sh](/serverless/deploy_gpu.sh) script for more examples. ####Debugging Nuclio Functions: @@ -82,7 +76,6 @@ ```bash docker logs ``` - e.g., ```bash @@ -90,10 +83,9 @@ ``` - If you would like to debug a code inside a container, you can use vscode to directly attach to a container [instructions](https://code.visualstudio.com/docs/remote/attach-container). To apply your changes, make sure to restart the container. - ```bash docker restart ``` > **⚠ WARNING:** - > Do not use nuclio dashboard to stop the container because with any modifications, it rebuilds the container and you will lose your changes. + > Do not use nuclio dashboard to stop the container because with any modifications, it rebuilds the container and you will lose your changes. 
\ No newline at end of file diff --git a/tests/cypress/integration/actions_users/issue_1810_login_logout.js b/tests/cypress/integration/actions_users/issue_1810_login_logout.js index 98b6f1e3f4d9..b9bd34f1feda 100644 --- a/tests/cypress/integration/actions_users/issue_1810_login_logout.js +++ b/tests/cypress/integration/actions_users/issue_1810_login_logout.js @@ -35,9 +35,7 @@ context('When clicking on the Logout button, get the user session closed.', () = it('Logout and login to task via GUI', () => { // logout from task cy.get('.cvat-right-header').within(() => { - cy.get('.cvat-header-menu-dropdown') - .should('have.text', Cypress.env('user')) - .trigger('mouseover', { which: 1 }); + cy.get('.cvat-header-menu-dropdown').should('have.text', Cypress.env('user')).trigger('mouseover', { which: 1 }); }); cy.get('span[aria-label="logout"]').click(); cy.url().should('include', `/auth/login/?next=/tasks/${taskId}`); @@ -45,7 +43,9 @@ context('When clicking on the Logout button, get the user session closed.', () = cy.get('[placeholder="Username"]').type(Cypress.env('user')); cy.get('[placeholder="Password"]').type(Cypress.env('password')); cy.get('[type="submit"]').click(); - cy.url().should('include', `/tasks/${taskId}`).and('not.include', '/auth/login/'); + cy.url() + .should('include', `/tasks/${taskId}`) + .and('not.include', '/auth/login/'); cy.contains('.cvat-task-details-task-name', `${taskName}`).should('be.visible'); }); @@ -64,7 +64,7 @@ context('When clicking on the Logout button, get the user session closed.', () = responce = await responce['headers']['set-cookie']; const csrfToken = responce[0].match(/csrftoken=\w+/)[0].replace('csrftoken=', ''); const sessionId = responce[1].match(/sessionid=\w+/)[0].replace('sessionid=', ''); - cy.visit(`/login-with-token/${sessionId}/${csrfToken}?next=/tasks/${taskId}`); + cy.visit(`/login-with-token/${sessionId}/${csrfToken}?next=/tasks/${taskId}`) cy.contains('.cvat-task-details-task-name', 
`${taskName}`).should('be.visible'); }); }); diff --git a/tests/cypress/support/commands.js b/tests/cypress/support/commands.js index 763b2a293ba6..c067c1358d7d 100644 --- a/tests/cypress/support/commands.js +++ b/tests/cypress/support/commands.js @@ -274,6 +274,7 @@ Cypress.Commands.add('changeWorkspace', (mode, labelName) => { }); Cypress.Commands.add('changeLabelAAM', (labelName) => { + cy.get('.cvat-workspace-selector').then((value) => { const cvatWorkspaceSelectorValue = value.text(); if (cvatWorkspaceSelectorValue.includes('Attribute annotation')) { @@ -419,7 +420,7 @@ Cypress.Commands.add('removeAnnotations', () => { cy.contains('Remove annotations').click(); }); cy.get('.cvat-modal-confirm-remove-annotation').within(() => { - cy.contains('button', 'Delete').click(); + cy.contains('button','Delete').click(); }); }); @@ -575,15 +576,12 @@ Cypress.Commands.add('goToPreviousFrame', (expectedFrameNum) => { Cypress.Commands.add('getObjectIdNumberByLabelName', (labelName) => { cy.document().then((doc) => { - const stateItemLabelSelectorList = Array.from( - doc.querySelectorAll('.cvat-objects-sidebar-state-item-label-selector'), - ); + const stateItemLabelSelectorList = Array.from(doc.querySelectorAll('.cvat-objects-sidebar-state-item-label-selector')); for (let i = 0; i < stateItemLabelSelectorList.length; i++) { if (stateItemLabelSelectorList[i].textContent === labelName) { cy.get(stateItemLabelSelectorList[i]) .parents('.cvat-objects-sidebar-state-item') - .should('have.attr', 'id') - .then((id) => { + .should('have.attr', 'id').then((id) => { return Number(id.match(/\d+$/)); }); } From 82adc0beafa64757fb17b7c1583af0059c77f810 Mon Sep 17 00:00:00 2001 From: Maya Date: Thu, 15 Apr 2021 03:13:42 +0300 Subject: [PATCH 09/26] tmp --- cvat/apps/engine/admin.py | 2 +- cvat/apps/engine/cache.py | 34 +++- cvat/apps/engine/cloud_provider.py | 146 +++++++++------- cvat/apps/engine/log.py | 30 +++- ...218_1751.py => 0039_auto_20210414_2110.py} | 12 +- 
cvat/apps/engine/models.py | 18 +- cvat/apps/engine/serializers.py | 55 +++--- cvat/apps/engine/task.py | 58 +++---- cvat/apps/engine/views.py | 159 ++++++++++++------ utils/dataset_manifest/core.py | 30 +++- 10 files changed, 339 insertions(+), 205 deletions(-) rename cvat/apps/engine/migrations/{0036_auto_20201218_1751.py => 0039_auto_20210414_2110.py} (78%) diff --git a/cvat/apps/engine/admin.py b/cvat/apps/engine/admin.py index ddebe5020684..7a919d624507 100644 --- a/cvat/apps/engine/admin.py +++ b/cvat/apps/engine/admin.py @@ -88,7 +88,7 @@ class CloudStorageAdmin(admin.ModelAdmin): date_hierarchy = 'updated_date' readonly_fields = ('created_date', 'updated_date', 'provider_type') list_display = ('__str__', 'owner', 'created_date', 'updated_date') - search_fields = ('provider_type', 'resource_name', 'owner__username', 'owner__first_name', + search_fields = ('provider_type', 'resource', 'owner__username', 'owner__first_name', 'owner__last_name', 'owner__email',) empty_value_display = 'unknown' diff --git a/cvat/apps/engine/cache.py b/cvat/apps/engine/cache.py index e1665081f516..999a271dc6b4 100644 --- a/cvat/apps/engine/cache.py +++ b/cvat/apps/engine/cache.py @@ -13,7 +13,8 @@ ImageDatasetManifestReader, VideoDatasetManifestReader) from cvat.apps.engine.models import DataChoice, StorageChoice from cvat.apps.engine.models import DimensionType -from cvat.apps.engine.cloud_provider import get_cloud_storage_instance, Credentials +from cvat.apps.engine.cloud_provider import CloudStorage, Credentials +from cvat.apps.engine.utils import md5_hash class CacheInteraction: def __init__(self, dimension=DimensionType.DIM_2D): self._cache = Cache(settings.CACHE_ROOT) @@ -65,9 +66,34 @@ def prepare_chunk_buff(self, db_data, quality, chunk_number): chunk_number=chunk_number, chunk_size=db_data.chunk_size, start=db_data.start_frame, stop=db_data.stop_frame, step=db_data.get_frame_step()) - for item in reader: - source_path = os.path.join(upload_dir, 
f"{item['name']}{item['extension']}") - images.append((source_path, source_path, None)) + if db_data.storage == StorageChoice.CLOUD_STORAGE: + db_cloud_storage = db_data.cloud_storage + credentials = Credentials() + credentials.convert_from_db({ + 'type': db_cloud_storage.credentials_type, + 'value': db_cloud_storage.credentials, + }) + details = { + 'resource': db_cloud_storage.resource, + 'credentials': credentials + } + cloud_storage_instance = CloudStorage(cloud_provider=db_cloud_storage.provider_type, **details) + cloud_storage_instance.initialize_content() + for item in reader: + name = f"{item['name']}{item['extension']}" + source_path = os.path.join(upload_dir, name) + if name not in cloud_storage_instance: + raise Exception('{} file was not found on a {} storage'.format(name, cloud_storage_instance.name)) + cloud_storage_instance.download_file(name, source_path) + assert item.get('checksum', None), \ + 'A manifest file does not contain checksum for image {}'.format(item.get('name')) + assert md5_hash(source_path) == item.get('checksum'), \ + 'Hash sums of files {} do not match'.format(name) + images.append((source_path, source_path, None)) + else: + for item in reader: + source_path = os.path.join(upload_dir, f"{item['name']}{item['extension']}") + images.append((source_path, source_path, None)) writer.save_as_chunk(images, buff) buff.seek(0) return buff, mime_type diff --git a/cvat/apps/engine/cloud_provider.py b/cvat/apps/engine/cloud_provider.py index 38f7b4d98770..dc9f2247eba3 100644 --- a/cvat/apps/engine/cloud_provider.py +++ b/cvat/apps/engine/cloud_provider.py @@ -4,7 +4,7 @@ import boto3 from boto3.s3.transfer import TransferConfig -from botocore.exceptions import WaiterError +from botocore.exceptions import WaiterError, NoCredentialsError from azure.storage.blob import BlobServiceClient from azure.core.exceptions import ResourceExistsError @@ -13,7 +13,7 @@ from cvat.apps.engine.log import slogger from cvat.apps.engine.models import 
CredentialsTypeChoice, CloudProviderChoice -class CloudStorage(ABC): +class _CloudStorage(ABC): def __init__(self): self._files = [] @@ -64,46 +64,63 @@ def __len__(self): @property def content(self): - return map(lambda x: x['name'] , self._files) - -def get_cloud_storage_instance(cloud_provider, **details): - instance = None - if cloud_provider == CloudProviderChoice.AWS_S3: - instance = AWS_S3( - bucket=details.get('resource_name'), - session_token=details.get('session_token'), - key_id=details.get('key'), - secret_key=details.get('secret_key') - ) - elif cloud_provider == CloudProviderChoice.AZURE_CONTAINER: - instance = AzureBlobContainer( - container_name=details.get('resource_name'), - sas_token=details.get('session_token'), - accounr_name=details.get('key'), - account_access_key=details.get('secret_key') - ) - return instance + return list(map(lambda x: x['name'] , self._files)) + +# def get_cloud_storage_instance(cloud_provider, resource, credentials): +# instance = None +# проверить креденшелы! 
+# if cloud_provider == CloudProviderChoice.AWS_S3: +# instance = AWS_S3( +# bucket=resource, +# session_token=credentials.session_token, +# ) +# elif cloud_provider == CloudProviderChoice.AZURE_CONTAINER: +# instance = AzureBlobContainer( +# container_name=resource, +# sas_token=credentials.session_token, +# ) +# return instance + +# TODO: подумать возможно оставить функцию provider вместо класса ниже +class CloudStorage: + def __init__(self, cloud_provider, resource, credentials): + if cloud_provider == CloudProviderChoice.AWS_S3: + self.__instance = AWS_S3( + bucket=resource, + access_key_id=credentials.key, + secret_key=credentials.secret_key, + session_token=credentials.session_token, + ) + elif cloud_provider == CloudProviderChoice.AZURE_CONTAINER: + self.__instance = AzureBlobContainer( + container=resource, + account_name=credentials.account_name, + sas_token=credentials.session_token, + ) + else: + raise NotImplementedError() -class AWS_S3(CloudStorage): - def __init__(self, **kwargs): - super().__init__() - assert (bucket_name := kwargs.get('bucket')), 'Bucket name was not found' - self._bucket_name = bucket_name - - key_id, secret_key = None, None - if (session_token := kwargs.get('session_token')): - assert (key_id := kwargs.get('key_id')), 'Key id was not found' - assert (secret_key := kwargs.get('secret_key')), 'Secret key was not found' - - self._client_s3 = boto3.client( - 's3', - aws_access_key_id=key_id, - aws_secret_access_key=secret_key, - aws_session_token=session_token - ) + def __getattr__(self, name): + assert hasattr(self.__instance, name), 'Unknown behavior: {}'.format(name) + return self.__instance.__getattribute__(name) +class AWS_S3(_CloudStorage): + def __init__(self, bucket, access_key_id=None, secret_key=None, session_token=None): + super().__init__() + if all([access_key_id, secret_key, session_token]): + self._client_s3 = boto3.client( + 's3', + aws_access_key_id=access_key_id, + aws_secret_access_key=secret_key, + 
aws_session_token=session_token, + ) + elif any([access_key_id, secret_key, session_token]): + raise Exception('Insufficient data for authorization') + else: + # anonymous access + self._client_s3 = boto3.client('s3') self._s3 = boto3.resource('s3') - self._bucket = self._s3.Bucket(bucket_name) + self._bucket = self._s3.Bucket(bucket) @property def bucket(self): @@ -111,7 +128,7 @@ def bucket(self): @property def name(self): - return self._bucket_name + return self._bucket.name # def is_object_exist(self, verifiable='bucket_exist', config=None): # waiter = self._client_s3.get_waiter(verifiable) @@ -121,14 +138,14 @@ def is_exist(self): waiter = self._client_s3.get_waiter('bucket_exists') try: waiter.wait( - Bucket=self._bucket_name, + Bucket=self.name, WaiterConfig={ 'Delay': 5, # The amount of time in seconds to wait between attempts. Default: 5 'MaxAttempts': 3 # The maximum number of attempts to be made. Default: 20 } ) except WaiterError: - raise Exception('A resource {} unavailable'.format(self._bucket_name)) + raise Exception('A resource {} unavailable'.format(self.name)) def is_object_exist(self, key_object): waiter = self._client_s3.get_waiter('object_exists') @@ -183,23 +200,21 @@ def create(self): }, ObjectLockEnabledForBucket=False ) - except Exception as ex:#botocore.errorfactory.BucketAlreadyExists + except Exception as ex: msg = str(ex) slogger.glob.info(msg) raise Exception(str(ex)) -class AzureBlobContainer(CloudStorage): +class AzureBlobContainer(_CloudStorage): - def __init__(self, **kwargs): + def __init__(self, container, account_name, sas_token=None): super().__init__() - assert (container_name := kwargs.get('container_name')), 'Container name was not found' - assert (account_name := kwargs.get('account_name')), 'Account name was not found' - assert (credentials := kwargs.get('sas_token') if kwargs.get('sas_token') else kwargs.get('account_access_key')), 'Credentials were not granted' - - self._blob_service_client = 
BlobServiceClient(account_url=self.account_url, credential=credentials) - self._container_client = self._blob_service_client.get_container_client(container_name) - self._account_name = account_name + if sas_token: + self._blob_service_client = BlobServiceClient(account_url=self.account_url, credential=sas_token) + else: + self._blob_service_client = BlobServiceClient(account_url=self.account_url) + self._container_client = self._blob_service_client.get_container_client(container) @property def container(self): @@ -270,41 +285,44 @@ def download_fileobj(self, key): buf.seek(0) return buf -class GOOGLE_DRIVE(CloudStorage): +class GOOGLE_DRIVE(_CloudStorage): pass class Credentials: - __slots__ = ('key', 'secret_key', 'session_token', 'credentials_type') + __slots__ = ('key', 'secret_key', 'session_token', 'account_name', 'credentials_type') def __init__(self, **credentials): self.key = credentials.get('key', '') self.secret_key = credentials.get('secret_key', '') self.session_token = credentials.get('session_token', '') + self.account_name = credentials.get('account_name', '') self.credentials_type = credentials.get('credentials_type', None) def convert_to_db(self): converted_credentials = { - CredentialsTypeChoice.TOKEN : self.session_token, - CredentialsTypeChoice.KEY_TOKEN_PAIR : " ".join([self.key, self.session_token]), - CredentialsTypeChoice.KEY_SECRET_KEY_PAIR : " ".join([self.key, self.secret_key]) + CredentialsTypeChoice.TEMP_KEY_SECRET_KEY_TOKEN_PAIR : \ + " ".join([self.key, self.secret_key, self.session_token]), + CredentialsTypeChoice.ACCOUNT_NAME_TOKEN_PAIR : " ".join([self.account_name, self.session_token]), + CredentialsTypeChoice.ANONYMOUS_ACCESS: "", } return converted_credentials[self.credentials_type] def convert_from_db(self, credentials): self.credentials_type = credentials.get('type') - if self.credentials_type == CredentialsTypeChoice.TOKEN: - self.session_token = credentials.get('value') + if self.credentials_type == 
CredentialsTypeChoice.TEMP_KEY_SECRET_KEY_TOKEN_PAIR: + self.key, self.secret_key, self.session_token = credentials.get('value').split() + elif self.credentials_type == CredentialsTypeChoice.ACCOUNT_NAME_TOKEN_PAIR: + self.account_name, self.session_token = credentials.get('value').split() else: - self.key, second = credentials.get('value').split() - if self.credentials_type == CredentialsTypeChoice.KEY_TOKEN_PAIR: - self.session_token = second - else: self.secret_key = second + self.account_name, self.session_token, self.key, self.secret_key = ("", "", "", "") + self.credentials_type = None def mapping_with_new_values(self, credentials): self.credentials_type = credentials.get('credentials_type', self.credentials_type) self.key = credentials.get('key', self.key) self.secret_key = credentials.get('secret_key', self.secret_key) self.session_token = credentials.get('session_token', self.session_token) + self.account_name = credentials.get('account_name', self.account_name) def values(self): - return [self.key, self.secret_key, self.session_token] + return [self.key, self.secret_key, self.session_token, self.account_name] diff --git a/cvat/apps/engine/log.py b/cvat/apps/engine/log.py index d8804e26cd46..6a0a197b818c 100644 --- a/cvat/apps/engine/log.py +++ b/cvat/apps/engine/log.py @@ -4,7 +4,7 @@ import logging from cvat.settings.base import LOGGING -from .models import Job, Task, Project +from .models import Job, Task, Project, CloudStorage def _get_project(pid): try: @@ -24,6 +24,12 @@ def _get_job(jid): except Exception: raise Exception('{} key must be a job identifier'.format(jid)) +def _get_storage(storage_id): + try: + return CloudStorage.objects.get(pk=storage_id) + except Exception: + raise Exception('{} key must be a cloud storage identifier'.format(storage_id)) + class ProjectLoggerStorage: def __init__(self): self._storage = dict() @@ -79,6 +85,27 @@ def _get_task_logger(self, jid): job = _get_job(jid) return slogger.task[job.segment.task.id] +class 
CloudSourceLoggerStorage: + def __init__(self): + self._storage = dict() + + def __getitem__(self, sid): + """Get ceratain storage object for some cloud storage.""" + if sid not in self._storage: + self._storage[sid] = self._create_cloud_storage_logger(sid) + return self._storage[sid] + + def _create_cloud_storage_logger(self, sid): + cloud_storage = _get_storage(sid) + + logger = logging.getLogger('cvat.server.cloud_storage_{}'.format(sid)) + server_file = logging.FileHandler(filename=cloud_storage.get_log_path()) + formatter = logging.Formatter(LOGGING['formatters']['standard']['format']) + server_file.setFormatter(formatter) + logger.addHandler(server_file) + + return logger + class ProjectClientLoggerStorage: def __init__(self): self._storage = dict() @@ -144,5 +171,6 @@ class dotdict(dict): 'project': ProjectLoggerStorage(), 'task': TaskLoggerStorage(), 'job': JobLoggerStorage(), + 'cloud_storage': CloudSourceLoggerStorage(), 'glob': logging.getLogger('cvat.server'), }) diff --git a/cvat/apps/engine/migrations/0036_auto_20201218_1751.py b/cvat/apps/engine/migrations/0039_auto_20210414_2110.py similarity index 78% rename from cvat/apps/engine/migrations/0036_auto_20201218_1751.py rename to cvat/apps/engine/migrations/0039_auto_20210414_2110.py index a7c05ae012ce..8f915e277dee 100644 --- a/cvat/apps/engine/migrations/0036_auto_20201218_1751.py +++ b/cvat/apps/engine/migrations/0039_auto_20210414_2110.py @@ -1,4 +1,4 @@ -# Generated by Django 3.1.1 on 2020-12-18 17:51 +# Generated by Django 3.1.7 on 2021-04-14 21:10 import cvat.apps.engine.models from django.conf import settings @@ -10,7 +10,7 @@ class Migration(migrations.Migration): dependencies = [ migrations.swappable_dependency(settings.AUTH_USER_MODEL), - ('engine', '0035_data_storage'), + ('engine', '0038_manifest'), ] operations = [ @@ -24,16 +24,16 @@ class Migration(migrations.Migration): fields=[ ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), 
('provider_type', models.CharField(choices=[('AWS_S3_BUCKET', 'AWS_S3'), ('AZURE_CONTAINER', 'AZURE_CONTAINER'), ('GOOGLE_DRIVE', 'GOOGLE_DRIVE')], max_length=20)), - ('resource_name', models.CharField(max_length=50)), + ('resource', models.CharField(max_length=50)), ('created_date', models.DateTimeField(auto_now_add=True)), ('updated_date', models.DateTimeField(auto_now=True)), - ('credentials', models.CharField(max_length=100, unique=True)), - ('credentials_type', models.CharField(choices=[('TOKEN', 'TOKEN'), ('KEY_TOKEN_PAIR', 'KEY_TOKEN_PAIR'), ('KEY_SECRET_KEY_PAIR', 'KEY_SECRET_KEY_PAIR')], max_length=20)), + ('credentials', models.CharField(max_length=500)), + ('credentials_type', models.CharField(choices=[('TEMP_KEY_SECRET_KEY_TOKEN_PAIR', 'TEMP_KEY_SECRET_KEY_TOKEN_PAIR'), ('ACCOUNT_NAME_TOKEN_PAIR', 'ACCOUNT_NAME_TOKEN_PAIR'), ('ANONYMOUS_ACCESS', 'ANONYMOUS_ACCESS')], max_length=30)), ('owner', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='cloud_storages', to=settings.AUTH_USER_MODEL)), ], options={ 'default_permissions': (), - 'unique_together': {('provider_type', 'resource_name', 'credentials')}, + 'unique_together': {('provider_type', 'resource', 'credentials')}, }, ), migrations.AddField( diff --git a/cvat/apps/engine/models.py b/cvat/apps/engine/models.py index 31738349b0af..9d4dad732788 100644 --- a/cvat/apps/engine/models.py +++ b/cvat/apps/engine/models.py @@ -270,7 +270,7 @@ class ServerFile(models.Model): class Meta: default_permissions = () -# For URLs and files on remote cloud storages +# For URLs class RemoteFile(models.Model): data = models.ForeignKey(Data, on_delete=models.CASCADE, null=True, related_name='remote_files') file = models.CharField(max_length=1024) @@ -523,9 +523,9 @@ def __str__(self): return self.value class CredentialsTypeChoice(str, Enum): - TOKEN = 'TOKEN' - KEY_TOKEN_PAIR = 'KEY_TOKEN_PAIR' - KEY_SECRET_KEY_PAIR = 'KEY_SECRET_KEY_PAIR' + 
TEMP_KEY_SECRET_KEY_TOKEN_PAIR = 'TEMP_KEY_SECRET_KEY_TOKEN_PAIR' + ACCOUNT_NAME_TOKEN_PAIR = 'ACCOUNT_NAME_TOKEN_PAIR' + ANONYMOUS_ACCESS = 'ANONYMOUS_ACCESS' @classmethod def choices(cls): @@ -540,20 +540,20 @@ def __str__(self): class CloudStorage(models.Model): provider_type = models.CharField(max_length=20, choices=CloudProviderChoice.choices()) - resource_name = models.CharField(max_length=50) + resource = models.CharField(max_length=50) owner = models.ForeignKey(User, null=True, blank=True, on_delete=models.SET_NULL, related_name="cloud_storages") created_date = models.DateTimeField(auto_now_add=True) updated_date = models.DateTimeField(auto_now=True) - credentials = models.CharField(max_length=100, unique=True) - credentials_type = models.CharField(max_length=20, choices=CredentialsTypeChoice.choices())#auth_type + credentials = models.CharField(max_length=500) + credentials_type = models.CharField(max_length=30, choices=CredentialsTypeChoice.choices())#auth_type class Meta: default_permissions = () - unique_together = (('provider_type', 'resource_name', 'credentials'),) + unique_together = (('provider_type', 'resource', 'credentials'),) def __str__(self): - return "{} {} {}".format(self.provider_type, self.resource_name, self.id) + return "{} {} {}".format(self.provider_type, self.resource, self.id) def get_storage_dirname(self): return os.path.join(settings.CLOUD_STORAGE_ROOT, str(self.id)) diff --git a/cvat/apps/engine/serializers.py b/cvat/apps/engine/serializers.py index ec251e637211..2dcc02e8a183 100644 --- a/cvat/apps/engine/serializers.py +++ b/cvat/apps/engine/serializers.py @@ -12,7 +12,7 @@ from cvat.apps.engine import models from cvat.apps.engine.log import slogger from cvat.apps.dataset_manager.formats.utils import get_label_color -from cvat.apps.engine.cloud_provider import Credentials, get_cloud_storage_instance +from cvat.apps.engine.cloud_provider import Credentials, CloudStorage class BasicUserSerializer(serializers.ModelSerializer): def 
validate(self, data): @@ -696,54 +696,47 @@ class Meta: class CloudStorageSerializer(serializers.ModelSerializer): owner = BasicUserSerializer(required=False) - session_token = serializers.CharField(max_length=50, allow_blank=True, required=False) - key = serializers.CharField(max_length=30, allow_blank=True, required=False) - secret_key = serializers.CharField(max_length=50, allow_blank=True, required=False) + session_token = serializers.CharField(max_length=400, allow_blank=True, required=False) + key = serializers.CharField(max_length=40, allow_blank=True, required=False) + secret_key = serializers.CharField(max_length=60, allow_blank=True, required=False) + account_name = serializers.CharField(max_length=50, allow_blank=True, required=False) class Meta: model = models.CloudStorage fields = ( - 'provider_type', 'resource_name', 'session_token', 'owner', - 'key', 'secret_key', 'credentials_type', 'created_date', 'updated_date', + 'provider_type', 'resource', 'owner', 'credentials_type', + 'created_date', 'updated_date', 'session_token', 'account_name', 'key', + 'secret_key' ) read_only_fields = ('created_date', 'updated_date', 'owner') def validate(self, attrs): - credentials = Credentials( - key = attrs.get('key'), - secret_key = attrs.get('secret_key'), - session_token = attrs.get('session_token'), - ) - if any(credentials.values()): - if attrs.get('provider_type') == models.CloudProviderChoice.AZURE_CONTAINER and not credentials.key: - raise serializers.ValidationError('A credentials were not found') - else: - # no access rights granted - raise serializers.ValidationError('A credentials were not found') + if attrs.get('provider_type') == models.CloudProviderChoice.AZURE_CONTAINER: + if not attrs.get('account_name', ''): + raise exceptions.PermissionDenied('Account name for Azure container was not specified') return attrs def create(self, validated_data): provider_type = validated_data.get('provider_type') should_be_created = 
validated_data.pop('should_be_created', None) credentials = Credentials( - key = validated_data.pop('key', ''), - secret_key = validated_data.pop('secret_key', ''), - session_token = validated_data.pop('session_token', ''), + account_name=validated_data.pop('account_name', ''), + key=validated_data.pop('key', ''), + secret_key=validated_data.pop('secret_key', ''), + session_token=validated_data.pop('session_token', ''), credentials_type = validated_data.get('credentials_type') ) if should_be_created: details = { - 'resource_name': validated_data.get('resource_name'), - 'session_token': credentials.session_token, - 'key': credentials.key, - 'secret_key': credentials.secret_key, + 'resource': validated_data.get('resource'), + 'credentials': credentials } - cloud_storage_instance = get_cloud_storage_instance(cloud_provider=provider_type, **details) - + storage = CloudStorage(cloud_provider=provider_type, **details) try: - cloud_storage_instance.create() - except Exception: - pass + storage.create() + except Exception as ex: + slogger.glob.warning("Failed with creating storage\n{}".format(str(ex))) + raise db_storage = models.CloudStorage.objects.create( credentials=credentials.convert_to_db(), @@ -759,11 +752,11 @@ def update(self, instance, validated_data): 'type': instance.credentials_type, 'value': instance.credentials, }) - tmp = {k:v for k,v in validated_data.items() if k in ('key', 'secret_key', 'session_token', 'credentials_type')} + tmp = {k:v for k,v in validated_data.items() if k in {'key', 'secret_key', 'account_name', 'session_token', 'credentials_type'}} credentials.mapping_with_new_values(tmp) instance.credentials = credentials.convert_to_db() instance.credentials_type = validated_data.get('credentials_type', instance.credentials_type) - instance.resource_name = validated_data.get('resource_name', instance.resource_name) + instance.resource = validated_data.get('resource', instance.resource) instance.save() return instance \ No newline at end of file 
diff --git a/cvat/apps/engine/task.py b/cvat/apps/engine/task.py index b7a3b95bca78..2181fc542011 100644 --- a/cvat/apps/engine/task.py +++ b/cvat/apps/engine/task.py @@ -14,7 +14,8 @@ import requests from cvat.apps.engine.media_extractors import get_mime, MEDIA_TYPES, Mpeg4ChunkWriter, ZipChunkWriter, Mpeg4CompressedChunkWriter, ZipCompressedChunkWriter, ValidateDimension -from cvat.apps.engine.models import DataChoice, StorageMethodChoice, StorageChoice, RelatedFile +from cvat.apps.engine.models import ( + DataChoice, StorageMethodChoice, StorageChoice, RelatedFile) from cvat.apps.engine.utils import av_scan_paths from cvat.apps.engine.models import DimensionType from utils.dataset_manifest import ImageManifestManager, VideoManifestManager @@ -27,6 +28,7 @@ from . import models from .log import slogger +from .cloud_provider import CloudStorage, Credentials ############################# Low Level server API @@ -354,8 +356,8 @@ def _update_status(msg): manifest.validate_frame_numbers() assert len(manifest) > 0, 'No key frames.' 
- all_frames = manifest['properties']['length'] - video_size = manifest['properties']['resolution'] + all_frames = manifest.video_length + video_size = manifest.video_resolution manifest_is_prepared = True except Exception as ex: if os.path.exists(db_data.get_index_path()): @@ -399,6 +401,8 @@ def _update_status(msg): db_data.size = len(extractor) manifest = ImageManifestManager(db_data.get_manifest_path()) if not manifest_file: + if db_data.storage == StorageChoice.CLOUD_STORAGE: + raise Exception('A manifest file was not foud') if db_task.dimension == DimensionType.DIM_2D: meta_info = manifest.prepare_meta( sources=extractor.absolute_source_paths, @@ -414,14 +418,29 @@ def _update_status(msg): 'extension': ext }) manifest.create(content) + if db_data.storage == StorageChoice.CLOUD_STORAGE: + db_cloud_storage = db_data.cloud_storage + credentials = Credentials() + credentials.convert_from_db({ + 'type': db_cloud_storage.credentials_type, + 'value': db_cloud_storage.value, + }) + + details = { + 'resource': db_cloud_storage.resource, + 'credentials': credentials, + } + cloud_storage_instance = CloudStorage(cloud_provider=db_cloud_storage.provider_type, **details) + cloud_storage_instance.download_file(manifest_file[0], db_data.get_manifest_path()) manifest.init_index() counter = itertools.count() for _, chunk_frames in itertools.groupby(extractor.frame_range, lambda x: next(counter) // db_data.chunk_size): chunk_paths = [(extractor.get_path(i), i) for i in chunk_frames] img_sizes = [] - - for _, frame_id in chunk_paths: + for abs_path, frame_id in chunk_paths: properties = manifest[frame_id] + assert abs_path.endswith(os.path.basename(f"{properties['name']}{properties['extension']}")), \ + 'Uploaded files don`t mappimg with the uploaded manifest' if db_task.dimension == DimensionType.DIM_2D: resolution = (properties['width'], properties['height']) else: @@ -435,35 +454,6 @@ def _update_status(msg): for (path, frame), (w, h) in zip(chunk_paths, img_sizes) ]) - # def 
processing_files_on_cloud_storage(): - # from .cloud_provider import Credentials, get_cloud_storage_instance - # from cvat.apps.engine.models import CloudProviderChoice - - # #TODO: only on first iteration of implementation - # if media_type != 'images': - # raise NotImplementedError() - - # if not meta_info_file: - # raise Exception('A meta information was not found') - - # db_cloud_storage = db_data.cloud_storage - # credentials = Credentials() - # credentials.convert_from_db({ - # 'type': db_cloud_storage.credentials_type, - # 'value': db_cloud_storage.value, - # }) - - # details = { - # 'resource_name': db_cloud_storage.resource_name, - # 'session_token': credentials.session_token, - # 'key': credentials.key, - # 'secret_key': credentials.secret_key, - # } - # cloud_storage_instance = get_cloud_storage_instance(cloud_provider=provider_type, **details) - # meta = cloud_storage_instance.download_file(meta_info_file[0]) - # #TODO - - if db_data.storage_method == StorageMethodChoice.FILE_SYSTEM or not settings.USE_CACHE: counter = itertools.count() generator = itertools.groupby(extractor, lambda x: next(counter) // db_data.chunk_size) diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index 31d502f97e07..0890fe750c4f 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -5,11 +5,12 @@ import os import os.path as osp import io +import json import shutil import traceback from datetime import datetime from distutils.util import strtobool -from tempfile import mkstemp +from tempfile import mkstemp, NamedTemporaryFile import cv2 import django_rq @@ -18,7 +19,7 @@ from django.conf import settings from django.contrib.auth.models import User from django.db import IntegrityError -from django.http import HttpResponse, HttpResponseNotFound +from django.http import HttpResponse, HttpResponseNotFound, HttpResponseBadRequest from django.utils import timezone from django.utils.decorators import method_decorator from django_filters import 
rest_framework as filters @@ -37,15 +38,17 @@ import cvat.apps.dataset_manager as dm import cvat.apps.dataset_manager.views # pylint: disable=unused-import from cvat.apps.authentication import auth +from cvat.apps.engine.cloud_provider import Credentials, CloudStorage from cvat.apps.dataset_manager.bindings import CvatImportError from cvat.apps.dataset_manager.serializers import DatasetFormatsSerializer from cvat.apps.engine.frame_provider import FrameProvider from cvat.apps.engine.models import ( Job, StatusChoice, Task, Project, Review, Issue, Comment, StorageMethodChoice, ReviewStatus, StorageChoice, DimensionType, Image, - CloudStorage, CredentialsTypeChoice, CloudProviderChoice + CredentialsTypeChoice, CloudProviderChoice ) +from cvat.apps.engine.models import CloudStorage as CloudStorageModel from cvat.apps.engine.serializers import ( AboutSerializer, AnnotationFileSerializer, BasicUserSerializer, DataMetaSerializer, DataSerializer, ExceptionSerializer, @@ -56,12 +59,11 @@ CloudStorageSerializer, BaseCloudStorageSerializer ) from cvat.apps.engine.utils import av_scan_paths +from utils.dataset_manifest import ImageManifestManager from . 
import models, task from .log import clogger, slogger -from cvat.apps.engine.cloud_provider import Credentials, get_cloud_storage_instance - class ServerViewSet(viewsets.ViewSet): serializer_class = None @@ -981,30 +983,44 @@ def self(self, request): @method_decorator(name='list', decorator=swagger_auto_schema( - operation_summary='Returns a paginated list of storages according to query parameters', - manual_parameters=[ - openapi.Parameter('provider_type', openapi.IN_QUERY, description="A supported provider of cloud storages", - type=openapi.TYPE_STRING, enum=CloudProviderChoice.list()), - openapi.Parameter('resource_name', openapi.IN_QUERY, description="A name of buket or container", type=openapi.TYPE_STRING), - openapi.Parameter('owner', openapi.IN_QUERY, description="A resource owner", type=openapi.TYPE_STRING), - openapi.Parameter('credentials_type', openapi.IN_QUERY, description="A type of a granting access", type=openapi.TYPE_STRING, enum=CredentialsTypeChoice.list()), - ], - responses={'200': CloudStorageSerializer(many=True)} + operation_summary='Returns a paginated list of storages according to query parameters', + manual_parameters=[ + openapi.Parameter('provider_type', openapi.IN_QUERY, description="A supported provider of cloud storages", + type=openapi.TYPE_STRING, enum=CloudProviderChoice.list()), + openapi.Parameter('resource', openapi.IN_QUERY, description="A name of buket or container", type=openapi.TYPE_STRING), + openapi.Parameter('owner', openapi.IN_QUERY, description="A resource owner", type=openapi.TYPE_STRING), + openapi.Parameter('credentials_type', openapi.IN_QUERY, description="A type of a granting access", type=openapi.TYPE_STRING, enum=CredentialsTypeChoice.list()), + ], + responses={'200': CloudStorageSerializer(many=True)}, + tags=['cloud storages'] + ) +) +@method_decorator(name='retrieve', decorator=swagger_auto_schema( + operation_summary='Method returns details of a specific cloud storage', + tags=['cloud storages'] ) ) 
-@method_decorator(name='retrieve', decorator=swagger_auto_schema(operation_summary='Method returns details of a specific cloud storage')) @method_decorator(name='create', decorator=swagger_auto_schema( operation_summary='Method creates a cloud storage with a specified characteristics', - responses={'201': openapi.Response(description='A storage has beed created')} + responses={'201': openapi.Response(description='A storage has beed created')}, + tags=['cloud storages'] + ) +) +@method_decorator(name='destroy', decorator=swagger_auto_schema( + operation_summary='Method deletes a specific cloud storage', + tags=['cloud storages'] + ) +) +@method_decorator(name='partial_update', decorator=swagger_auto_schema( + operation_summary='Methods does a partial update of chosen fields in a cloud storage instance', + tags=['cloud storages'] ) ) -@method_decorator(name='destroy', decorator=swagger_auto_schema(operation_summary='Method deletes a specific cloud storage')) -@method_decorator(name='partial_update', decorator=swagger_auto_schema(operation_summary='Methods does a partial update of chosen fields in a cloud storage instance')) class CloudStorageViewSet(auth.CloudStorageGetQuerySetMixin, viewsets.ModelViewSet): http_method_names = ['get', 'post', 'patch', 'delete'] - queryset = CloudStorage.objects.all().prefetch_related('data').order_by('-id') - search_fields = ("provider_type", "resource_name", "owner__username") - filterset_fields = ['provider_type', 'resource_name', 'credentials_type'] + queryset = CloudStorageModel.objects.all().prefetch_related('data').order_by('-id') + search_fields = ("provider_type", "resource", "owner__username") + filterset_fields = ['provider_type', 'resource', 'credentials_type'] def get_permissions(self): http_method = self.request.method @@ -1036,15 +1052,19 @@ def get_queryset(self): def perform_create(self, serializer): # check that instance of cloud storage exists provider_type = serializer.validated_data.get('provider_type') + 
credentials = Credentials( + session_token=serializer.validated_data.get('session_token', ''), + account_name=serializer.validated_data.get('account_name', ''), + key=serializer.validated_data.get('key', ''), + secret_key=serializer.validated_data.get('secret_key', '') + ) details = { - 'resource_name': serializer.validated_data.get('resource_name'), - 'session_token': serializer.validated_data.get('session_token'), - 'key': serializer.validated_data.get('key'), - 'secret_key': serializer.validated_data.get('secret_key'), + 'resource': serializer.validated_data.get('resource'), + 'credentials': credentials, } - cloud_storage_instance = get_cloud_storage_instance(cloud_provider=provider_type, **details) + storage = CloudStorage(cloud_provider=provider_type, **details) try: - cloud_storage_instance.is_exist() + storage.is_exist() except Exception as ex: message = str(ex) slogger.glob.error(message) @@ -1064,33 +1084,68 @@ def perform_destroy(self, instance): @method_decorator( name='retrieve', decorator=swagger_auto_schema( - operation_summary='Method returns list of available files', - responses={'200': openapi.Response(description='A list of a storage content')} + operation_summary='Method returns details of a cloud storage', + operation_description= + "Method returns list of available files, if action is content", + manual_parameters=[ + openapi.Parameter('action', openapi.IN_QUERY, description="", + type=openapi.TYPE_STRING, enum=['content']), + openapi.Parameter('manifest_path', openapi.IN_QUERY, + description="Path to the manifest file in a cloud storage", + type=openapi.TYPE_STRING + )], + responses={ + '200': openapi.Response(description='A list of a storage content'), + }, + tags=['cloud storages'] ) ) - @action(detail=True, methods=['GET']) - def content(self, request, pk): - try: - db_storage = CloudStorage.objects.get(pk=pk) - credentials = Credentials() - credentials.convert_from_db({ - 'type': db_storage.credentials_type, - 'value': 
db_storage.credentials, - }) - details = { - 'resource_name': db_storage.resource_name, - 'session_token': credentials.session_token, - 'key': credentials.key, - 'secret_key': credentials.secret_key, - } - cloud_storage_instance = get_cloud_storage_instance(cloud_provider=db_storage.provider_type, **details) - cloud_storage_instance.initialize_content() - return Response(data=cloud_storage_instance.content, content_type="text/plain") - - except CloudStorage.DoesNotExist: - message = f"Storage {pk} does not exist" - slogger.glob.error(message) - return HttpResponseNotFound(message) + def retrieve(self, request, *args, **kwargs): + action = request.query_params.get('action', '') + if action == 'content': + try: + pk = kwargs.get('pk') + db_storage = CloudStorageModel.objects.get(pk=pk) + credentials = Credentials() + credentials.convert_from_db({ + 'type': db_storage.credentials_type, + 'value': db_storage.credentials, + }) + details = { + 'resource': db_storage.resource, + 'credentials': credentials, + } + storage = CloudStorage(cloud_provider=db_storage.provider_type, **details) + storage.initialize_content() + storage_files = storage.content + + manifest_path = request.query_params.get('manifest_path', 'manifest.jsonl') + tmp_manifest = NamedTemporaryFile(mode='w+b', suffix='cvat', prefix='manifest') + storage.download_file(manifest_path, tmp_manifest.name) + manifest = ImageManifestManager(tmp_manifest.name) + manifest.init_index() + manifest_files = manifest.data + tmp_manifest.close() + content = {f:[] for f in set(storage_files) & set(manifest_files)} + for key, _ in content.items(): + if key in storage_files: content[key].append('s') # storage + if key in manifest_files: content[key].append('m') # manifest + + data = json.loads(content) + return Response(data=data, content_type="aplication/json") + + except CloudStorageModel.DoesNotExist: + message = f"Storage {pk} does not exist" + slogger.glob.error(message) + return HttpResponseNotFound(message) + except 
Exception as ex: + return HttpResponseBadRequest(str(ex)) + elif not action: + instance = self.get_object() + serializer = self.get_serializer(instance) + return Response(serializer.data) + else: + return HttpResponseBadRequest() def rq_handler(job, exc_type, exc_value, tb): job.exc_info = "".join( diff --git a/utils/dataset_manifest/core.py b/utils/dataset_manifest/core.py index 78a00b0b98bf..ea9837160ec6 100644 --- a/utils/dataset_manifest/core.py +++ b/utils/dataset_manifest/core.py @@ -5,7 +5,7 @@ import av import json import os -from abc import ABC, abstractmethod +from abc import ABC, abstractmethod, abstractproperty from collections import OrderedDict from contextlib import closing from PIL import Image @@ -324,6 +324,10 @@ def __getitem__(self, item): def index(self): return self._index + @abstractproperty + def data(self): + pass + class VideoManifestManager(_ManifestManager): def __init__(self, manifest_path, *args, **kwargs): super().__init__(manifest_path) @@ -373,6 +377,22 @@ def prepare_meta(media_file, upload_dir=None, chunk_size=36, force=False): meta_info.validate_seek_key_frames() return meta_info + @property + def video_name(self): + return self['properties']['name'] + + @property + def video_resolution(self): + return self['properties']['resolution'] + + @property + def video_length(self): + return self['properties']['length'] + + @property + def data(self): + return [self.video_name] + #TODO: add generic manifest structure file validation class ManifestValidator: def validate_base_info(self): @@ -410,7 +430,7 @@ def validate_frame_numbers(self): # not all videos contain information about numbers of frames frames = video_stream.frames if frames: - assert frames == self['properties']['length'], "The uploaded manifest does not match the video" + assert frames == self.video_length, "The uploaded manifest does not match the video" return class ImageManifestManager(_ManifestManager): @@ -443,4 +463,8 @@ def partial_update(self, number, properties): 
def prepare_meta(sources, **kwargs): meta_info = DatasetImagesReader(sources=sources, **kwargs) meta_info.create() - return meta_info \ No newline at end of file + return meta_info + + @property + def data(self): + return [f"{image['name']}{image['extension']}" for _, image in self] \ No newline at end of file From 8af9a399a1a47a33df65dfa8f8aed86447f9aeab Mon Sep 17 00:00:00 2001 From: Maya Date: Fri, 16 Apr 2021 00:31:08 +0300 Subject: [PATCH 10/26] Fix bucket public access --- cvat/apps/engine/cloud_provider.py | 8 +++++--- cvat/apps/engine/task.py | 2 +- cvat/apps/engine/views.py | 5 ++--- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/cvat/apps/engine/cloud_provider.py b/cvat/apps/engine/cloud_provider.py index dc9f2247eba3..423e0c977f18 100644 --- a/cvat/apps/engine/cloud_provider.py +++ b/cvat/apps/engine/cloud_provider.py @@ -5,6 +5,7 @@ import boto3 from boto3.s3.transfer import TransferConfig from botocore.exceptions import WaiterError, NoCredentialsError +from botocore.handlers import disable_signing from azure.storage.blob import BlobServiceClient from azure.core.exceptions import ResourceExistsError @@ -116,10 +117,11 @@ def __init__(self, bucket, access_key_id=None, secret_key=None, session_token=No ) elif any([access_key_id, secret_key, session_token]): raise Exception('Insufficient data for authorization') - else: - # anonymous access - self._client_s3 = boto3.client('s3') self._s3 = boto3.resource('s3') + # anonymous access + if not any([access_key_id, secret_key, session_token]): + self._s3.meta.client.meta.events.register('choose-signer.s3.*', disable_signing) + self._client_s3 = self._s3.meta.client self._bucket = self._s3.Bucket(bucket) @property diff --git a/cvat/apps/engine/task.py b/cvat/apps/engine/task.py index 2181fc542011..be7143d9b554 100644 --- a/cvat/apps/engine/task.py +++ b/cvat/apps/engine/task.py @@ -234,7 +234,7 @@ def _create_thread(tid, data): if data['server_files']: if db_data.storage == StorageChoice.LOCAL: 
_copy_data_from_share(data['server_files'], upload_dir) - else: + elif db_data.storage == StorageChoice.SHARE: upload_dir = settings.SHARE_ROOT av_scan_paths(upload_dir) diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index f43b69f6536d..f8d2e22bf53f 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -988,13 +988,12 @@ def self(self, request): serializer = serializer_class(request.user, context={ "request": request }) return Response(serializer.data) - @method_decorator(name='list', decorator=swagger_auto_schema( operation_summary='Returns a paginated list of storages according to query parameters', manual_parameters=[ openapi.Parameter('provider_type', openapi.IN_QUERY, description="A supported provider of cloud storages", type=openapi.TYPE_STRING, enum=CloudProviderChoice.list()), - openapi.Parameter('resource', openapi.IN_QUERY, description="A name of buket or container", type=openapi.TYPE_STRING), + openapi.Parameter('resource', openapi.IN_QUERY, description="A name of bucket or container", type=openapi.TYPE_STRING), openapi.Parameter('owner', openapi.IN_QUERY, description="A resource owner", type=openapi.TYPE_STRING), openapi.Parameter('credentials_type', openapi.IN_QUERY, description="A type of a granting access", type=openapi.TYPE_STRING, enum=CredentialsTypeChoice.list()), ], @@ -1138,7 +1137,7 @@ def retrieve(self, request, *args, **kwargs): if key in storage_files: content[key].append('s') # storage if key in manifest_files: content[key].append('m') # manifest - data = json.loads(content) + data = json.dumps(content) return Response(data=data, content_type="aplication/json") except CloudStorageModel.DoesNotExist: From 9caff79ec71ca72b71b83500dbd4ec8a4eb3dc6d Mon Sep 17 00:00:00 2001 From: Maya Date: Fri, 16 Apr 2021 02:29:21 +0300 Subject: [PATCH 11/26] Update migration dependency --- cvat/apps/engine/migrations/0039_auto_20210414_2110.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/cvat/apps/engine/migrations/0039_auto_20210414_2110.py b/cvat/apps/engine/migrations/0039_auto_20210414_2110.py index 8f915e277dee..07f3dfe3d1cf 100644 --- a/cvat/apps/engine/migrations/0039_auto_20210414_2110.py +++ b/cvat/apps/engine/migrations/0039_auto_20210414_2110.py @@ -10,7 +10,7 @@ class Migration(migrations.Migration): dependencies = [ migrations.swappable_dependency(settings.AUTH_USER_MODEL), - ('engine', '0038_manifest'), + ('engine', '0039_auto_training'), ] operations = [ From 64139069622921da1be2a37a718c1fd531ce255c Mon Sep 17 00:00:00 2001 From: Maya Date: Fri, 16 Apr 2021 02:48:15 +0300 Subject: [PATCH 12/26] Fix pylint issues --- cvat/apps/engine/admin.py | 4 ++-- cvat/apps/engine/cloud_provider.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cvat/apps/engine/admin.py b/cvat/apps/engine/admin.py index 7a919d624507..ba73dd2e6c79 100644 --- a/cvat/apps/engine/admin.py +++ b/cvat/apps/engine/admin.py @@ -11,7 +11,7 @@ class JobInline(admin.TabularInline): can_delete = False # Don't show extra lines to add an object - def has_add_permission(self, request, object=None): + def has_add_permission(self, request, obj): return False class SegmentInline(admin.TabularInline): @@ -21,7 +21,7 @@ class SegmentInline(admin.TabularInline): can_delete = False # Don't show extra lines to add an object - def has_add_permission(self, request, object=None): + def has_add_permission(self, request, obj): return False diff --git a/cvat/apps/engine/cloud_provider.py b/cvat/apps/engine/cloud_provider.py index 423e0c977f18..300576095632 100644 --- a/cvat/apps/engine/cloud_provider.py +++ b/cvat/apps/engine/cloud_provider.py @@ -4,7 +4,7 @@ import boto3 from boto3.s3.transfer import TransferConfig -from botocore.exceptions import WaiterError, NoCredentialsError +from botocore.exceptions import WaiterError from botocore.handlers import disable_signing from azure.storage.blob import BlobServiceClient From 91c0e4236b7cd21eb2d2c6c206cca18b98243176 Mon 
Sep 17 00:00:00 2001 From: Maya Date: Thu, 22 Apr 2021 13:02:18 +0300 Subject: [PATCH 13/26] Some fixes & bandit & add specific attr --- cvat/apps/engine/cache.py | 10 ++- cvat/apps/engine/cloud_provider.py | 76 +++++++++---------- ...20210414_2110.py => 0040_cloud_storage.py} | 4 +- cvat/apps/engine/models.py | 16 +++- cvat/apps/engine/serializers.py | 13 +++- cvat/apps/engine/task.py | 43 ++++++----- cvat/apps/engine/utils.py | 3 + cvat/apps/engine/views.py | 12 ++- cvat/settings/base.py | 13 ++-- 9 files changed, 109 insertions(+), 81 deletions(-) rename cvat/apps/engine/migrations/{0039_auto_20210414_2110.py => 0040_cloud_storage.py} (91%) diff --git a/cvat/apps/engine/cache.py b/cvat/apps/engine/cache.py index 999a271dc6b4..85c995ebcee6 100644 --- a/cvat/apps/engine/cache.py +++ b/cvat/apps/engine/cache.py @@ -13,7 +13,7 @@ ImageDatasetManifestReader, VideoDatasetManifestReader) from cvat.apps.engine.models import DataChoice, StorageChoice from cvat.apps.engine.models import DimensionType -from cvat.apps.engine.cloud_provider import CloudStorage, Credentials +from cvat.apps.engine.cloud_provider import get_cloud_storage_instance, Credentials from cvat.apps.engine.utils import md5_hash class CacheInteraction: def __init__(self, dimension=DimensionType.DIM_2D): @@ -50,7 +50,8 @@ def prepare_chunk_buff(self, db_data, quality, chunk_number): buff = BytesIO() upload_dir = { StorageChoice.LOCAL: db_data.get_upload_dirname(), - StorageChoice.SHARE: settings.SHARE_ROOT + StorageChoice.SHARE: settings.SHARE_ROOT, + StorageChoice.CLOUD_STORAGE: db_data.get_upload_dirname() }[db_data.storage] if hasattr(db_data, 'video'): source_path = os.path.join(upload_dir, db_data.video.path) @@ -75,9 +76,10 @@ def prepare_chunk_buff(self, db_data, quality, chunk_number): }) details = { 'resource': db_cloud_storage.resource, - 'credentials': credentials + 'credentials': credentials, + 'specific_attributes': db_cloud_storage.get_specific_attributes() } - cloud_storage_instance = 
CloudStorage(cloud_provider=db_cloud_storage.provider_type, **details) + cloud_storage_instance = get_cloud_storage_instance(cloud_provider=db_cloud_storage.provider_type, **details) cloud_storage_instance.initialize_content() for item in reader: name = f"{item['name']}{item['extension']}" diff --git a/cvat/apps/engine/cloud_provider.py b/cvat/apps/engine/cloud_provider.py index 300576095632..09c48f4b344b 100644 --- a/cvat/apps/engine/cloud_provider.py +++ b/cvat/apps/engine/cloud_provider.py @@ -58,7 +58,7 @@ def upload_file(self, file_obj, file_name): pass def __contains__(self, file_name): - return file_name in (item['name'] for item in self._files.values()) + return file_name in (item['name'] for item in self._files) def __len__(self): return len(self._files) @@ -67,46 +67,33 @@ def __len__(self): def content(self): return list(map(lambda x: x['name'] , self._files)) -# def get_cloud_storage_instance(cloud_provider, resource, credentials): -# instance = None -# проверить креденшелы! 
-# if cloud_provider == CloudProviderChoice.AWS_S3: -# instance = AWS_S3( -# bucket=resource, -# session_token=credentials.session_token, -# ) -# elif cloud_provider == CloudProviderChoice.AZURE_CONTAINER: -# instance = AzureBlobContainer( -# container_name=resource, -# sas_token=credentials.session_token, -# ) -# return instance - -# TODO: подумать возможно оставить функцию provider вместо класса ниже -class CloudStorage: - def __init__(self, cloud_provider, resource, credentials): - if cloud_provider == CloudProviderChoice.AWS_S3: - self.__instance = AWS_S3( - bucket=resource, - access_key_id=credentials.key, - secret_key=credentials.secret_key, - session_token=credentials.session_token, - ) - elif cloud_provider == CloudProviderChoice.AZURE_CONTAINER: - self.__instance = AzureBlobContainer( - container=resource, - account_name=credentials.account_name, - sas_token=credentials.session_token, - ) - else: - raise NotImplementedError() - - def __getattr__(self, name): - assert hasattr(self.__instance, name), 'Unknown behavior: {}'.format(name) - return self.__instance.__getattribute__(name) +def get_cloud_storage_instance(cloud_provider, resource, credentials, specific_attributes=None): + instance = None + if cloud_provider == CloudProviderChoice.AWS_S3: + instance = AWS_S3( + bucket=resource, + access_key_id=credentials.key, + secret_key=credentials.secret_key, + session_token=credentials.session_token, + region=specific_attributes.get('region', 'us-east-2') + ) + elif cloud_provider == CloudProviderChoice.AZURE_CONTAINER: + instance = AzureBlobContainer( + container=resource, + account_name=credentials.account_name, + sas_token=credentials.session_token + ) + else: + raise NotImplementedError() + return instance class AWS_S3(_CloudStorage): - def __init__(self, bucket, access_key_id=None, secret_key=None, session_token=None): + def __init__(self, + bucket, + region, + access_key_id=None, + secret_key=None, + session_token=None): super().__init__() if 
all([access_key_id, secret_key, session_token]): self._client_s3 = boto3.client( @@ -114,6 +101,7 @@ def __init__(self, bucket, access_key_id=None, secret_key=None, session_token=No aws_access_key_id=access_key_id, aws_secret_access_key=secret_key, aws_session_token=session_token, + region_name=region ) elif any([access_key_id, secret_key, session_token]): raise Exception('Insufficient data for authorization') @@ -123,6 +111,7 @@ def __init__(self, bucket, access_key_id=None, secret_key=None, session_token=No self._s3.meta.client.meta.events.register('choose-signer.s3.*', disable_signing) self._client_s3 = self._s3.meta.client self._bucket = self._s3.Bucket(bucket) + self.region = region @property def bucket(self): @@ -195,13 +184,18 @@ def download_fileobj(self, key): def create(self): try: - _ = self._bucket.create( + responce = self._bucket.create( ACL='private', CreateBucketConfiguration={ - 'LocationConstraint': 'us-east-2',#TODO + 'LocationConstraint': self.region, }, ObjectLockEnabledForBucket=False ) + slogger.glob.info( + 'Bucket {} has been created on {} region'.format( + self.name, + responce['Location'] + )) except Exception as ex: msg = str(ex) slogger.glob.info(msg) diff --git a/cvat/apps/engine/migrations/0039_auto_20210414_2110.py b/cvat/apps/engine/migrations/0040_cloud_storage.py similarity index 91% rename from cvat/apps/engine/migrations/0039_auto_20210414_2110.py rename to cvat/apps/engine/migrations/0040_cloud_storage.py index 07f3dfe3d1cf..c952b1a93740 100644 --- a/cvat/apps/engine/migrations/0039_auto_20210414_2110.py +++ b/cvat/apps/engine/migrations/0040_cloud_storage.py @@ -1,4 +1,4 @@ -# Generated by Django 3.1.7 on 2021-04-14 21:10 +# Generated by Django 3.1.7 on 2021-04-22 09:45 import cvat.apps.engine.models from django.conf import settings @@ -29,6 +29,8 @@ class Migration(migrations.Migration): ('updated_date', models.DateTimeField(auto_now=True)), ('credentials', models.CharField(max_length=500)), ('credentials_type', 
models.CharField(choices=[('TEMP_KEY_SECRET_KEY_TOKEN_PAIR', 'TEMP_KEY_SECRET_KEY_TOKEN_PAIR'), ('ACCOUNT_NAME_TOKEN_PAIR', 'ACCOUNT_NAME_TOKEN_PAIR'), ('ANONYMOUS_ACCESS', 'ANONYMOUS_ACCESS')], max_length=30)), + ('specific_attributes', models.CharField(blank=True, max_length=50)), + ('description', models.TextField(default='')), ('owner', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='cloud_storages', to=settings.AUTH_USER_MODEL)), ], options={ diff --git a/cvat/apps/engine/models.py b/cvat/apps/engine/models.py index 343d3703a58a..675b46301246 100644 --- a/cvat/apps/engine/models.py +++ b/cvat/apps/engine/models.py @@ -554,8 +554,9 @@ def __str__(self): return self.value class CredentialsTypeChoice(str, Enum): - TEMP_KEY_SECRET_KEY_TOKEN_PAIR = 'TEMP_KEY_SECRET_KEY_TOKEN_PAIR' - ACCOUNT_NAME_TOKEN_PAIR = 'ACCOUNT_NAME_TOKEN_PAIR' + # ignore bandit issues because false positives + TEMP_KEY_SECRET_KEY_TOKEN_PAIR = 'TEMP_KEY_SECRET_KEY_TOKEN_PAIR' # nosec + ACCOUNT_NAME_TOKEN_PAIR = 'ACCOUNT_NAME_TOKEN_PAIR' # nosec ANONYMOUS_ACCESS = 'ANONYMOUS_ACCESS' @classmethod @@ -578,6 +579,8 @@ class CloudStorage(models.Model): updated_date = models.DateTimeField(auto_now=True) credentials = models.CharField(max_length=500) credentials_type = models.CharField(max_length=30, choices=CredentialsTypeChoice.choices())#auth_type + specific_attributes = models.CharField(max_length=50, blank=True) + description = models.TextField(default='') class Meta: default_permissions = () @@ -593,4 +596,11 @@ def get_storage_logs_dirname(self): return os.path.join(self.get_storage_dirname(), 'logs') def get_log_path(self): - return os.path.join(self.get_storage_dirname(), "storage.log") \ No newline at end of file + return os.path.join(self.get_storage_dirname(), "storage.log") + + def get_specific_attributes(self): + attributes = self.specific_attributes.split('&') + return { + item.split('=')[0].strip(): item.split('=')[1].strip() + for 
item in attributes + } if len(attributes) else dict() \ No newline at end of file diff --git a/cvat/apps/engine/serializers.py b/cvat/apps/engine/serializers.py index fbf8b63e1d8d..bc4cf1d0ae12 100644 --- a/cvat/apps/engine/serializers.py +++ b/cvat/apps/engine/serializers.py @@ -11,7 +11,7 @@ from cvat.apps.dataset_manager.formats.utils import get_label_color from cvat.apps.engine import models -from cvat.apps.engine.cloud_provider import Credentials, CloudStorage +from cvat.apps.engine.cloud_provider import get_cloud_storage_instance, Credentials from cvat.apps.engine.log import slogger class BasicUserSerializer(serializers.ModelSerializer): @@ -725,7 +725,7 @@ class Meta: fields = ( 'provider_type', 'resource', 'owner', 'credentials_type', 'created_date', 'updated_date', 'session_token', 'account_name', 'key', - 'secret_key' + 'secret_key', 'specific_attributes', 'description' ) read_only_fields = ('created_date', 'updated_date', 'owner') @@ -748,9 +748,14 @@ def create(self, validated_data): if should_be_created: details = { 'resource': validated_data.get('resource'), - 'credentials': credentials + 'credentials': credentials, + 'specific_attributes': { + item.split('=')[0].strip(): item.split('=')[1].strip() + for item in validated_data.get('specific_attributes').split('&') + } if len(validated_data.get('specific_attributes', '')) + else dict() } - storage = CloudStorage(cloud_provider=provider_type, **details) + storage = get_cloud_storage_instance(cloud_provider=provider_type, **details) try: storage.create() except Exception as ex: diff --git a/cvat/apps/engine/task.py b/cvat/apps/engine/task.py index be7143d9b554..7f957ac52499 100644 --- a/cvat/apps/engine/task.py +++ b/cvat/apps/engine/task.py @@ -28,7 +28,7 @@ from . 
import models from .log import slogger -from .cloud_provider import CloudStorage, Credentials +from .cloud_provider import get_cloud_storage_instance, Credentials ############################# Low Level server API @@ -236,6 +236,23 @@ def _create_thread(tid, data): _copy_data_from_share(data['server_files'], upload_dir) elif db_data.storage == StorageChoice.SHARE: upload_dir = settings.SHARE_ROOT + else: # cloud storage + if not manifest_file: raise Exception('A manifest file not found') + db_cloud_storage = db_data.cloud_storage + credentials = Credentials() + credentials.convert_from_db({ + 'type': db_cloud_storage.credentials_type, + 'value': db_cloud_storage.value, + }) + + details = { + 'resource': db_cloud_storage.resource, + 'credentials': credentials, + 'specific_attributes': db_cloud_storage.get_specific_attributes() + } + cloud_storage_instance = get_cloud_storage_instance(cloud_provider=db_cloud_storage.provider_type, **details) + cloud_storage_instance.download_file(manifest_file[0], db_data.get_manifest_path()) + cloud_storage_instance.download_file(media['image'][0], os.path.join(upload_dir, media['image'][0])) av_scan_paths(upload_dir) @@ -317,7 +334,13 @@ def update_progress(progress): # calculate chunk size if it isn't specified if db_data.chunk_size is None: if isinstance(compressed_chunk_writer, ZipCompressedChunkWriter): - w, h = extractor.get_image_size(0) + if not (db_data.storage == StorageChoice.CLOUD_STORAGE): + w, h = extractor.get_image_size(0) + else: + manifest = ImageManifestManager(db_data.get_manifest_path()) + manifest.init_index() + img_properties = manifest[0] + w, h = img_properties['width'], img_properties['height'] area = h * w db_data.chunk_size = max(2, min(72, 36 * 1920 * 1080 // area)) else: @@ -401,8 +424,6 @@ def _update_status(msg): db_data.size = len(extractor) manifest = ImageManifestManager(db_data.get_manifest_path()) if not manifest_file: - if db_data.storage == StorageChoice.CLOUD_STORAGE: - raise Exception('A 
manifest file was not foud') if db_task.dimension == DimensionType.DIM_2D: meta_info = manifest.prepare_meta( sources=extractor.absolute_source_paths, @@ -418,20 +439,6 @@ def _update_status(msg): 'extension': ext }) manifest.create(content) - if db_data.storage == StorageChoice.CLOUD_STORAGE: - db_cloud_storage = db_data.cloud_storage - credentials = Credentials() - credentials.convert_from_db({ - 'type': db_cloud_storage.credentials_type, - 'value': db_cloud_storage.value, - }) - - details = { - 'resource': db_cloud_storage.resource, - 'credentials': credentials, - } - cloud_storage_instance = CloudStorage(cloud_provider=db_cloud_storage.provider_type, **details) - cloud_storage_instance.download_file(manifest_file[0], db_data.get_manifest_path()) manifest.init_index() counter = itertools.count() for _, chunk_frames in itertools.groupby(extractor.frame_range, lambda x: next(counter) // db_data.chunk_size): diff --git a/cvat/apps/engine/utils.py b/cvat/apps/engine/utils.py index f37440731281..87b7b856e301 100644 --- a/cvat/apps/engine/utils.py +++ b/cvat/apps/engine/utils.py @@ -12,6 +12,7 @@ import subprocess import os from av import VideoFrame +from PIL import Image from django.core.exceptions import ValidationError @@ -95,4 +96,6 @@ def rotate_image(image, angle): def md5_hash(frame): if isinstance(frame, VideoFrame): frame = frame.to_image() + elif isinstance(frame, str): + frame = Image.open(frame, 'r') return hashlib.md5(frame.tobytes()).hexdigest() # nosec \ No newline at end of file diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index f8d2e22bf53f..cd631f320f84 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -38,7 +38,7 @@ import cvat.apps.dataset_manager as dm import cvat.apps.dataset_manager.views # pylint: disable=unused-import from cvat.apps.authentication import auth -from cvat.apps.engine.cloud_provider import Credentials, CloudStorage +from cvat.apps.engine.cloud_provider import 
get_cloud_storage_instance, Credentials from cvat.apps.dataset_manager.bindings import CvatImportError from cvat.apps.dataset_manager.serializers import DatasetFormatsSerializer from cvat.apps.engine.frame_provider import FrameProvider @@ -1067,8 +1067,13 @@ def perform_create(self, serializer): details = { 'resource': serializer.validated_data.get('resource'), 'credentials': credentials, + 'specific_attributes': { + item.split('=')[0].strip(): item.split('=')[1].strip() + for item in serializer.validated_data.get('specific_attributes').split('&') + } if len(serializer.validated_data.get('specific_attributes', '')) + else dict() } - storage = CloudStorage(cloud_provider=provider_type, **details) + storage = get_cloud_storage_instance(cloud_provider=provider_type, **details) try: storage.is_exist() except Exception as ex: @@ -1120,8 +1125,9 @@ def retrieve(self, request, *args, **kwargs): details = { 'resource': db_storage.resource, 'credentials': credentials, + 'specific_attributes': db_storage.get_specific_attributes() } - storage = CloudStorage(cloud_provider=db_storage.provider_type, **details) + storage = get_cloud_storage_instance(cloud_provider=db_storage.provider_type, **details) storage.initialize_content() storage_files = storage.content diff --git a/cvat/settings/base.py b/cvat/settings/base.py index 9f29f2a40763..2788e7142799 100644 --- a/cvat/settings/base.py +++ b/cvat/settings/base.py @@ -56,8 +56,7 @@ def add_ssh_keys(): IGNORE_FILES = ('README.md', 'ssh.pid') keys_to_add = [entry.name for entry in os.scandir(ssh_dir) if entry.name not in IGNORE_FILES] keys_to_add = ' '.join(os.path.join(ssh_dir, f) for f in keys_to_add) - subprocess.run(['ssh-add {}'.format(keys_to_add)], - shell = True, + subprocess.run(['ssh-add', '{}'.format(keys_to_add)], # nosec stderr = subprocess.PIPE, # lets set the timeout if ssh-add requires a input passphrase for key # otherwise the process will be freezed @@ -68,14 +67,14 @@ def add_ssh_keys(): fcntl.flock(pid, 
fcntl.LOCK_EX) try: add_ssh_keys() - keys = subprocess.run(['ssh-add -l'], shell = True, + keys = subprocess.run(['ssh-add', '-l'], # nosec stdout = subprocess.PIPE).stdout.decode('utf-8').split('\n') if 'has no identities' in keys[0]: print('SSH keys were not found') volume_keys = os.listdir(keys_dir) if not ('id_rsa' in volume_keys and 'id_rsa.pub' in volume_keys): print('New pair of keys are being generated') - subprocess.run(['ssh-keygen -b 4096 -t rsa -f {}/id_rsa -q -N ""'.format(ssh_dir)], shell = True) + subprocess.run(['ssh-keygen -b 4096 -t rsa -f {}/id_rsa -q -N ""'.format(ssh_dir)]) # nosec shutil.copyfile('{}/id_rsa'.format(ssh_dir), '{}/id_rsa'.format(keys_dir)) shutil.copymode('{}/id_rsa'.format(ssh_dir), '{}/id_rsa'.format(keys_dir)) shutil.copyfile('{}/id_rsa.pub'.format(ssh_dir), '{}/id_rsa.pub'.format(keys_dir)) @@ -86,15 +85,15 @@ def add_ssh_keys(): shutil.copymode('{}/id_rsa'.format(keys_dir), '{}/id_rsa'.format(ssh_dir)) shutil.copyfile('{}/id_rsa.pub'.format(keys_dir), '{}/id_rsa.pub'.format(ssh_dir)) shutil.copymode('{}/id_rsa.pub'.format(keys_dir), '{}/id_rsa.pub'.format(ssh_dir)) - subprocess.run(['ssh-add', '{}/id_rsa'.format(ssh_dir)], shell = True) + subprocess.run(['ssh-add', '{}/id_rsa'.format(ssh_dir)]) # nosec finally: fcntl.flock(pid, fcntl.LOCK_UN) try: if os.getenv("SSH_AUTH_SOCK", None): generate_ssh_keys() -except Exception: - pass +except Exception as ex: + print(str(ex)) INSTALLED_APPS = [ 'django.contrib.admin', From 8c39675d142679b2d853c463f633a06fca486471 Mon Sep 17 00:00:00 2001 From: Maya Date: Tue, 4 May 2021 22:13:15 +0300 Subject: [PATCH 14/26] Some fixes --- cvat/apps/engine/cache.py | 2 +- cvat/apps/engine/cloud_provider.py | 51 ++++++++---------------------- cvat/apps/engine/views.py | 6 ++-- cvat/requirements/base.txt | 2 +- 4 files changed, 18 insertions(+), 43 deletions(-) diff --git a/cvat/apps/engine/cache.py b/cvat/apps/engine/cache.py index 85c995ebcee6..88c58124b6d8 100644 --- a/cvat/apps/engine/cache.py 
+++ b/cvat/apps/engine/cache.py @@ -51,7 +51,7 @@ def prepare_chunk_buff(self, db_data, quality, chunk_number): upload_dir = { StorageChoice.LOCAL: db_data.get_upload_dirname(), StorageChoice.SHARE: settings.SHARE_ROOT, - StorageChoice.CLOUD_STORAGE: db_data.get_upload_dirname() + StorageChoice.CLOUD_STORAGE: db_data.get_upload_dirname(), }[db_data.storage] if hasattr(db_data, 'video'): source_path = os.path.join(upload_dir, db_data.video.path) diff --git a/cvat/apps/engine/cloud_provider.py b/cvat/apps/engine/cloud_provider.py index 09c48f4b344b..1f83c20d700c 100644 --- a/cvat/apps/engine/cloud_provider.py +++ b/cvat/apps/engine/cloud_provider.py @@ -31,14 +31,6 @@ def create(self): def is_exist(self): pass - # @abstractmethod - # def head(self): - # pass - - # @abstractproperty - # def supported_files(self): - # pass - @abstractmethod def initialize_content(self): pass @@ -52,6 +44,8 @@ def download_file(self, key, path): if isinstance(file_obj, BytesIO): with open(path, 'wb') as f: f.write(file_obj.getvalue()) + else: + raise NotImplementedError("Unsupported type {} was found".format(type(file_obj))) @abstractmethod def upload_file(self, file_obj, file_name): @@ -88,6 +82,13 @@ def get_cloud_storage_instance(cloud_provider, resource, credentials, specific_a return instance class AWS_S3(_CloudStorage): + waiter_config = { + 'Delay': 5, # The amount of time in seconds to wait between attempts. Default: 5 + 'MaxAttempts': 3, # The maximum number of attempts to be made. 
Default: 20 + } + transfer_config = { + 'max_io_queue': 10, + } def __init__(self, bucket, region, @@ -121,19 +122,12 @@ def bucket(self): def name(self): return self._bucket.name - # def is_object_exist(self, verifiable='bucket_exist', config=None): - # waiter = self._client_s3.get_waiter(verifiable) - # waiter.wait(**config) - def is_exist(self): waiter = self._client_s3.get_waiter('bucket_exists') try: waiter.wait( Bucket=self.name, - WaiterConfig={ - 'Delay': 5, # The amount of time in seconds to wait between attempts. Default: 5 - 'MaxAttempts': 3 # The maximum number of attempts to be made. Default: 20 - } + WaiterConfig=self.waiter_config ) except WaiterError: raise Exception('A resource {} unavailable'.format(self.name)) @@ -144,26 +138,16 @@ def is_object_exist(self, key_object): waiter.wait( Bucket=self._bucket, Key=key_object, - WaiterConfig={ - 'Delay': 5, - 'MaxAttempts': 3, - }, + WaiterConfig=self.waiter_config ) except WaiterError: raise Exception('A file {} unavailable'.format(key_object)) - def head(self): - pass - - # @property - # def supported_files(self): - # pass - def upload_file(self, file_obj, file_name): self._bucket.upload_fileobj( Fileobj=file_obj, Key=file_name, - Config=TransferConfig(max_io_queue=10) + Config=TransferConfig(**self.transfer_config) ) def initialize_content(self): @@ -177,7 +161,7 @@ def download_fileobj(self, key): self.bucket.download_fileobj( Key=key, Fileobj=buf, - Config=TransferConfig(max_io_queue=10) + Config=TransferConfig(**self.transfer_config) ) buf.seek(0) return buf @@ -199,7 +183,7 @@ def create(self): except Exception as ex: msg = str(ex) slogger.glob.info(msg) - raise Exception(str(ex)) + raise Exception(msg) class AzureBlobContainer(_CloudStorage): @@ -249,13 +233,6 @@ def is_object_exist(self, file_name): blob_client = self._container_client.get_blob_client(file_name) return blob_client.exists() - def head(self): - pass - - # @property - # def supported_files(self): - # pass - def upload_file(self, 
file_obj, file_name): self._container_client.upload_blob(name=file_name, data=file_obj) diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index cd631f320f84..3b7efb74e732 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -1050,10 +1050,8 @@ def get_queryset(self): queryset = super().get_queryset() if (provider_type := self.request.query_params.get('provider_type', None)): if provider_type in CloudProviderChoice.list(): - queryset = queryset.filter(provider_type=provider_type) - else: - raise ValidationError('Unsupported type of cloud provider') - return queryset + return queryset.filter(provider_type=provider_type) + raise ValidationError('Unsupported type of cloud provider') def perform_create(self, serializer): # check that instance of cloud storage exists diff --git a/cvat/requirements/base.txt b/cvat/requirements/base.txt index 0dbd9c8d3b4c..66886dd13009 100644 --- a/cvat/requirements/base.txt +++ b/cvat/requirements/base.txt @@ -45,7 +45,7 @@ tensorflow==2.4.1 # Optional requirement of Datumaro patool==1.12 diskcache==5.0.2 open3d==0.11.2 -boto3==1.16.26 +boto3==1.17.61 azure-storage-blob==12.6.0 # --no-binary=datumaro: workaround for pip to install # opencv-headless instead of regular opencv, to actually run setup script From 04388a3f680bee7c5213180359382e03118f2e5e Mon Sep 17 00:00:00 2001 From: Maya Date: Wed, 5 May 2021 11:15:52 +0300 Subject: [PATCH 15/26] Fix returned response after re-requesting storage creation --- cvat/apps/engine/views.py | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index 3b7efb74e732..18896c5e3eb9 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -1001,17 +1001,6 @@ def self(self, request): tags=['cloud storages'] ) ) -@method_decorator(name='retrieve', decorator=swagger_auto_schema( - operation_summary='Method returns details of a specific cloud storage', - 
tags=['cloud storages'] - ) -) -@method_decorator(name='create', decorator=swagger_auto_schema( - operation_summary='Method creates a cloud storage with a specified characteristics', - responses={'201': openapi.Response(description='A storage has beed created')}, - tags=['cloud storages'] - ) -) @method_decorator(name='destroy', decorator=swagger_auto_schema( operation_summary='Method deletes a specific cloud storage', tags=['cloud storages'] @@ -1090,10 +1079,27 @@ def perform_destroy(self, instance): super().perform_destroy(instance) shutil.rmtree(cloud_storage_dirname, ignore_errors=True) + @method_decorator(name='create', decorator=swagger_auto_schema( + operation_summary='Method creates a cloud storage with a specified characteristics', + responses={ + '201': openapi.Response(description='A storage has beed created') + }, + tags=['cloud storages'] + ) + ) + def create(self, request, *args, **kwargs): + try: + response = super().create(request, *args, **kwargs) + except IntegrityError: + response = HttpResponseBadRequest('Same storage already exists') + except Exception as ex: + response = HttpResponseBadRequest(str(ex)) + return response + @method_decorator( name='retrieve', decorator=swagger_auto_schema( - operation_summary='Method returns details of a cloud storage', + operation_summary='Method returns details of a specific cloud storage', operation_description= "Method returns list of available files, if action is content", manual_parameters=[ From a12aaa769ab9ad3f15aa1043cb707df9d14b668e Mon Sep 17 00:00:00 2001 From: Maya Date: Thu, 6 May 2021 15:24:24 +0300 Subject: [PATCH 16/26] Some fixes & size restriction fixes --- cvat/apps/engine/cloud_provider.py | 16 ++++++++-------- .../apps/engine/migrations/0040_cloud_storage.py | 6 +++--- cvat/apps/engine/models.py | 14 +++++++++++--- cvat/apps/engine/serializers.py | 8 ++++---- cvat/apps/engine/views.py | 11 +++++------ 5 files changed, 31 insertions(+), 24 deletions(-) diff --git 
a/cvat/apps/engine/cloud_provider.py b/cvat/apps/engine/cloud_provider.py index 1f83c20d700c..c418b027fe9d 100644 --- a/cvat/apps/engine/cloud_provider.py +++ b/cvat/apps/engine/cloud_provider.py @@ -147,7 +147,7 @@ def upload_file(self, file_obj, file_name): self._bucket.upload_fileobj( Fileobj=file_obj, Key=file_name, - Config=TransferConfig(**self.transfer_config) + Config=TransferConfig(max_io_queue=self.transfer_config['max_io_queue']) ) def initialize_content(self): @@ -161,7 +161,7 @@ def download_fileobj(self, key): self.bucket.download_fileobj( Key=key, Fileobj=buf, - Config=TransferConfig(**self.transfer_config) + Config=TransferConfig(max_io_queue=self.transfer_config['max_io_queue']) ) buf.seek(0) return buf @@ -186,7 +186,7 @@ def create(self): raise Exception(msg) class AzureBlobContainer(_CloudStorage): - + MAX_CONCURRENCY = 3 def __init__(self, container, account_name, sas_token=None): super().__init__() self._account_name = account_name @@ -237,6 +237,7 @@ def upload_file(self, file_obj, file_name): self._container_client.upload_blob(name=file_name, data=file_obj) + # TODO: # def multipart_upload(self, file_obj): # pass @@ -247,14 +248,13 @@ def initialize_content(self): } for item in files] def download_fileobj(self, key): - MAX_CONCURRENCY = 3 buf = BytesIO() storage_stream_downloader = self._container_client.download_blob( blob=key, offset=None, length=None, ) - storage_stream_downloader.download_to_stream(buf, max_concurrency=MAX_CONCURRENCY) + storage_stream_downloader.download_to_stream(buf, max_concurrency=self.MAX_CONCURRENCY) buf.seek(0) return buf @@ -273,7 +273,7 @@ def __init__(self, **credentials): def convert_to_db(self): converted_credentials = { - CredentialsTypeChoice.TEMP_KEY_SECRET_KEY_TOKEN_PAIR : \ + CredentialsTypeChoice.TEMP_KEY_SECRET_KEY_TOKEN_SET : \ " ".join([self.key, self.secret_key, self.session_token]), CredentialsTypeChoice.ACCOUNT_NAME_TOKEN_PAIR : " ".join([self.account_name, self.session_token]), 
CredentialsTypeChoice.ANONYMOUS_ACCESS: "", @@ -282,12 +282,12 @@ def convert_to_db(self): def convert_from_db(self, credentials): self.credentials_type = credentials.get('type') - if self.credentials_type == CredentialsTypeChoice.TEMP_KEY_SECRET_KEY_TOKEN_PAIR: + if self.credentials_type == CredentialsTypeChoice.TEMP_KEY_SECRET_KEY_TOKEN_SET: self.key, self.secret_key, self.session_token = credentials.get('value').split() elif self.credentials_type == CredentialsTypeChoice.ACCOUNT_NAME_TOKEN_PAIR: self.account_name, self.session_token = credentials.get('value').split() else: - self.account_name, self.session_token, self.key, self.secret_key = ("", "", "", "") + self.account_name, self.session_token, self.key, self.secret_key = ('', '', '', '') self.credentials_type = None def mapping_with_new_values(self, credentials): diff --git a/cvat/apps/engine/migrations/0040_cloud_storage.py b/cvat/apps/engine/migrations/0040_cloud_storage.py index c952b1a93740..22d7da1c302f 100644 --- a/cvat/apps/engine/migrations/0040_cloud_storage.py +++ b/cvat/apps/engine/migrations/0040_cloud_storage.py @@ -1,4 +1,4 @@ -# Generated by Django 3.1.7 on 2021-04-22 09:45 +# Generated by Django 3.1.8 on 2021-05-06 12:11 import cvat.apps.engine.models from django.conf import settings @@ -24,11 +24,11 @@ class Migration(migrations.Migration): fields=[ ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), ('provider_type', models.CharField(choices=[('AWS_S3_BUCKET', 'AWS_S3'), ('AZURE_CONTAINER', 'AZURE_CONTAINER'), ('GOOGLE_DRIVE', 'GOOGLE_DRIVE')], max_length=20)), - ('resource', models.CharField(max_length=50)), + ('resource', models.CharField(max_length=63)), ('created_date', models.DateTimeField(auto_now_add=True)), ('updated_date', models.DateTimeField(auto_now=True)), ('credentials', models.CharField(max_length=500)), - ('credentials_type', models.CharField(choices=[('TEMP_KEY_SECRET_KEY_TOKEN_PAIR', 'TEMP_KEY_SECRET_KEY_TOKEN_PAIR'), 
('ACCOUNT_NAME_TOKEN_PAIR', 'ACCOUNT_NAME_TOKEN_PAIR'), ('ANONYMOUS_ACCESS', 'ANONYMOUS_ACCESS')], max_length=30)), + ('credentials_type', models.CharField(choices=[('TEMP_KEY_SECRET_KEY_TOKEN_SET', 'TEMP_KEY_SECRET_KEY_TOKEN_SET'), ('ACCOUNT_NAME_TOKEN_PAIR', 'ACCOUNT_NAME_TOKEN_PAIR'), ('ANONYMOUS_ACCESS', 'ANONYMOUS_ACCESS')], max_length=29)), ('specific_attributes', models.CharField(blank=True, max_length=50)), ('description', models.TextField(default='')), ('owner', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='cloud_storages', to=settings.AUTH_USER_MODEL)), diff --git a/cvat/apps/engine/models.py b/cvat/apps/engine/models.py index 675b46301246..2dd699210176 100644 --- a/cvat/apps/engine/models.py +++ b/cvat/apps/engine/models.py @@ -555,7 +555,7 @@ class CredentialsTypeChoice(str, Enum): # ignore bandit issues because false positives - TEMP_KEY_SECRET_KEY_TOKEN_PAIR = 'TEMP_KEY_SECRET_KEY_TOKEN_PAIR' # nosec + TEMP_KEY_SECRET_KEY_TOKEN_SET = 'TEMP_KEY_SECRET_KEY_TOKEN_SET' # nosec ACCOUNT_NAME_TOKEN_PAIR = 'ACCOUNT_NAME_TOKEN_PAIR' # nosec ANONYMOUS_ACCESS = 'ANONYMOUS_ACCESS' @@ -571,14 +571,22 @@ def __str__(self): return self.value class CloudStorage(models.Model): + # restrictions: + # AWS bucket name, Azure container name - 63 + # AWS access key id - 20 + # AWS secret access key - 40 + # AWS temporary session token - None + # The size of the security token that AWS STS API operations return is not fixed. + # We strongly recommend that you make no assumptions about the maximum size. + # The typical token size is less than 4096 bytes, but that can vary. 
provider_type = models.CharField(max_length=20, choices=CloudProviderChoice.choices()) - resource = models.CharField(max_length=50) + resource = models.CharField(max_length=63) owner = models.ForeignKey(User, null=True, blank=True, on_delete=models.SET_NULL, related_name="cloud_storages") created_date = models.DateTimeField(auto_now_add=True) updated_date = models.DateTimeField(auto_now=True) credentials = models.CharField(max_length=500) - credentials_type = models.CharField(max_length=30, choices=CredentialsTypeChoice.choices())#auth_type + credentials_type = models.CharField(max_length=29, choices=CredentialsTypeChoice.choices())#auth_type specific_attributes = models.CharField(max_length=50, blank=True) description = models.TextField(default='') diff --git a/cvat/apps/engine/serializers.py b/cvat/apps/engine/serializers.py index bc4cf1d0ae12..347577120023 100644 --- a/cvat/apps/engine/serializers.py +++ b/cvat/apps/engine/serializers.py @@ -715,10 +715,10 @@ class Meta: class CloudStorageSerializer(serializers.ModelSerializer): owner = BasicUserSerializer(required=False) - session_token = serializers.CharField(max_length=400, allow_blank=True, required=False) - key = serializers.CharField(max_length=40, allow_blank=True, required=False) - secret_key = serializers.CharField(max_length=60, allow_blank=True, required=False) - account_name = serializers.CharField(max_length=50, allow_blank=True, required=False) + session_token = serializers.CharField(max_length=440, allow_blank=True, required=False) + key = serializers.CharField(max_length=20, allow_blank=True, required=False) + secret_key = serializers.CharField(max_length=40, allow_blank=True, required=False) + account_name = serializers.CharField(max_length=24, allow_blank=True, required=False) class Meta: model = models.CloudStorage diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index 18896c5e3eb9..e7cd8e0b3343 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ 
-1136,12 +1136,11 @@ def retrieve(self, request, *args, **kwargs): storage_files = storage.content manifest_path = request.query_params.get('manifest_path', 'manifest.jsonl') - tmp_manifest = NamedTemporaryFile(mode='w+b', suffix='cvat', prefix='manifest') - storage.download_file(manifest_path, tmp_manifest.name) - manifest = ImageManifestManager(tmp_manifest.name) - manifest.init_index() - manifest_files = manifest.data - tmp_manifest.close() + with NamedTemporaryFile(mode='w+b', suffix='cvat', prefix='manifest') as tmp_manifest: + storage.download_file(manifest_path, tmp_manifest.name) + manifest = ImageManifestManager(tmp_manifest.name) + manifest.init_index() + manifest_files = manifest.data content = {f:[] for f in set(storage_files) & set(manifest_files)} for key, _ in content.items(): if key in storage_files: content[key].append('s') # storage From 2d1073e853cb2018ddf86b763ff881da27ff5e3b Mon Sep 17 00:00:00 2001 From: Maya Date: Fri, 7 May 2021 10:52:33 +0300 Subject: [PATCH 17/26] Add display name --- cvat/apps/engine/migrations/0040_cloud_storage.py | 3 ++- cvat/apps/engine/models.py | 1 + cvat/apps/engine/serializers.py | 3 ++- cvat/apps/engine/views.py | 2 ++ 4 files changed, 7 insertions(+), 2 deletions(-) diff --git a/cvat/apps/engine/migrations/0040_cloud_storage.py b/cvat/apps/engine/migrations/0040_cloud_storage.py index 22d7da1c302f..f280c444abe1 100644 --- a/cvat/apps/engine/migrations/0040_cloud_storage.py +++ b/cvat/apps/engine/migrations/0040_cloud_storage.py @@ -1,4 +1,4 @@ -# Generated by Django 3.1.8 on 2021-05-06 12:11 +# Generated by Django 3.1.8 on 2021-05-07 06:42 import cvat.apps.engine.models from django.conf import settings @@ -25,6 +25,7 @@ class Migration(migrations.Migration): ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), ('provider_type', models.CharField(choices=[('AWS_S3_BUCKET', 'AWS_S3'), ('AZURE_CONTAINER', 'AZURE_CONTAINER'), ('GOOGLE_DRIVE', 'GOOGLE_DRIVE')], 
max_length=20)), ('resource', models.CharField(max_length=63)), + ('display_name', models.CharField(max_length=63, unique=True)), ('created_date', models.DateTimeField(auto_now_add=True)), ('updated_date', models.DateTimeField(auto_now=True)), ('credentials', models.CharField(max_length=500)), diff --git a/cvat/apps/engine/models.py b/cvat/apps/engine/models.py index 2dd699210176..ed000e096bde 100644 --- a/cvat/apps/engine/models.py +++ b/cvat/apps/engine/models.py @@ -581,6 +581,7 @@ class CloudStorage(models.Model): # The typical token size is less than 4096 bytes, but that can vary. provider_type = models.CharField(max_length=20, choices=CloudProviderChoice.choices()) resource = models.CharField(max_length=63) + display_name = models.CharField(max_length=63, unique=True) owner = models.ForeignKey(User, null=True, blank=True, on_delete=models.SET_NULL, related_name="cloud_storages") created_date = models.DateTimeField(auto_now_add=True) diff --git a/cvat/apps/engine/serializers.py b/cvat/apps/engine/serializers.py index 347577120023..c0f4eccb04bc 100644 --- a/cvat/apps/engine/serializers.py +++ b/cvat/apps/engine/serializers.py @@ -723,7 +723,7 @@ class CloudStorageSerializer(serializers.ModelSerializer): class Meta: model = models.CloudStorage fields = ( - 'provider_type', 'resource', 'owner', 'credentials_type', + 'provider_type', 'resource', 'display_name', 'owner', 'credentials_type', 'created_date', 'updated_date', 'session_token', 'account_name', 'key', 'secret_key', 'specific_attributes', 'description' ) @@ -781,6 +781,7 @@ def update(self, instance, validated_data): instance.credentials = credentials.convert_to_db() instance.credentials_type = validated_data.get('credentials_type', instance.credentials_type) instance.resource = validated_data.get('resource', instance.resource) + instance.display_name = validated_data.get('display_name', instance.display_name) instance.save() return instance \ No newline at end of file diff --git 
a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index e7cd8e0b3343..5e4db63996b9 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -1092,6 +1092,8 @@ def create(self, request, *args, **kwargs): response = super().create(request, *args, **kwargs) except IntegrityError: response = HttpResponseBadRequest('Same storage already exists') + except APIException as ex: + return Response(data=ex.get_full_details(), status=ex.status_code) except Exception as ex: response = HttpResponseBadRequest(str(ex)) return response From b5386e85c7c0b96ef95918226200fc2682714d22 Mon Sep 17 00:00:00 2001 From: Maya Date: Wed, 12 May 2021 17:06:52 +0300 Subject: [PATCH 18/26] Revert changes --- cvat/settings/base.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cvat/settings/base.py b/cvat/settings/base.py index 2788e7142799..1d758c3b8ee8 100644 --- a/cvat/settings/base.py +++ b/cvat/settings/base.py @@ -56,7 +56,8 @@ def add_ssh_keys(): IGNORE_FILES = ('README.md', 'ssh.pid') keys_to_add = [entry.name for entry in os.scandir(ssh_dir) if entry.name not in IGNORE_FILES] keys_to_add = ' '.join(os.path.join(ssh_dir, f) for f in keys_to_add) - subprocess.run(['ssh-add', '{}'.format(keys_to_add)], # nosec + subprocess.run(['ssh-add {}'.format(keys_to_add)], # nosec + shell=True, stderr = subprocess.PIPE, # lets set the timeout if ssh-add requires a input passphrase for key # otherwise the process will be freezed @@ -74,7 +75,7 @@ def add_ssh_keys(): volume_keys = os.listdir(keys_dir) if not ('id_rsa' in volume_keys and 'id_rsa.pub' in volume_keys): print('New pair of keys are being generated') - subprocess.run(['ssh-keygen -b 4096 -t rsa -f {}/id_rsa -q -N ""'.format(ssh_dir)]) # nosec + subprocess.run(['ssh-keygen -b 4096 -t rsa -f {}/id_rsa -q -N ""'.format(ssh_dir)], shell=True) # nosec shutil.copyfile('{}/id_rsa'.format(ssh_dir), '{}/id_rsa'.format(keys_dir)) shutil.copymode('{}/id_rsa'.format(ssh_dir), '{}/id_rsa'.format(keys_dir)) 
shutil.copyfile('{}/id_rsa.pub'.format(ssh_dir), '{}/id_rsa.pub'.format(keys_dir)) From a81b0da86d3ae5df1f99851003cc822221869592 Mon Sep 17 00:00:00 2001 From: Maya Date: Wed, 19 May 2021 12:53:11 +0300 Subject: [PATCH 19/26] Fix comments --- cvat/apps/engine/admin.py | 4 +- cvat/apps/engine/cache.py | 22 +++-- cvat/apps/engine/cloud_provider.py | 21 ++--- cvat/apps/engine/models.py | 2 +- cvat/apps/engine/serializers.py | 2 +- cvat/apps/engine/views.py | 132 ++++++++++++++--------------- cvat/requirements/base.txt | 2 +- 7 files changed, 93 insertions(+), 92 deletions(-) diff --git a/cvat/apps/engine/admin.py b/cvat/apps/engine/admin.py index ba73dd2e6c79..0dab80a8d0de 100644 --- a/cvat/apps/engine/admin.py +++ b/cvat/apps/engine/admin.py @@ -87,8 +87,8 @@ def has_add_permission(self, request): class CloudStorageAdmin(admin.ModelAdmin): date_hierarchy = 'updated_date' readonly_fields = ('created_date', 'updated_date', 'provider_type') - list_display = ('__str__', 'owner', 'created_date', 'updated_date') - search_fields = ('provider_type', 'resource', 'owner__username', 'owner__first_name', + list_display = ('__str__', 'resource', 'owner', 'created_date', 'updated_date') + search_fields = ('provider_type', 'display_name', 'resource', 'owner__username', 'owner__first_name', 'owner__last_name', 'owner__email',) empty_value_display = 'unknown' diff --git a/cvat/apps/engine/cache.py b/cvat/apps/engine/cache.py index 88c58124b6d8..d57861bcb4e4 100644 --- a/cvat/apps/engine/cache.py +++ b/cvat/apps/engine/cache.py @@ -7,7 +7,9 @@ from diskcache import Cache from django.conf import settings +from tempfile import NamedTemporaryFile +from cvat.apps.engine.log import slogger from cvat.apps.engine.media_extractors import (Mpeg4ChunkWriter, Mpeg4CompressedChunkWriter, ZipChunkWriter, ZipCompressedChunkWriter, ImageDatasetManifestReader, VideoDatasetManifestReader) @@ -83,21 +85,27 @@ def prepare_chunk_buff(self, db_data, quality, chunk_number): 
cloud_storage_instance.initialize_content() for item in reader: name = f"{item['name']}{item['extension']}" - source_path = os.path.join(upload_dir, name) if name not in cloud_storage_instance: raise Exception('{} file was not found on a {} storage'.format(name, cloud_storage_instance.name)) - cloud_storage_instance.download_file(name, source_path) - assert item.get('checksum', None), \ - 'A manifest file does not contain checksum for image {}'.format(item.get('name')) - assert md5_hash(source_path) == item.get('checksum'), \ - 'Hash sums of files {} do not match'.format(name) - images.append((source_path, source_path, None)) + with NamedTemporaryFile(mode='w+b', prefix='cvat', suffix=name, delete=False) as temp_file: + source_path = temp_file.name + buf = cloud_storage_instance.download_fileobj(name) + temp_file.write(buf.getvalue()) + if not (checksum := item.get('checksum', None)): + slogger.glob.warning('A manifest file does not contain checksum for image {}'.format(item.get('name'))) + if checksum and not md5_hash(source_path) == checksum: + slogger.glob.warning('Hash sums of files {} do not match'.format(name)) + images.append((source_path, source_path, None)) else: for item in reader: source_path = os.path.join(upload_dir, f"{item['name']}{item['extension']}") images.append((source_path, source_path, None)) writer.save_as_chunk(images, buff) buff.seek(0) + if db_data.storage == StorageChoice.CLOUD_STORAGE: + images = [image_path for image in images if os.path.exists((image_path := image[0]))] + for image_path in images: + os.remove(image_path) return buff, mime_type def save_chunk(self, db_data_id, chunk_number, quality, buff, mime_type): diff --git a/cvat/apps/engine/cloud_provider.py b/cvat/apps/engine/cloud_provider.py index c418b027fe9d..017d5f7db9e0 100644 --- a/cvat/apps/engine/cloud_provider.py +++ b/cvat/apps/engine/cloud_provider.py @@ -28,7 +28,7 @@ def create(self): pass @abstractmethod - def is_exist(self): + def exists(self): pass 
@abstractmethod @@ -97,7 +97,7 @@ def __init__(self, session_token=None): super().__init__() if all([access_key_id, secret_key, session_token]): - self._client_s3 = boto3.client( + self._s3 = boto3.resource( 's3', aws_access_key_id=access_key_id, aws_secret_access_key=secret_key, @@ -106,11 +106,11 @@ def __init__(self, ) elif any([access_key_id, secret_key, session_token]): raise Exception('Insufficient data for authorization') - self._s3 = boto3.resource('s3') # anonymous access if not any([access_key_id, secret_key, session_token]): + self._s3 = boto3.resource('s3', region_name=region) self._s3.meta.client.meta.events.register('choose-signer.s3.*', disable_signing) - self._client_s3 = self._s3.meta.client + self._client_s3 = self._s3.meta.client self._bucket = self._s3.Bucket(bucket) self.region = region @@ -122,7 +122,7 @@ def bucket(self): def name(self): return self._bucket.name - def is_exist(self): + def exists(self): waiter = self._client_s3.get_waiter('bucket_exists') try: waiter.wait( @@ -217,17 +217,12 @@ def create(self): public_access=PublicAccess.OFF ) except ResourceExistsError: - msg = f"{self._container_client.container_name} alredy exists" + msg = f"{self._container_client.container_name} already exists" slogger.glob.info(msg) raise Exception(msg) - def is_exist(self): - try: - self._container_client.create_container() - self._container_client.delete_container() - return False - except ResourceExistsError: - return True + def exists(self): + return self._container_client.exists(timeout=5) def is_object_exist(self, file_name): blob_client = self._container_client.get_blob_client(file_name) diff --git a/cvat/apps/engine/models.py b/cvat/apps/engine/models.py index ed000e096bde..2ea497b190ee 100644 --- a/cvat/apps/engine/models.py +++ b/cvat/apps/engine/models.py @@ -596,7 +596,7 @@ class Meta: unique_together = (('provider_type', 'resource', 'credentials'),) def __str__(self): - return "{} {} {}".format(self.provider_type, self.resource, self.id) + 
return "{} {} {}".format(self.provider_type, self.display_name, self.id) def get_storage_dirname(self): return os.path.join(settings.CLOUD_STORAGE_ROOT, str(self.id)) diff --git a/cvat/apps/engine/serializers.py b/cvat/apps/engine/serializers.py index c0f4eccb04bc..32661211b646 100644 --- a/cvat/apps/engine/serializers.py +++ b/cvat/apps/engine/serializers.py @@ -732,7 +732,7 @@ class Meta: def validate(self, attrs): if attrs.get('provider_type') == models.CloudProviderChoice.AZURE_CONTAINER: if not attrs.get('account_name', ''): - raise exceptions.PermissionDenied('Account name for Azure container was not specified') + raise serializers.ValidationError('Account name for Azure container was not specified') return attrs def create(self, validated_data): diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index 5e4db63996b9..b90df88cf4dc 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -988,11 +988,22 @@ def self(self, request): serializer = serializer_class(request.user, context={ "request": request }) return Response(serializer.data) +@method_decorator( + name='retrieve', + decorator=swagger_auto_schema( + operation_summary='Method returns details of a specific cloud storage', + responses={ + '200': openapi.Response(description='A details of a storage'), + }, + tags=['cloud storages'] + ) +) @method_decorator(name='list', decorator=swagger_auto_schema( operation_summary='Returns a paginated list of storages according to query parameters', manual_parameters=[ openapi.Parameter('provider_type', openapi.IN_QUERY, description="A supported provider of cloud storages", type=openapi.TYPE_STRING, enum=CloudProviderChoice.list()), + openapi.Parameter('display_name', openapi.IN_QUERY, description="A display name of storage", type=openapi.TYPE_STRING), openapi.Parameter('resource', openapi.IN_QUERY, description="A name of bucket or container", type=openapi.TYPE_STRING), openapi.Parameter('owner', openapi.IN_QUERY, description="A 
resource owner", type=openapi.TYPE_STRING), openapi.Parameter('credentials_type', openapi.IN_QUERY, description="A type of a granting access", type=openapi.TYPE_STRING, enum=CredentialsTypeChoice.list()), @@ -1014,8 +1025,8 @@ def self(self, request): class CloudStorageViewSet(auth.CloudStorageGetQuerySetMixin, viewsets.ModelViewSet): http_method_names = ['get', 'post', 'patch', 'delete'] queryset = CloudStorageModel.objects.all().prefetch_related('data').order_by('-id') - search_fields = ("provider_type", "resource", "owner__username") - filterset_fields = ['provider_type', 'resource', 'credentials_type'] + search_fields = ('provider_type', 'display_name', 'resource', 'owner__username') + filterset_fields = ['provider_type', 'display_name', 'resource', 'credentials_type'] def get_permissions(self): http_method = self.request.method @@ -1041,6 +1052,7 @@ def get_queryset(self): if provider_type in CloudProviderChoice.list(): return queryset.filter(provider_type=provider_type) raise ValidationError('Unsupported type of cloud provider') + return queryset def perform_create(self, serializer): # check that instance of cloud storage exists @@ -1062,7 +1074,7 @@ def perform_create(self, serializer): } storage = get_cloud_storage_instance(cloud_provider=provider_type, **details) try: - storage.is_exist() + storage.exists() except Exception as ex: message = str(ex) slogger.glob.error(message) @@ -1098,71 +1110,57 @@ def create(self, request, *args, **kwargs): response = HttpResponseBadRequest(str(ex)) return response - @method_decorator( - name='retrieve', - decorator=swagger_auto_schema( - operation_summary='Method returns details of a specific cloud storage', - operation_description= - "Method returns list of available files, if action is content", - manual_parameters=[ - openapi.Parameter('action', openapi.IN_QUERY, description="", - type=openapi.TYPE_STRING, enum=['content']), - openapi.Parameter('manifest_path', openapi.IN_QUERY, - description="Path to the manifest 
file in a cloud storage", - type=openapi.TYPE_STRING - )], - responses={ - '200': openapi.Response(description='A list of a storage content'), - }, - tags=['cloud storages'] - ) + @swagger_auto_schema( + method='get', + operation_summary='Method returns a mapped names of an available files from a storage and a manifest content', + manual_parameters=[ + openapi.Parameter('manifest_path', openapi.IN_QUERY, + description="Path to the manifest file in a cloud storage", + type=openapi.TYPE_STRING) + ], + responses={ + '200': openapi.Response(description='Mapped names of an available files from a storage and a manifest content'), + }, + tags=['cloud storages'] ) - def retrieve(self, request, *args, **kwargs): - action = request.query_params.get('action', '') - if action == 'content': - try: - pk = kwargs.get('pk') - db_storage = CloudStorageModel.objects.get(pk=pk) - credentials = Credentials() - credentials.convert_from_db({ - 'type': db_storage.credentials_type, - 'value': db_storage.credentials, - }) - details = { - 'resource': db_storage.resource, - 'credentials': credentials, - 'specific_attributes': db_storage.get_specific_attributes() - } - storage = get_cloud_storage_instance(cloud_provider=db_storage.provider_type, **details) - storage.initialize_content() - storage_files = storage.content - - manifest_path = request.query_params.get('manifest_path', 'manifest.jsonl') - with NamedTemporaryFile(mode='w+b', suffix='cvat', prefix='manifest') as tmp_manifest: - storage.download_file(manifest_path, tmp_manifest.name) - manifest = ImageManifestManager(tmp_manifest.name) - manifest.init_index() - manifest_files = manifest.data - content = {f:[] for f in set(storage_files) & set(manifest_files)} - for key, _ in content.items(): - if key in storage_files: content[key].append('s') # storage - if key in manifest_files: content[key].append('m') # manifest - - data = json.dumps(content) - return Response(data=data, content_type="aplication/json") - - except 
CloudStorageModel.DoesNotExist: - message = f"Storage {pk} does not exist" - slogger.glob.error(message) - return HttpResponseNotFound(message) - except Exception as ex: - return HttpResponseBadRequest(str(ex)) - elif not action: - instance = self.get_object() - serializer = self.get_serializer(instance) - return Response(serializer.data) - else: - return HttpResponseBadRequest() + @action(detail=True, methods=['GET'], url_path='content') + def content(self, request, pk): + try: + db_storage = CloudStorageModel.objects.get(pk=pk) + credentials = Credentials() + credentials.convert_from_db({ + 'type': db_storage.credentials_type, + 'value': db_storage.credentials, + }) + details = { + 'resource': db_storage.resource, + 'credentials': credentials, + 'specific_attributes': db_storage.get_specific_attributes() + } + storage = get_cloud_storage_instance(cloud_provider=db_storage.provider_type, **details) + storage.initialize_content() + storage_files = storage.content + + manifest_path = request.query_params.get('manifest_path', 'manifest.jsonl') + with NamedTemporaryFile(mode='w+b', suffix='manifest', prefix='cvat') as tmp_manifest: + storage.download_file(manifest_path, tmp_manifest.name) + manifest = ImageManifestManager(tmp_manifest.name) + manifest.init_index() + manifest_files = manifest.data + content = {f:[] for f in set(storage_files) | set(manifest_files)} + for key, _ in content.items(): + if key in storage_files: content[key].append('s') # storage + if key in manifest_files: content[key].append('m') # manifest + + data = json.dumps(content) + return Response(data=data, content_type="aplication/json") + + except CloudStorageModel.DoesNotExist: + message = f"Storage {pk} does not exist" + slogger.glob.error(message) + return HttpResponseNotFound(message) + except Exception as ex: + return HttpResponseBadRequest(str(ex)) def rq_handler(job, exc_type, exc_value, tb): job.exc_info = "".join( diff --git a/cvat/requirements/base.txt b/cvat/requirements/base.txt 
index a42cd70d7521..2d8e89bc6f91 100644 --- a/cvat/requirements/base.txt +++ b/cvat/requirements/base.txt @@ -46,7 +46,7 @@ patool==1.12 diskcache==5.0.2 open3d==0.11.2 boto3==1.17.61 -azure-storage-blob==12.6.0 +azure-storage-blob==12.8.1 # --no-binary=datumaro: workaround for pip to install # opencv-headless instead of regular opencv, to actually run setup script # --no-binary=pycocotools: workaround for binary incompatibility on numpy 1.20 From 1ac7b5152843a3ba7a17474abdbb4dabdb25113a Mon Sep 17 00:00:00 2001 From: Maya Date: Thu, 20 May 2021 11:40:31 +0300 Subject: [PATCH 20/26] Add validator for specific attributes --- cvat/apps/engine/serializers.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/cvat/apps/engine/serializers.py b/cvat/apps/engine/serializers.py index 3dbc9de5db7d..e9b05cc9e6e3 100644 --- a/cvat/apps/engine/serializers.py +++ b/cvat/apps/engine/serializers.py @@ -730,6 +730,15 @@ class Meta: ) read_only_fields = ('created_date', 'updated_date', 'owner') + # pylint: disable=no-self-use + def validate_specific_attributes(self, value): + if value: + attributes = value.split('&') + for attribute in attributes: + if not len(attribute.split('=')) == 2: + raise serializers.ValidationError('Invalid specific attributes') + return value + def validate(self, attrs): if attrs.get('provider_type') == models.CloudProviderChoice.AZURE_CONTAINER: if not attrs.get('account_name', ''): From a93dbab4d757f2f8ce21364766a9e08b0252780f Mon Sep 17 00:00:00 2001 From: Maya Date: Thu, 20 May 2021 12:13:07 +0300 Subject: [PATCH 21/26] Allow blank for description field --- cvat/apps/engine/migrations/0040_cloud_storage.py | 2 +- cvat/apps/engine/models.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cvat/apps/engine/migrations/0040_cloud_storage.py b/cvat/apps/engine/migrations/0040_cloud_storage.py index f280c444abe1..f6ef961a7e8a 100644 --- a/cvat/apps/engine/migrations/0040_cloud_storage.py +++ 
b/cvat/apps/engine/migrations/0040_cloud_storage.py @@ -31,7 +31,7 @@ class Migration(migrations.Migration): ('credentials', models.CharField(max_length=500)), ('credentials_type', models.CharField(choices=[('TEMP_KEY_SECRET_KEY_TOKEN_SET', 'TEMP_KEY_SECRET_KEY_TOKEN_SET'), ('ACCOUNT_NAME_TOKEN_PAIR', 'ACCOUNT_NAME_TOKEN_PAIR'), ('ANONYMOUS_ACCESS', 'ANONYMOUS_ACCESS')], max_length=29)), ('specific_attributes', models.CharField(blank=True, max_length=50)), - ('description', models.TextField(default='')), + ('description', models.TextField(blank=True)), ('owner', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='cloud_storages', to=settings.AUTH_USER_MODEL)), ], options={ diff --git a/cvat/apps/engine/models.py b/cvat/apps/engine/models.py index 2ea497b190ee..cec4ebe73efe 100644 --- a/cvat/apps/engine/models.py +++ b/cvat/apps/engine/models.py @@ -589,7 +589,7 @@ class CloudStorage(models.Model): credentials = models.CharField(max_length=500) credentials_type = models.CharField(max_length=29, choices=CredentialsTypeChoice.choices())#auth_type specific_attributes = models.CharField(max_length=50, blank=True) - description = models.TextField(default='') + description = models.TextField(blank=True) class Meta: default_permissions = () From c910c3c80c858a80f8f8241bb99360624510394a Mon Sep 17 00:00:00 2001 From: Maya Date: Thu, 20 May 2021 13:36:17 +0300 Subject: [PATCH 22/26] Update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 65c9ec5f2c2e..a9b367544a8e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Support of context images for 2D image tasks () +- Support of cloud storage without copying data into CVAT: server part () ### Changed From 2f733198de60c2f2d11f9152e676371326d83442 Mon Sep 17 00:00:00 2001 From: Maya Date: Fri, 21 May 2021 10:47:02 
+0300 Subject: [PATCH 23/26] Change error display --- cvat/apps/engine/views.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index 23a58ce7c815..c2ea10f2b137 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -1100,8 +1100,15 @@ def create(self, request, *args, **kwargs): response = super().create(request, *args, **kwargs) except IntegrityError: response = HttpResponseBadRequest('Same storage already exists') + except ValidationError as exceptions: + msg_body = "" + for ex in exceptions.args: + for field, ex_msg in ex.items(): + msg_body += ": ".join([field, str(ex_msg[0])]) + msg_body += '\n' + return HttpResponseBadRequest(msg_body) except APIException as ex: - return Response(data=ex.get_full_details(), status=ex.status_code) + return Response(data=ex.get_full_details(), status=ex.status_code) except Exception as ex: response = HttpResponseBadRequest(str(ex)) return response From 4ac72cc13555aab1a356433384a1f8488ebc5841 Mon Sep 17 00:00:00 2001 From: Maya Date: Fri, 21 May 2021 11:17:31 +0300 Subject: [PATCH 24/26] Redirect error in case when storage doesn't exist --- cvat/apps/engine/views.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index c2ea10f2b137..81aae1311b29 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -1074,7 +1074,7 @@ def perform_create(self, serializer): except Exception as ex: message = str(ex) slogger.glob.error(message) - raise serializers.ValidationError(message) + raise owner = self.request.data.get('owner') if owner: From faaefb9c5ccb7560e13a95f0fc12e19be7ce8b9e Mon Sep 17 00:00:00 2001 From: Maya Date: Mon, 31 May 2021 09:56:40 +0300 Subject: [PATCH 25/26] fix --- .../engine/migrations/0040_cloud_storage.py | 2 +- cvat/apps/engine/models.py | 2 +- cvat/apps/engine/views.py | 22 ++++++++++++++----- 3 files changed, 19 insertions(+), 7 
deletions(-) diff --git a/cvat/apps/engine/migrations/0040_cloud_storage.py b/cvat/apps/engine/migrations/0040_cloud_storage.py index f6ef961a7e8a..c73609fd9fef 100644 --- a/cvat/apps/engine/migrations/0040_cloud_storage.py +++ b/cvat/apps/engine/migrations/0040_cloud_storage.py @@ -25,7 +25,7 @@ class Migration(migrations.Migration): ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), ('provider_type', models.CharField(choices=[('AWS_S3_BUCKET', 'AWS_S3'), ('AZURE_CONTAINER', 'AZURE_CONTAINER'), ('GOOGLE_DRIVE', 'GOOGLE_DRIVE')], max_length=20)), ('resource', models.CharField(max_length=63)), - ('display_name', models.CharField(max_length=63, unique=True)), + ('display_name', models.CharField(max_length=63)), ('created_date', models.DateTimeField(auto_now_add=True)), ('updated_date', models.DateTimeField(auto_now=True)), ('credentials', models.CharField(max_length=500)), diff --git a/cvat/apps/engine/models.py b/cvat/apps/engine/models.py index cec4ebe73efe..f88f748aa689 100644 --- a/cvat/apps/engine/models.py +++ b/cvat/apps/engine/models.py @@ -581,7 +581,7 @@ class CloudStorage(models.Model): # The typical token size is less than 4096 bytes, but that can vary. 
provider_type = models.CharField(max_length=20, choices=CloudProviderChoice.choices()) resource = models.CharField(max_length=63) - display_name = models.CharField(max_length=63, unique=True) + display_name = models.CharField(max_length=63) owner = models.ForeignKey(User, null=True, blank=True, on_delete=models.SET_NULL, related_name="cloud_storages") created_date = models.DateTimeField(auto_now_add=True) diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index 81aae1311b29..d1c8d866b1b1 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -25,7 +25,7 @@ from django_filters import rest_framework as filters from django_filters.rest_framework import DjangoFilterBackend from drf_yasg import openapi -from drf_yasg.inspectors import CoreAPICompatInspector, NotHandled +from drf_yasg.inspectors import CoreAPICompatInspector, NotHandled, FieldInspector from drf_yasg.utils import swagger_auto_schema from rest_framework import mixins, serializers, status, viewsets from rest_framework.decorators import action @@ -984,6 +984,15 @@ def self(self, request): serializer = serializer_class(request.user, context={ "request": request }) return Response(serializer.data) +class RedefineDescriptionField(FieldInspector): + # pylint: disable=no-self-use + def process_result(self, result, method_name, obj, **kwargs): + if isinstance(result, openapi.Schema): + if hasattr(result, 'title') and result.title == 'Specific attributes': + result.description = 'structure like key1=value1&key2=value2\n' \ + 'supported: range=aws_range' + return result + @method_decorator( name='retrieve', decorator=swagger_auto_schema( @@ -1004,8 +1013,9 @@ def self(self, request): openapi.Parameter('owner', openapi.IN_QUERY, description="A resource owner", type=openapi.TYPE_STRING), openapi.Parameter('credentials_type', openapi.IN_QUERY, description="A type of a granting access", type=openapi.TYPE_STRING, enum=CredentialsTypeChoice.list()), ], - responses={'200': 
CloudStorageSerializer(many=True)}, - tags=['cloud storages'] + responses={'200': BaseCloudStorageSerializer(many=True)}, + tags=['cloud storages'], + field_inspectors=[RedefineDescriptionField] ) ) @method_decorator(name='destroy', decorator=swagger_auto_schema( @@ -1015,7 +1025,8 @@ def self(self, request): ) @method_decorator(name='partial_update', decorator=swagger_auto_schema( operation_summary='Methods does a partial update of chosen fields in a cloud storage instance', - tags=['cloud storages'] + tags=['cloud storages'], + field_inspectors=[RedefineDescriptionField] ) ) class CloudStorageViewSet(auth.CloudStorageGetQuerySetMixin, viewsets.ModelViewSet): @@ -1092,7 +1103,8 @@ def perform_destroy(self, instance): responses={ '201': openapi.Response(description='A storage has beed created') }, - tags=['cloud storages'] + tags=['cloud storages'], + field_inspectors=[RedefineDescriptionField], ) ) def create(self, request, *args, **kwargs): From 511eb51b1ac3fb392dd07e757af85c858496eded Mon Sep 17 00:00:00 2001 From: Nikita Manovich Date: Tue, 15 Jun 2021 17:37:06 +0300 Subject: [PATCH 26/26] Fixed pylint warnings about unused imports --- cvat/apps/engine/task.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/cvat/apps/engine/task.py b/cvat/apps/engine/task.py index b03d95e227b1..a864bf142449 100644 --- a/cvat/apps/engine/task.py +++ b/cvat/apps/engine/task.py @@ -19,8 +19,6 @@ from django.conf import settings from django.db import transaction -from cvat.apps.engine.models import ( - DataChoice, StorageMethodChoice, StorageChoice, RelatedFile) from cvat.apps.engine import models from cvat.apps.engine.log import slogger from cvat.apps.engine.media_extractors import (MEDIA_TYPES, Mpeg4ChunkWriter, Mpeg4CompressedChunkWriter, @@ -224,7 +222,7 @@ def _create_thread(tid, data, isImport=False): upload_dir = db_data.get_upload_dirname() if data['remote_files']: - if db_data.storage != StorageChoice.CLOUD_STORAGE: + if db_data.storage != 
models.StorageChoice.CLOUD_STORAGE: data['remote_files'] = _download_data(data['remote_files'], upload_dir) manifest_file = [] @@ -237,7 +235,7 @@ def _create_thread(tid, data, isImport=False): if data['server_files']: if db_data.storage == models.StorageChoice.LOCAL: _copy_data_from_share(data['server_files'], upload_dir) - elif db_data.storage == StorageChoice.SHARE: + elif db_data.storage == models.StorageChoice.SHARE: upload_dir = settings.SHARE_ROOT else: # cloud storage if not manifest_file: raise Exception('A manifest file not found') @@ -353,7 +351,7 @@ def update_progress(progress): # calculate chunk size if it isn't specified if db_data.chunk_size is None: if isinstance(compressed_chunk_writer, ZipCompressedChunkWriter): - if not (db_data.storage == StorageChoice.CLOUD_STORAGE): + if not (db_data.storage == models.StorageChoice.CLOUD_STORAGE): w, h = extractor.get_image_size(0) else: manifest = ImageManifestManager(db_data.get_manifest_path())