Skip to content

Commit

Permalink
Support of Google Cloud Storage for cloud storage (#3561)
Browse files Browse the repository at this point in the history
* Support Google Cloud Storage for cloud provider

Implement GoogleCloudStorage and add KEY_FILE_PATH to
CredentialsTypeChoice, and key_file_path in Credentials

* Handle cloud storage keys that contain a path separator

* Update changelog

* Add the migration file for engine_cloud_provider table
  • Loading branch information
jasonkit authored Aug 27, 2021
1 parent 5446816 commit 6a29b34
Show file tree
Hide file tree
Showing 8 changed files with 131 additions and 6 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Explicit "Done" button when drawing any polyshapes (<https://github.com/openvinotoolkit/cvat/pull/3417>)
- Histogram equalization with OpenCV javascript (<https://github.com/openvinotoolkit/cvat/pull/3447>)
- Client-side polyshapes approximation when using semi-automatic interactors & scissors (<https://github.com/openvinotoolkit/cvat/pull/3450>)
- Support of Google Cloud Storage for cloud storage (<https://github.com/openvinotoolkit/cvat/pull/3561>)

### Changed

Expand Down
2 changes: 1 addition & 1 deletion cvat/apps/engine/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def prepare_chunk_buff(self, db_data, quality, chunk_number):
name = f"{item['name']}{item['extension']}"
if name not in cloud_storage_instance:
raise Exception('{} file was not found on a {} storage'.format(name, cloud_storage_instance.name))
with NamedTemporaryFile(mode='w+b', prefix='cvat', suffix=name, delete=False) as temp_file:
with NamedTemporaryFile(mode='w+b', prefix='cvat', suffix=name.replace(os.path.sep, '#'), delete=False) as temp_file:
source_path = temp_file.name
buf = cloud_storage_instance.download_fileobj(name)
temp_file.write(buf.getvalue())
Expand Down
96 changes: 94 additions & 2 deletions cvat/apps/engine/cloud_provider.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#from dataclasses import dataclass
from abc import ABC, abstractmethod, abstractproperty
from io import BytesIO
import os
import os.path

import boto3
from boto3.s3.transfer import TransferConfig
Expand All @@ -11,6 +13,8 @@
from azure.core.exceptions import ResourceExistsError
from azure.storage.blob import PublicAccess

from google.cloud import storage

from cvat.apps.engine.log import slogger
from cvat.apps.engine.models import CredentialsTypeChoice, CloudProviderChoice

Expand Down Expand Up @@ -42,6 +46,7 @@ def download_fileobj(self, key):
def download_file(self, key, path):
file_obj = self.download_fileobj(key)
if isinstance(file_obj, BytesIO):
os.makedirs(os.path.dirname(path), exist_ok=True)
with open(path, 'wb') as f:
f.write(file_obj.getvalue())
else:
Expand Down Expand Up @@ -77,6 +82,14 @@ def get_cloud_storage_instance(cloud_provider, resource, credentials, specific_a
account_name=credentials.account_name,
sas_token=credentials.session_token
)
elif cloud_provider == CloudProviderChoice.GOOGLE_CLOUD_STORAGE:
instance = GoogleCloudStorage(
bucket_name=resource,
service_account_json=credentials.key_file_path,
prefix=specific_attributes.get('prefix'),
location=specific_attributes.get('location'),
project=specific_attributes.get('project')
)
else:
raise NotImplementedError()
return instance
Expand Down Expand Up @@ -256,21 +269,97 @@ def download_fileobj(self, key):
# Placeholder subclass: the body is empty, so no Google Drive specific
# behaviour is defined here.
class GOOGLE_DRIVE(_CloudStorage):
    pass

class GoogleCloudStorage(_CloudStorage):
    """Cloud storage backend that talks to a Google Cloud Storage bucket."""

    def __init__(self, bucket_name, prefix=None, service_account_json=None, project=None, location=None):
        """Create the storage client and resolve the bucket handle.

        Args:
            bucket_name: Name of the bucket to work with.
            prefix: Optional prefix passed to ``list_blobs`` when the bucket
                content is listed.
            service_account_json: Path to a service account key file. When it
                is empty or ``None`` the client is built with
                ``storage.Client()`` and no explicit key file.
            project: Value forwarded as ``user_project`` when a new bucket
                handle has to be constructed.
            location: Location handed to ``create_bucket`` by :meth:`create`.
        """
        super().__init__()
        if service_account_json:
            self._storage_client = storage.Client.from_service_account_json(service_account_json)
        else:
            self._storage_client = storage.Client()

        bucket = self._storage_client.lookup_bucket(bucket_name)
        if bucket is None:
            # The lookup found nothing: fall back to a locally constructed
            # handle so that create() can be called on it later.
            # NOTE(review): ``project`` is only applied on this branch; a
            # bucket returned by lookup_bucket() does not receive it.
            bucket = self._storage_client.bucket(bucket_name, user_project=project)

        self._bucket = bucket
        self._bucket_location = location
        self._prefix = prefix

    @property
    def bucket(self):
        """Bucket handle resolved in ``__init__`` (or replaced by ``create``)."""
        return self._bucket

    @property
    def name(self):
        """Name of the underlying bucket."""
        return self._bucket.name

    def exists(self):
        """Return ``True`` when a bucket lookup by name returns a bucket."""
        return self._storage_client.lookup_bucket(self.name) is not None

    def initialize_content(self):
        """Fill ``self._files`` with one ``{'name': ...}`` dict per listed blob."""
        self._files = [
            {
                'name': blob.name
            }
            for blob in self._storage_client.list_blobs(
                self.bucket, prefix=self._prefix
            )
        ]

    def download_fileobj(self, key):
        """Download the blob stored under ``key`` into an in-memory buffer.

        Returns:
            A ``BytesIO`` positioned at offset 0.
        """
        buf = BytesIO()
        blob = self.bucket.blob(key)
        self._storage_client.download_blob_to_file(blob, buf)
        # Rewind so callers can read the content from the beginning.
        buf.seek(0)
        return buf

    def is_object_exist(self, key):
        """Return the result of ``exists()`` for the blob stored under ``key``."""
        return self.bucket.blob(key).exists()

    def upload_file(self, file_obj, file_name):
        """Upload the content of ``file_obj`` as the blob named ``file_name``."""
        self.bucket.blob(file_name).upload_from_file(file_obj)

    def create(self):
        """Create the bucket and keep the returned bucket as the new handle.

        Raises:
            Exception: Carries the message of whatever went wrong; the
                original error is attached as ``__cause__``.
        """
        try:
            self._bucket = self._storage_client.create_bucket(
                self.bucket,
                location=self._bucket_location
            )
            slogger.glob.info(
                'Bucket {} has been created at {} region for {}'.format(
                    self.name,
                    self.bucket.location,
                    self.bucket.user_project,
                ))
        except Exception as ex:
            msg = str(ex)
            slogger.glob.info(msg)
            # Chain explicitly so the original error type and traceback stay
            # reachable from the generic exception raised here.
            raise Exception(msg) from ex

    def get_file_last_modified(self, key):
        """Reload the blob stored under ``key`` and return its ``updated`` value."""
        blob = self.bucket.blob(key)
        blob.reload()
        return blob.updated


class Credentials:
__slots__ = ('key', 'secret_key', 'session_token', 'account_name', 'credentials_type')
__slots__ = ('key', 'secret_key', 'session_token', 'account_name', 'key_file_path', 'credentials_type')

def __init__(self, **credentials):
self.key = credentials.get('key', '')
self.secret_key = credentials.get('secret_key', '')
self.session_token = credentials.get('session_token', '')
self.account_name = credentials.get('account_name', '')
self.key_file_path = credentials.get('key_file_path', '')
self.credentials_type = credentials.get('credentials_type', None)

def convert_to_db(self):
converted_credentials = {
CredentialsTypeChoice.TEMP_KEY_SECRET_KEY_TOKEN_SET : \
" ".join([self.key, self.secret_key, self.session_token]),
CredentialsTypeChoice.ACCOUNT_NAME_TOKEN_PAIR : " ".join([self.account_name, self.session_token]),
CredentialsTypeChoice.KEY_FILE_PATH: self.key_file_path,
CredentialsTypeChoice.ANONYMOUS_ACCESS: "",
}
return converted_credentials[self.credentials_type]
Expand All @@ -281,6 +370,8 @@ def convert_from_db(self, credentials):
self.key, self.secret_key, self.session_token = credentials.get('value').split()
elif self.credentials_type == CredentialsTypeChoice.ACCOUNT_NAME_TOKEN_PAIR:
self.account_name, self.session_token = credentials.get('value').split()
elif self.credentials_type == CredentialsTypeChoice.KEY_FILE_PATH:
self.key_file_path = credentials.get('value')
else:
self.account_name, self.session_token, self.key, self.secret_key = ('', '', '', '')
self.credentials_type = None
Expand All @@ -291,6 +382,7 @@ def mapping_with_new_values(self, credentials):
self.secret_key = credentials.get('secret_key', self.secret_key)
self.session_token = credentials.get('session_token', self.session_token)
self.account_name = credentials.get('account_name', self.account_name)
self.key_file_path = credentials.get('key_file_path', self.key_file_path)

def values(self):
return [self.key, self.secret_key, self.session_token, self.account_name]
return [self.key, self.secret_key, self.session_token, self.account_name, self.key_file_path]
23 changes: 23 additions & 0 deletions cvat/apps/engine/migrations/0041_auto_20210827_0258.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Generated by Django 3.1.13 on 2021-08-27 02:58

from django.db import migrations, models


class Migration(migrations.Migration):
    """Alter the choices of ``cloudstorage.credentials_type`` and ``cloudstorage.provider_type``."""

    dependencies = [('engine', '0040_cloud_storage')]

    operations = [
        migrations.AlterField(
            model_name='cloudstorage',
            name='credentials_type',
            field=models.CharField(
                max_length=29,
                choices=[
                    ('TEMP_KEY_SECRET_KEY_TOKEN_SET', 'TEMP_KEY_SECRET_KEY_TOKEN_SET'),
                    ('ACCOUNT_NAME_TOKEN_PAIR', 'ACCOUNT_NAME_TOKEN_PAIR'),
                    ('KEY_FILE_PATH', 'KEY_FILE_PATH'),
                    ('ANONYMOUS_ACCESS', 'ANONYMOUS_ACCESS'),
                ],
            ),
        ),
        migrations.AlterField(
            model_name='cloudstorage',
            name='provider_type',
            field=models.CharField(
                max_length=20,
                choices=[
                    ('AWS_S3_BUCKET', 'AWS_S3'),
                    ('AZURE_CONTAINER', 'AZURE_CONTAINER'),
                    ('GOOGLE_DRIVE', 'GOOGLE_DRIVE'),
                    ('GOOGLE_CLOUD_STORAGE', 'GOOGLE_CLOUD_STORAGE'),
                ],
            ),
        ),
    ]
2 changes: 2 additions & 0 deletions cvat/apps/engine/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -542,6 +542,7 @@ class CloudProviderChoice(str, Enum):
AWS_S3 = 'AWS_S3_BUCKET'
AZURE_CONTAINER = 'AZURE_CONTAINER'
GOOGLE_DRIVE = 'GOOGLE_DRIVE'
GOOGLE_CLOUD_STORAGE = 'GOOGLE_CLOUD_STORAGE'

@classmethod
def choices(cls):
Expand All @@ -558,6 +559,7 @@ class CredentialsTypeChoice(str, Enum):
# ignore bandit issues because false positives
TEMP_KEY_SECRET_KEY_TOKEN_SET = 'TEMP_KEY_SECRET_KEY_TOKEN_SET' # nosec
ACCOUNT_NAME_TOKEN_PAIR = 'ACCOUNT_NAME_TOKEN_PAIR' # nosec
KEY_FILE_PATH = 'KEY_FILE_PATH'
ANONYMOUS_ACCESS = 'ANONYMOUS_ACCESS'

@classmethod
Expand Down
9 changes: 7 additions & 2 deletions cvat/apps/engine/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -792,14 +792,15 @@ class CloudStorageSerializer(serializers.ModelSerializer):
session_token = serializers.CharField(max_length=440, allow_blank=True, required=False)
key = serializers.CharField(max_length=20, allow_blank=True, required=False)
secret_key = serializers.CharField(max_length=40, allow_blank=True, required=False)
key_file_path = serializers.CharField(max_length=64, allow_blank=True, required=False)
account_name = serializers.CharField(max_length=24, allow_blank=True, required=False)

class Meta:
model = models.CloudStorage
fields = (
'provider_type', 'resource', 'display_name', 'owner', 'credentials_type',
'created_date', 'updated_date', 'session_token', 'account_name', 'key',
'secret_key', 'specific_attributes', 'description'
'secret_key', 'key_file_path', 'specific_attributes', 'description'
)
read_only_fields = ('created_date', 'updated_date', 'owner')

Expand All @@ -816,6 +817,9 @@ def validate(self, attrs):
if attrs.get('provider_type') == models.CloudProviderChoice.AZURE_CONTAINER:
if not attrs.get('account_name', ''):
raise serializers.ValidationError('Account name for Azure container was not specified')
if attrs.get('provider_type') == models.CloudProviderChoice.GOOGLE_CLOUD_STORAGE:
if not attrs.get('key_file_path', ''):
raise serializers.ValidationError('Key file path for Google cloud storage was not specified')
return attrs

def create(self, validated_data):
Expand All @@ -826,6 +830,7 @@ def create(self, validated_data):
key=validated_data.pop('key', ''),
secret_key=validated_data.pop('secret_key', ''),
session_token=validated_data.pop('session_token', ''),
key_file_path=validated_data.pop('key_file_path', ''),
credentials_type = validated_data.get('credentials_type')
)
if should_be_created:
Expand Down Expand Up @@ -859,7 +864,7 @@ def update(self, instance, validated_data):
'type': instance.credentials_type,
'value': instance.credentials,
})
tmp = {k:v for k,v in validated_data.items() if k in {'key', 'secret_key', 'account_name', 'session_token', 'credentials_type'}}
tmp = {k:v for k,v in validated_data.items() if k in {'key', 'secret_key', 'account_name', 'session_token', 'key_file_path', 'credentials_type'}}
credentials.mapping_with_new_values(tmp)
instance.credentials = credentials.convert_to_db()
instance.credentials_type = validated_data.get('credentials_type', instance.credentials_type)
Expand Down
3 changes: 2 additions & 1 deletion cvat/apps/engine/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -1262,7 +1262,8 @@ def perform_create(self, serializer):
session_token=serializer.validated_data.get('session_token', ''),
account_name=serializer.validated_data.get('account_name', ''),
key=serializer.validated_data.get('key', ''),
secret_key=serializer.validated_data.get('secret_key', '')
secret_key=serializer.validated_data.get('secret_key', ''),
key_file_path=serializer.validated_data.get('key_file_path', '')
)
details = {
'resource': serializer.validated_data.get('resource'),
Expand Down
1 change: 1 addition & 0 deletions cvat/requirements/base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ diskcache==5.0.2
open3d==0.11.2
boto3==1.17.61
azure-storage-blob==12.8.1
google-cloud-storage==1.42.0
# --no-binary=datumaro: workaround for pip to install
# opencv-headless instead of regular opencv, to actually run setup script
# --no-binary=pycocotools: workaround for binary incompatibility on numpy 1.20
Expand Down

0 comments on commit 6a29b34

Please sign in to comment.