Skip to content

Commit

Permalink
Support any name for a manifest (#4041)
Browse files Browse the repository at this point in the history
* Initial version

* Fix support 2 versions && fix case for cloud storages

* Fix eslint errors

* tmp

* Fix manifest validation when data hasn't been copied yet

* fix

* Update changelog
  • Loading branch information
Marishka17 authored Dec 22, 2021
1 parent f74b6f0 commit 5281e79
Show file tree
Hide file tree
Showing 6 changed files with 200 additions and 73 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Add "tag" return type for automatic annotation in Nuclio (<https://github.com/openvinotoolkit/cvat/pull/3896>)
- Dataset importing to a project (<https://github.com/openvinotoolkit/cvat/pull/3790>)
- User is able to customize information that text labels show (<https://github.com/openvinotoolkit/cvat/pull/4029>)
- Support for uploading manifest with any name (<https://github.com/openvinotoolkit/cvat/pull/4041>)

### Changed
- TBD
Expand Down
23 changes: 11 additions & 12 deletions cvat-ui/src/components/file-manager/file-manager.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -90,14 +90,13 @@ export class FileManager extends React.PureComponent<Props, State> {
};
}

private loadData = (key: string): Promise<void> =>
new Promise<void>((resolve, reject): void => {
const { onLoadData } = this.props;
private loadData = (key: string): Promise<void> => new Promise<void>((resolve, reject): void => {
const { onLoadData } = this.props;

const success = (): void => resolve();
const failure = (): void => reject();
onLoadData(key, success, failure);
});
const success = (): void => resolve();
const failure = (): void => reject();
onLoadData(key, success, failure);
});

public reset(): void {
const { active } = this.state;
Expand Down Expand Up @@ -161,8 +160,8 @@ export class FileManager extends React.PureComponent<Props, State> {
private renderShareSelector(): JSX.Element {
function renderTreeNodes(data: TreeNodeNormal[]): JSX.Element[] {
// sort alphabetically
data.sort((a: TreeNodeNormal, b: TreeNodeNormal): number =>
a.key.toLocaleString().localeCompare(b.key.toLocaleString()));
data.sort((a: TreeNodeNormal, b: TreeNodeNormal): number => (
a.key.toLocaleString().localeCompare(b.key.toLocaleString())));
return data.map((item: TreeNodeNormal) => {
if (item.children) {
return (
Expand Down Expand Up @@ -205,8 +204,8 @@ export class FileManager extends React.PureComponent<Props, State> {
halfChecked: ReactText[];
},
): void => {
const keys = (checkedKeys as ReactText[]).map((text: ReactText): string =>
text.toLocaleString());
const keys = (checkedKeys as ReactText[]).map((text: ReactText): string => (
text.toLocaleString()));
this.setState({
files: {
...files,
Expand Down Expand Up @@ -267,7 +266,7 @@ export class FileManager extends React.PureComponent<Props, State> {
<CloudStorageTab
formRef={this.cloudStorageTabFormRef}
cloudStorage={cloudStorage}
selectedFiles={files.cloudStorage.filter((item) => !item.endsWith('manifest.jsonl'))}
selectedFiles={files.cloudStorage.filter((item) => !item.endsWith('.jsonl'))}
onSelectCloudStorage={(_cloudStorage: CloudStorage | null) => {
this.setState({ cloudStorage: _cloudStorage });
}}
Expand Down
107 changes: 63 additions & 44 deletions cvat/apps/engine/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from cvat.apps.engine.media_extractors import (MEDIA_TYPES, Mpeg4ChunkWriter, Mpeg4CompressedChunkWriter,
ValidateDimension, ZipChunkWriter, ZipCompressedChunkWriter, get_mime, sort)
from cvat.apps.engine.utils import av_scan_paths
from utils.dataset_manifest import ImageManifestManager, VideoManifestManager
from utils.dataset_manifest import ImageManifestManager, VideoManifestManager, is_manifest
from utils.dataset_manifest.core import VideoManifestValidator
from utils.dataset_manifest.utils import detect_related_images
from .cloud_provider import get_cloud_storage_instance, Credentials
Expand Down Expand Up @@ -113,7 +113,7 @@ def _save_task_to_db(db_task):
db_task.data.save()
db_task.save()

def _count_files(data, manifest_file=None):
def _count_files(data, manifest_files=None):
share_root = settings.SHARE_ROOT
server_files = []

Expand Down Expand Up @@ -143,8 +143,8 @@ def count_files(file_mapping, counter):
mime = get_mime(full_path)
if mime in counter:
counter[mime].append(rel_path)
elif 'manifest.jsonl' == os.path.basename(rel_path):
manifest_file.append(rel_path)
elif rel_path.endswith('.jsonl'):
manifest_files.append(rel_path)
else:
slogger.glob.warn("Skip '{}' file (its mime type doesn't "
"correspond to supported MIME file type)".format(full_path))
Expand All @@ -163,7 +163,7 @@ def count_files(file_mapping, counter):

return counter

def _validate_data(counter, manifest_file=None):
def _validate_data(counter, manifest_files=None):
unique_entries = 0
multiple_entries = 0
for media_type, media_config in MEDIA_TYPES.items():
Expand All @@ -173,7 +173,7 @@ def _validate_data(counter, manifest_file=None):
else:
multiple_entries += len(counter[media_type])

if manifest_file and media_type not in ('video', 'image'):
if manifest_files and media_type not in ('video', 'image'):
raise Exception('File with meta information can only be uploaded with video/images ')

if unique_entries == 1 and multiple_entries > 0 or unique_entries > 1:
Expand All @@ -193,6 +193,16 @@ def _validate_data(counter, manifest_file=None):

return counter, task_modes[0]

def _validate_manifest(manifests, root_dir):
    """Pick and check the single manifest file attached to the uploaded data.

    Args:
        manifests: collection of candidate manifest paths; each one is joined
            onto ``root_dir`` to build the path that gets checked.
        root_dir: directory the manifest paths are resolved against.

    Returns:
        The manifest path exactly as it appears in ``manifests`` (not joined
        with ``root_dir``), or ``None`` when ``manifests`` is empty.

    Raises:
        Exception: if more than one manifest was supplied, or if
            ``is_manifest`` rejects the resolved file.
    """
    # Nothing was attached: a manifest is optional at this point.
    if not manifests:
        return None

    if len(manifests) != 1:
        raise Exception('Only one manifest file can be attached with data')

    candidate = manifests[0]
    # NOTE(review): is_manifest comes from utils.dataset_manifest; presumably it
    # inspects the file content rather than its name — confirm.
    if not is_manifest(os.path.join(root_dir, candidate)):
        raise Exception('Invalid manifest was uploaded')

    return candidate

def _download_data(urls, upload_dir):
job = rq.get_current_job()
local_files = {}
Expand Down Expand Up @@ -233,48 +243,57 @@ def _create_thread(db_task, data, isBackupRestore=False, isDatasetImport=False):
if data['remote_files'] and not isDatasetImport:
data['remote_files'] = _download_data(data['remote_files'], upload_dir)

manifest_file = []
media = _count_files(data, manifest_file)
media, task_mode = _validate_data(media, manifest_file)
if manifest_file and (not settings.USE_CACHE or db_data.storage_method != models.StorageMethodChoice.CACHE):
raise Exception("File with meta information can be uploaded if 'Use cache' option is also selected")
manifest_files = []
media = _count_files(data, manifest_files)
media, task_mode = _validate_data(media, manifest_files)

if data['server_files']:
if db_data.storage == models.StorageChoice.LOCAL:
_copy_data_from_source(data['server_files'], upload_dir, data.get('server_files_path'))
elif db_data.storage == models.StorageChoice.SHARE:
upload_dir = settings.SHARE_ROOT
else: # cloud storage
if not manifest_file: raise Exception('A manifest file not found')
db_cloud_storage = db_data.cloud_storage
credentials = Credentials()
credentials.convert_from_db({
'type': db_cloud_storage.credentials_type,
'value': db_cloud_storage.credentials,
})

details = {
'resource': db_cloud_storage.resource,
'credentials': credentials,
'specific_attributes': db_cloud_storage.get_specific_attributes()
}
cloud_storage_instance = get_cloud_storage_instance(cloud_provider=db_cloud_storage.provider_type, **details)
sorted_media = sort(media['image'], data['sorting_method'])
first_sorted_media_image = sorted_media[0]
cloud_storage_instance.download_file(first_sorted_media_image, os.path.join(upload_dir, first_sorted_media_image))

# prepare task manifest file from cloud storage manifest file
# NOTE we should create manifest before defining chunk_size
# FIXME in the future when will be implemented archive support
manifest = ImageManifestManager(db_data.get_manifest_path())
cloud_storage_manifest = ImageManifestManager(
os.path.join(db_data.cloud_storage.get_storage_dirname(), manifest_file[0]),
db_data.cloud_storage.get_storage_dirname()
)
cloud_storage_manifest.set_index()
sequence, content = cloud_storage_manifest.get_subset(sorted_media)
sorted_content = (i[1] for i in sorted(zip(sequence, content)))
manifest.create(sorted_content)
manifest_root = None
if db_data.storage in {models.StorageChoice.LOCAL, models.StorageChoice.SHARE}:
manifest_root = upload_dir
elif db_data.storage == models.StorageChoice.CLOUD_STORAGE:
manifest_root = db_data.cloud_storage.get_storage_dirname()

manifest_file = _validate_manifest(manifest_files, manifest_root)
if manifest_file and (not settings.USE_CACHE or db_data.storage_method != models.StorageMethodChoice.CACHE):
raise Exception("File with meta information can be uploaded if 'Use cache' option is also selected")

if data['server_files'] and db_data.storage == models.StorageChoice.CLOUD_STORAGE:
if not manifest_file: raise Exception('A manifest file not found')
db_cloud_storage = db_data.cloud_storage
credentials = Credentials()
credentials.convert_from_db({
'type': db_cloud_storage.credentials_type,
'value': db_cloud_storage.credentials,
})

details = {
'resource': db_cloud_storage.resource,
'credentials': credentials,
'specific_attributes': db_cloud_storage.get_specific_attributes()
}
cloud_storage_instance = get_cloud_storage_instance(cloud_provider=db_cloud_storage.provider_type, **details)
sorted_media = sort(media['image'], data['sorting_method'])
first_sorted_media_image = sorted_media[0]
cloud_storage_instance.download_file(first_sorted_media_image, os.path.join(upload_dir, first_sorted_media_image))

# prepare task manifest file from cloud storage manifest file
# NOTE we should create manifest before defining chunk_size
# FIXME: revisit this once archive support is implemented
manifest = ImageManifestManager(db_data.get_manifest_path())
cloud_storage_manifest = ImageManifestManager(
os.path.join(db_data.cloud_storage.get_storage_dirname(), manifest_file),
db_data.cloud_storage.get_storage_dirname()
)
cloud_storage_manifest.set_index()
sequence, content = cloud_storage_manifest.get_subset(sorted_media)
sorted_content = (i[1] for i in sorted(zip(sequence, content)))
manifest.create(sorted_content)

av_scan_paths(upload_dir)

Expand Down Expand Up @@ -432,12 +451,12 @@ def _update_status(msg):
if not media_files:
continue

# replace manifest file (e.g was uploaded 'subdir/manifest.jsonl')
# replace the manifest file (e.g. 'subdir/manifest.jsonl' or 'some_manifest.jsonl' was uploaded)
if manifest_file and not os.path.exists(db_data.get_manifest_path()):
shutil.copyfile(os.path.join(upload_dir, manifest_file[0]),
shutil.copyfile(os.path.join(upload_dir, manifest_file),
db_data.get_manifest_path())
if upload_dir != settings.SHARE_ROOT:
os.remove(os.path.join(upload_dir, manifest_file[0]))
os.remove(os.path.join(upload_dir, manifest_file))

if task_mode == MEDIA_TYPES['video']['mode']:
try:
Expand Down
2 changes: 1 addition & 1 deletion cvat/apps/engine/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -1359,7 +1359,7 @@ def content(self, request, pk):
storage = get_cloud_storage_instance(cloud_provider=db_storage.provider_type, **details)
if not db_storage.manifests.count():
raise Exception('There is no manifest file')
manifest_path = request.query_params.get('manifest_path', 'manifest.jsonl')
manifest_path = request.query_params.get('manifest_path', db_storage.manifests.first().filename)
file_status = storage.get_file_status(manifest_path)
if file_status == Status.NOT_FOUND:
raise FileNotFoundError(errno.ENOENT,
Expand Down
2 changes: 1 addition & 1 deletion utils/dataset_manifest/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (C) 2021 Intel Corporation
#
# SPDX-License-Identifier: MIT
from .core import VideoManifestManager, ImageManifestManager
from .core import VideoManifestManager, ImageManifestManager, is_manifest
Loading

0 comments on commit 5281e79

Please sign in to comment.