From b48d59b5ec2fc5a87977569c0e8378a53cf1fbae Mon Sep 17 00:00:00 2001 From: Andrey Zhavoronkov <41117609+azhavoro@users.noreply.github.com> Date: Mon, 8 Jul 2019 11:10:50 +0300 Subject: [PATCH] Ability to create custom extractors (#434) * ability to add custom extractors * added configurable mimetypes * added a note to changelog --- CHANGELOG.md | 1 + cvat/apps/engine/media_extractors.py | 216 ++++++++++++++ .../0016_attribute_spec_20190217.py | 6 +- cvat/apps/engine/task.py | 266 ++++++------------ 4 files changed, 304 insertions(+), 185 deletions(-) create mode 100644 cvat/apps/engine/media_extractors.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 1964ddfb613a..ceb6f440e0ca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Remote data source (list of URLs to create an annotation task) - Auto annotation using Faster R-CNN with Inception v2 (utils/open_model_zoo) - Auto annotation using Pixel Link mobilenet v2 - text detection (utils/open_model_zoo) +- Ability to create custom extractors for unsupported media types ### Changed - Outside and keyframe buttons in the side panel for all interpolation shapes (they were only for boxes before) diff --git a/cvat/apps/engine/media_extractors.py b/cvat/apps/engine/media_extractors.py new file mode 100644 index 000000000000..bc4424ba80ac --- /dev/null +++ b/cvat/apps/engine/media_extractors.py @@ -0,0 +1,216 @@ +import os +import tempfile +import shutil +import numpy as np + +from ffmpy import FFmpeg +from pyunpack import Archive +from PIL import Image + +import mimetypes +_SCRIPT_DIR = os.path.realpath(os.path.dirname(__file__)) +MEDIA_MIMETYPES_FILES = [ + os.path.join(_SCRIPT_DIR, "media.mimetypes"), +] +mimetypes.init(files=MEDIA_MIMETYPES_FILES) + +def get_mime(name): + for type_name, type_def in MEDIA_TYPES.items(): + if type_def['has_mime_type'](name): + return type_name + + return 'unknown' + +class MediaExtractor: + 
def __init__(self, source_path, dest_path, image_quality, step, start, stop): + self._source_path = source_path + self._dest_path = dest_path + self._image_quality = image_quality + self._step = step + self._start = start + self._stop = stop + + def get_source_name(self): + return self._source_path + +#Note step, start, stop have no effect +class ImageListExtractor(MediaExtractor): + def __init__(self, source_path, dest_path, image_quality, step=1, start=0, stop=0): + if not source_path: + raise Exception('No image found') + super().__init__( + source_path=sorted(source_path), + dest_path=dest_path, + image_quality=image_quality, + step=1, + start=0, + stop=0, + ) + + def __iter__(self): + return iter(self._source_path) + + def __getitem__(self, k): + return self._source_path[k] + + def __len__(self): + return len(self._source_path) + + def save_image(self, k, dest_path): + image = Image.open(self[k]) + # Ensure image data fits into 8bit per pixel before RGB conversion as PIL clips values on conversion + if image.mode == "I": + # Image mode is 32bit integer pixels. 
+ # Autoscale pixels by factor 2**8 / im_data.max() to fit into 8bit + im_data = np.array(image) + im_data = im_data * (2**8 / im_data.max()) + image = Image.fromarray(im_data.astype(np.int32)) + image = image.convert('RGB') + image.save(dest_path, quality=self._image_quality, optimize=True) + height = image.height + width = image.width + image.close() + return width, height + +#Note step, start, stop have no effect +class DirectoryExtractor(ImageListExtractor): + def __init__(self, source_path, dest_path, image_quality, step=1, start=0, stop=0): + image_paths = [] + for source in source_path: + for root, _, files in os.walk(source): + paths = [os.path.join(root, f) for f in files] + paths = filter(lambda x: get_mime(x) == 'image', paths) + image_paths.extend(paths) + super().__init__( + source_path=sorted(image_paths), + dest_path=dest_path, + image_quality=image_quality, + step=1, + start=0, + stop=0, + ) + +#Note step, start, stop have no effect +class ArchiveExtractor(DirectoryExtractor): + def __init__(self, source_path, dest_path, image_quality, step=1, start=0, stop=0): + Archive(source_path[0]).extractall(dest_path) + super().__init__( + source_path=[dest_path], + dest_path=dest_path, + image_quality=image_quality, + step=1, + start=0, + stop=0, + ) + +class VideoExtractor(MediaExtractor): + def __init__(self, source_path, dest_path, image_quality, step=1, start=0, stop=0): + from cvat.apps.engine.log import slogger + _dest_path = tempfile.mkdtemp(prefix='cvat-', suffix='.data') + super().__init__( + source_path=source_path[0], + dest_path=_dest_path, + image_quality=image_quality, + step=step, + start=start, + stop=stop, + ) + # translate inversed range 1:95 to 2:32 + translated_quality = 96 - self._image_quality + translated_quality = round((((translated_quality - 1) * (31 - 2)) / (95 - 1)) + 2) + self._tmp_output = tempfile.mkdtemp(prefix='cvat-', suffix='.data') + target_path = os.path.join(self._tmp_output, '%d.jpg') + output_opts = '-start_number 0 
-b:v 10000k -vsync 0 -an -y -q:v ' + str(translated_quality) + filters = '' + if self._stop > 0: + filters = 'between(n,' + str(self._start) + ',' + str(self._stop) + ')' + elif self._start > 0: + filters = 'gte(n,' + str(self._start) + ')' + if self._step > 1: + filters += ('*' if filters else '') + 'not(mod(n-' + str(self._start) + ',' + str(self._step) + '))' + if filters: + output_opts += " -vf select=\"'" + filters + "'\"" + + ff = FFmpeg( + inputs = {self._source_path: None}, + outputs = {target_path: output_opts}) + + slogger.glob.info("FFMpeg cmd: {} ".format(ff.cmd)) + ff.run() + + def _getframepath(self, k): + return "{0}/{1}.jpg".format(self._tmp_output, k) + + def __iter__(self): + i = 0 + while os.path.exists(self._getframepath(i)): + yield self._getframepath(i) + i += 1 + + def __del__(self): + if self._tmp_output: + shutil.rmtree(self._tmp_output) + + def __getitem__(self, k): + return self._getframepath(k) + + def __len__(self): + return len(os.listdir(self._tmp_output)) + + def save_image(self, k, dest_path): + shutil.copyfile(self[k], dest_path) + +def _is_archive(path): + mime = mimetypes.guess_type(path) + mime_type = mime[0] + encoding = mime[1] + supportedArchives = ['application/zip', 'application/x-rar-compressed', + 'application/x-tar', 'application/x-7z-compressed', 'application/x-cpio', + 'gzip', 'bzip2'] + return mime_type in supportedArchives or encoding in supportedArchives + +def _is_video(path): + mime = mimetypes.guess_type(path) + return mime[0] is not None and mime[0].startswith('video') + +def _is_image(path): + mime = mimetypes.guess_type(path) + return mime[0] is not None and mime[0].startswith('image') + +def _is_dir(path): + return os.path.isdir(path) + +# 'has_mime_type': function receives 1 argument - path to file. +# Should return True if file has specified media type. +# 'extractor': class that extracts images from specified media. +# 'mode': 'annotation' or 'interpolation' - mode of task that should be created. 
+# 'unique': True or False - describes how the type can be combined with other. +# True - only one item of this type and no other is allowed +# False - this media types can be combined with other which have unique == False + +MEDIA_TYPES = { + 'image': { + 'has_mime_type': _is_image, + 'extractor': ImageListExtractor, + 'mode': 'annotation', + 'unique': False, + }, + 'video': { + 'has_mime_type': _is_video, + 'extractor': VideoExtractor, + 'mode': 'interpolation', + 'unique': True, + }, + 'archive': { + 'has_mime_type': _is_archive, + 'extractor': ArchiveExtractor, + 'mode': 'annotation', + 'unique': True, + }, + 'directory': { + 'has_mime_type': _is_dir, + 'extractor': DirectoryExtractor, + 'mode': 'annotation', + 'unique': False, + }, +} diff --git a/cvat/apps/engine/migrations/0016_attribute_spec_20190217.py b/cvat/apps/engine/migrations/0016_attribute_spec_20190217.py index dfb84fff98fd..27d273af2790 100644 --- a/cvat/apps/engine/migrations/0016_attribute_spec_20190217.py +++ b/cvat/apps/engine/migrations/0016_attribute_spec_20190217.py @@ -5,7 +5,7 @@ from PIL import Image from django.db import migrations from django.conf import settings -from cvat.apps.engine.task import _get_mime +from cvat.apps.engine.media_extractors import get_mime def parse_attribute(value): match = re.match(r'^([~@])(\w+)=(\w+):(.+)?$', value) @@ -81,7 +81,7 @@ def fill_task_meta_data_forward(apps, schema_editor): video = "" for root, _, files in os.walk(_get_upload_dirname(db_task)): fullnames = map(lambda f: os.path.join(root, f), files) - videos = list(filter(lambda x: _get_mime(x) == 'video', fullnames)) + videos = list(filter(lambda x: get_mime(x) == 'video', fullnames)) if len(videos): video = videos[0] break @@ -100,7 +100,7 @@ def fill_task_meta_data_forward(apps, schema_editor): filenames = [] for root, _, files in os.walk(_get_upload_dirname(db_task)): fullnames = map(lambda f: os.path.join(root, f), files) - images = filter(lambda x: _get_mime(x) == 'image', fullnames) + 
images = filter(lambda x: get_mime(x) == 'image', fullnames) filenames.extend(images) filenames.sort() diff --git a/cvat/apps/engine/task.py b/cvat/apps/engine/task.py index b264a3f88db0..7d1fbd188953 100644 --- a/cvat/apps/engine/task.py +++ b/cvat/apps/engine/task.py @@ -7,9 +7,6 @@ import sys import rq import shutil -import subprocess -import tempfile -import numpy as np from PIL import Image from traceback import print_exception from ast import literal_eval @@ -17,16 +14,11 @@ from urllib import parse as urlparse from urllib import request as urlrequest -import mimetypes -_SCRIPT_DIR = os.path.realpath(os.path.dirname(__file__)) -_MEDIA_MIMETYPES_FILE = os.path.join(_SCRIPT_DIR, "media.mimetypes") -mimetypes.init(files=[_MEDIA_MIMETYPES_FILE]) +from cvat.apps.engine.media_extractors import get_mime, MEDIA_TYPES import django_rq from django.conf import settings from django.db import transaction -from ffmpy import FFmpeg -from pyunpack import Archive from distutils.dir_util import copy_tree from . 
import models @@ -51,49 +43,6 @@ def rq_handler(job, exc_type, exc_value, traceback): ############################# Internal implementation for server API -class _FrameExtractor: - def __init__(self, source_path, compress_quality, step=1, start=0, stop=0): - # translate inversed range 1:95 to 2:32 - translated_quality = 96 - compress_quality - translated_quality = round((((translated_quality - 1) * (31 - 2)) / (95 - 1)) + 2) - self.source = source_path - self.output = tempfile.mkdtemp(prefix='cvat-', suffix='.data') - target_path = os.path.join(self.output, '%d.jpg') - output_opts = '-start_number 0 -b:v 10000k -vsync 0 -an -y -q:v ' + str(translated_quality) - filters = '' - if stop > 0: - filters = 'between(n,' + str(start) + ',' + str(stop) + ')' - elif start > 0: - filters = 'gte(n,' + str(start) + ')' - if step > 1: - filters += ('*' if filters else '') + 'not(mod(n-' + str(start) + ',' + str(step) + '))' - if filters: - filters = "select=\"'" + filters + "'\"" - if filters: - output_opts += ' -vf ' + filters - ff = FFmpeg( - inputs = {source_path: None}, - outputs = {target_path: output_opts}) - - slogger.glob.info("FFMpeg cmd: {} ".format(ff.cmd)) - ff.run() - - def getframepath(self, k): - return "{0}/{1}.jpg".format(self.output, k) - - def __del__(self): - if self.output: - shutil.rmtree(self.output) - - def __getitem__(self, k): - return self.getframepath(k) - - def __iter__(self): - i = 0 - while os.path.exists(self.getframepath(i)): - yield self[i] - i += 1 - def make_image_meta_cache(db_task): with open(db_task.get_image_meta_cache_path(), 'w') as meta_file: cache = { @@ -111,7 +60,7 @@ def make_image_meta_cache(db_task): filenames = [] for root, _, files in os.walk(db_task.get_upload_dirname()): fullnames = map(lambda f: os.path.join(root, f), files) - images = filter(lambda x: _get_mime(x) == 'image', fullnames) + images = filter(lambda x: get_mime(x) == 'image', fullnames) filenames.extend(images) filenames.sort() @@ -135,31 +84,6 @@ def 
get_image_meta_cache(db_task): with open(db_task.get_image_meta_cache_path()) as meta_cache_file: return literal_eval(meta_cache_file.read()) - -def _get_mime(name): - mime = mimetypes.guess_type(name) - mime_type = mime[0] - encoding = mime[1] - # zip, rar, tar, tar.gz, tar.bz2, 7z, cpio - supportedArchives = ['application/zip', 'application/x-rar-compressed', - 'application/x-tar', 'application/x-7z-compressed', 'application/x-cpio', - 'gzip', 'bzip2'] - if mime_type is not None: - if mime_type.startswith('video'): - return 'video' - elif mime_type in supportedArchives or encoding in supportedArchives: - return 'archive' - elif mime_type.startswith('image'): - return 'image' - else: - return 'unknown' - else: - if os.path.isdir(name): - return 'directory' - else: - return 'unknown' - - def _copy_data_from_share(server_files, upload_dir): job = rq.get_current_job() job.meta['status'] = 'Data are being copied from share..' @@ -176,74 +100,6 @@ def _copy_data_from_share(server_files, upload_dir): os.makedirs(target_dir) shutil.copyfile(source_path, target_path) -def _unpack_archive(archive, upload_dir): - job = rq.get_current_job() - job.meta['status'] = 'Archive is being unpacked..' - job.save_meta() - - Archive(archive).extractall(upload_dir) - os.remove(archive) - -def _copy_video_to_task(video, db_task, step): - job = rq.get_current_job() - job.meta['status'] = 'Video is being extracted..' 
- job.save_meta() - - extractor = _FrameExtractor(video, db_task.image_quality, - step, db_task.start_frame, db_task.stop_frame) - for frame, image_orig_path in enumerate(extractor): - image_dest_path = db_task.get_frame_path(frame) - db_task.size += 1 - dirname = os.path.dirname(image_dest_path) - if not os.path.exists(dirname): - os.makedirs(dirname) - shutil.copyfile(image_orig_path, image_dest_path) - if db_task.stop_frame == 0: - db_task.stop_frame = db_task.start_frame + (db_task.size - 1) * step - - image = Image.open(db_task.get_frame_path(0)) - models.Video.objects.create(task=db_task, path=video, - width=image.width, height=image.height) - image.close() - -def _copy_images_to_task(upload_dir, db_task): - image_paths = [] - for root, _, files in os.walk(upload_dir): - paths = map(lambda f: os.path.join(root, f), files) - paths = filter(lambda x: _get_mime(x) == 'image', paths) - image_paths.extend(paths) - image_paths.sort() - - db_images = [] - if len(image_paths): - job = rq.get_current_job() - for frame, image_orig_path in enumerate(image_paths): - progress = frame * 100 // len(image_paths) - job.meta['status'] = 'Images are being compressed.. {}%'.format(progress) - job.save_meta() - image_dest_path = db_task.get_frame_path(frame) - db_task.size += 1 - dirname = os.path.dirname(image_dest_path) - if not os.path.exists(dirname): - os.makedirs(dirname) - image = Image.open(image_orig_path) - # Ensure image data fits into 8bit per pixel before RGB conversion as PIL clips values on conversion - if image.mode == "I": - # Image mode is 32bit integer pixels. 
- # Autoscale pixels by factor 2**8 / im_data.max() to fit into 8bit - im_data = np.array(image) - im_data = im_data * (2**8 / im_data.max()) - image = Image.fromarray(im_data.astype(np.int32)) - image = image.convert('RGB') - image.save(image_dest_path, quality=db_task.image_quality, optimize=True) - db_images.append(models.Image(task=db_task, path=image_orig_path, - frame=frame, width=image.width, height=image.height)) - image.close() - - models.Image.objects.bulk_create(db_images) - else: - raise ValueError("Image files were not found") - def _save_task_to_db(db_task): job = rq.get_current_job() job.meta['status'] = 'Task is being saved in database' @@ -296,7 +152,7 @@ def _validate_data(data): if '..' in path.split(os.path.sep): raise ValueError("Don't use '..' inside file paths") full_path = os.path.abspath(os.path.join(share_root, path)) - if 'directory' == _get_mime(full_path): + if 'directory' == get_mime(full_path): server_files['dirs'].append(path) else: server_files['files'].append(path) @@ -308,43 +164,42 @@ def _validate_data(data): if not [ f_name for f_name in server_files['files'] if f_name.startswith(dir_name)]] def count_files(file_mapping, counter): - archive = None - video = None for rel_path, full_path in file_mapping.items(): - mime = _get_mime(full_path) - counter[mime] += 1 - if mime == "archive": - archive = rel_path - elif mime == "video": - video = rel_path - return video, archive + mime = get_mime(full_path) + counter[mime].append(rel_path) - counter = {"image": 0, "video": 0, "archive": 0, "directory": 0} + counter = { media_type: [] for media_type in MEDIA_TYPES.keys() } - client_video, client_archive = count_files( + count_files( file_mapping={ f:f for f in data['remote_files'] or data['client_files']}, counter=counter, ) - server_video, server_archive = count_files( + count_files( file_mapping={ f:os.path.abspath(os.path.join(share_root, f)) for f in data['server_files']}, counter=counter, ) - num_videos = counter["video"] - 
num_archives = counter["archive"] - num_images = counter["image"] + counter["directory"] - if (num_videos > 1 or num_archives > 1 or - (num_videos == 1 and num_archives + num_images > 0) or - (num_archives == 1 and num_videos + num_images > 0) or - (num_images > 0 and num_archives + num_videos > 0)): + unique_entries = 0 + multiple_entries = 0 + for media_type, media_config in MEDIA_TYPES.items(): + if counter[media_type]: + if media_config['unique']: + unique_entries += len(counter[media_type]) + else: + multiple_entries += len(counter[media_type]) + + if unique_entries == 1 and multiple_entries > 0 or unique_entries > 1: + unique_types = ', '.join([k for k, v in MEDIA_TYPES.items() if v['unique']]) + multiply_types = ', '.join([k for k, v in MEDIA_TYPES.items() if not v['unique']]) + count = ', '.join(['{} {}(s)'.format(len(v), k) for k, v in counter.items()]) + raise ValueError('Only one {} or many {} can be used simultaneously, \ + but {} found.'.format(unique_types, multiply_types, count)) - raise ValueError("Only one archive, one video or many images can be \ - dowloaded simultaneously. 
{} image(s), {} dir(s), {} video(s), {} \ - archive(s) found".format(counter['image'], counter['directory'], - counter['video'], counter['archive'])) + if unique_entries == 0 and multiple_entries == 0: + raise ValueError('No media data found') - return client_video or server_video, client_archive or server_archive + return counter def _download_data(urls, upload_dir): job = rq.get_current_job() @@ -382,25 +237,72 @@ def _create_thread(tid, data): raise NotImplementedError("Adding more data is not implemented") upload_dir = db_task.get_upload_dirname() + if data['remote_files']: data['remote_files'] = _download_data(data['remote_files'], upload_dir) - video, archive = _validate_data(data) + + media = _validate_data(data) if data['server_files']: _copy_data_from_share(data['server_files'], upload_dir) - if archive: - archive = os.path.join(upload_dir, archive) - _unpack_archive(archive, upload_dir) + job = rq.get_current_job() + job.meta['status'] = 'Media files is being extracted...' + job.save_meta() - if video: - db_task.mode = "interpolation" - video = os.path.join(upload_dir, video) - _copy_video_to_task(video, db_task, db_task.get_frame_step()) + db_images = [] + extractors = [] + length = 0 + for media_type, media_files in media.items(): + if not media_files: + continue + + extractor = MEDIA_TYPES[media_type]['extractor']( + source_path=[os.path.join(upload_dir, f) for f in media_files], + dest_path=upload_dir, + image_quality=db_task.image_quality, + step=db_task.get_frame_step(), + start=db_task.start_frame, + stop=db_task.stop_frame, + ) + length += len(extractor) + db_task.mode = MEDIA_TYPES[media_type]['mode'] + extractors.append(extractor) + + for extractor in extractors: + for frame, image_orig_path in enumerate(extractor): + image_dest_path = db_task.get_frame_path(db_task.size) + dirname = os.path.dirname(image_dest_path) + + if not os.path.exists(dirname): + os.makedirs(dirname) + + if db_task.mode == 'interpolation': + extractor.save_image(frame, 
image_dest_path) + else: + width, height = extractor.save_image(frame, image_dest_path) + db_images.append(models.Image( + task=db_task, + path=image_orig_path, + frame=db_task.size, + width=width, height=height)) + + db_task.size += 1 + progress = frame * 100 // length + job.meta['status'] = 'Images are being compressed... {}%'.format(progress) + job.save_meta() + + if db_task.mode == 'interpolation': + image = Image.open(db_task.get_frame_path(0)) + models.Video.objects.create( + task=db_task, + path=extractors[0].get_source_name(), + width=image.width, height=image.height) + image.close() + if db_task.stop_frame == 0: + db_task.stop_frame = db_task.start_frame + (db_task.size - 1) * db_task.get_frame_step() else: - db_task.mode = "annotation" - _copy_images_to_task(upload_dir, db_task) + models.Image.objects.bulk_create(db_images) slogger.glob.info("Founded frames {} for task #{}".format(db_task.size, tid)) _save_task_to_db(db_task) -