From e8725cfde88348e4a3cc8bed8d1755ffa6714aab Mon Sep 17 00:00:00 2001 From: Andrey Zhavoronkov Date: Mon, 29 Apr 2019 17:20:03 +0300 Subject: [PATCH 01/12] ability to add custom extractors --- cvat/apps/engine/media_extractors.py | 115 ++++++++++ .../0016_attribute_spec_20190217.py | 2 +- cvat/apps/engine/mime.py | 33 +++ cvat/apps/engine/settings.py | 23 ++ cvat/apps/engine/task.py | 212 +++++------------- 5 files changed, 225 insertions(+), 160 deletions(-) create mode 100644 cvat/apps/engine/media_extractors.py create mode 100644 cvat/apps/engine/mime.py create mode 100644 cvat/apps/engine/settings.py diff --git a/cvat/apps/engine/media_extractors.py b/cvat/apps/engine/media_extractors.py new file mode 100644 index 000000000000..eb876fff2e5a --- /dev/null +++ b/cvat/apps/engine/media_extractors.py @@ -0,0 +1,115 @@ +import os +import tempfile +import shutil +import numpy as np + +from ffmpy import FFmpeg +from pyunpack import Archive +from PIL import Image + +from cvat.apps.engine.mime import get_mime +from .log import slogger + + +class MediaExtractor: + def __init__(self, source_path, dest_path, compress_quality): + self._source_path = source_path + self._dest_path = dest_path + self._compress_quality = compress_quality + + def __getitem__(self, k): + pass + + def __iter__(self): + pass + +class ImageListExtractor(MediaExtractor): + def __init__(self, source_path, dest_path, compress_quality): + return super().__init__(source_path, dest_path, compress_quality) + + def __iter__(self): + return iter(self._source_path) + + def __getitem__(self, k): + return self._source_path[k] + + def __len__(self): + return len(self._source_path) + + def save_image(self, k, dest_path): + image = Image.open(self[k]) + # Ensure image data fits into 8bit per pixel before RGB conversion as PIL clips values on conversion + if image.mode == "I": + # Image mode is 32bit integer pixels. 
+ # Autoscale pixels by factor 2**8 / im_data.max() to fit into 8bit + im_data = np.array(image) + im_data = im_data * (2**8 / im_data.max()) + image = Image.fromarray(im_data.astype(np.int32)) + image = image.convert('RGB') + image.save(dest_path, quality=self._compress_quality, optimize=True) + height = image.height + width = image.width + image.close() + return width, height + +class DirectoryExtractor(ImageListExtractor): + def __init__(self, source_path, dest_path, compress_quality): + image_paths = [] + for root, _, files in os.walk(source_path[0]): + paths = [os.path.join(root, f) for f in files] + paths = filter(lambda x: get_mime(x) == 'image', paths) + image_paths.extend(paths) + image_paths.sort() + super().__init__(image_paths, dest_path, compress_quality) + +class ArchiveExtractor(ImageListExtractor): + def __init__(self, source_path, dest_path, compress_quality): + Archive(source_path[0]).extractall(dest_path) + os.remove(source_path[0]) + image_paths = [] + for root, _, files in os.walk(dest_path): + paths = [os.path.join(root, f) for f in files] + paths = filter(lambda x: get_mime(x) == 'image', paths) + image_paths.extend(paths) + image_paths.sort() + super().__init__(image_paths, dest_path, compress_quality) + +class VideoExtractor(MediaExtractor): + def __init__(self, source_path, dest_path, compress_quality): + _dest_path = tempfile.mkdtemp(prefix='cvat-', suffix='.data') + super().__init__(source_path[0], _dest_path, compress_quality) + # translate inversed range 1:95 to 2:32 + translated_quality = 96 - self._compress_quality + translated_quality = round((((translated_quality - 1) * (31 - 2)) / (95 - 1)) + 2) + self._tmp_output = tempfile.mkdtemp(prefix='cvat-', suffix='.data') + target_path = os.path.join(self._tmp_output, '%d.jpg') + output_opts = '-start_number 0 -b:v 10000k -vsync 0 -an -y -q:v ' + str(translated_quality) + + ff = FFmpeg( + inputs = {self._source_path: None}, + outputs = {target_path: output_opts}) + + 
slogger.glob.info("FFMpeg cmd: {} ".format(ff.cmd)) + ff.run() + + def _getframepath(self, k): + return "{0}/{1}.jpg".format(self._tmp_output, k) + + def __iter__(self): + i = 0 + while os.path.exists(self._getframepath(i)): + yield self._getframepath(i) + i += 1 + + def __del__(self): + if self._tmp_output: + shutil.rmtree(self._tmp_output) + + def __getitem__(self, k): + return self._getframepath(k) + + def __len__(self): + return len(os.listdir(self._tmp_output)) + + def save_image(self, k, dest_path): + shutil.copyfile(self[k], dest_path) diff --git a/cvat/apps/engine/migrations/0016_attribute_spec_20190217.py b/cvat/apps/engine/migrations/0016_attribute_spec_20190217.py index dfb84fff98fd..222095633f61 100644 --- a/cvat/apps/engine/migrations/0016_attribute_spec_20190217.py +++ b/cvat/apps/engine/migrations/0016_attribute_spec_20190217.py @@ -5,7 +5,7 @@ from PIL import Image from django.db import migrations from django.conf import settings -from cvat.apps.engine.task import _get_mime +from cvat.apps.engine.mime import get_mime def parse_attribute(value): match = re.match(r'^([~@])(\w+)=(\w+):(.+)?$', value) diff --git a/cvat/apps/engine/mime.py b/cvat/apps/engine/mime.py new file mode 100644 index 000000000000..16d41b8f33ba --- /dev/null +++ b/cvat/apps/engine/mime.py @@ -0,0 +1,33 @@ +import os +import mimetypes +_SCRIPT_DIR = os.path.realpath(os.path.dirname(__file__)) +_MEDIA_MIMETYPES_FILE = os.path.join(_SCRIPT_DIR, "media.mimetypes") +mimetypes.init(files=[_MEDIA_MIMETYPES_FILE]) + +def is_archive(path): + mime = mimetypes.guess_type(path) + mime_type = mime[0] + encoding = mime[1] + supportedArchives = ['application/zip', 'application/x-rar-compressed', + 'application/x-tar', 'application/x-7z-compressed', 'application/x-cpio', + 'gzip', 'bzip2'] + return mime_type in supportedArchives or encoding in supportedArchives + +def is_video(path): + mime = mimetypes.guess_type(path) + return mime[0] is not None and mime[0].startswith('video') + +def 
is_image(path): + mime = mimetypes.guess_type(path) + return mime[0] is not None and mime[0].startswith('image') + +def is_dir(path): + return os.path.isdir(path) + +def get_mime(name): + from cvat.apps.engine.settings import MEDIA_TYPES + for type_name, type_def in MEDIA_TYPES.items(): + if type_def['has_mime_type'](name): + return type_name + + return 'unknown' diff --git a/cvat/apps/engine/settings.py b/cvat/apps/engine/settings.py new file mode 100644 index 000000000000..0512246a144e --- /dev/null +++ b/cvat/apps/engine/settings.py @@ -0,0 +1,23 @@ +import os +from cvat.apps.engine.mime import is_image, is_video, is_archive, is_dir +from cvat.apps.engine.media_extractors import ImageListExtractor, DirectoryExtractor, \ + VideoExtractor, ArchiveExtractor + +MEDIA_TYPES = { + 'image': { + 'has_mime_type': is_image, + 'extractor': ImageListExtractor, + }, + 'video': { + 'has_mime_type': is_video, + 'extractor': VideoExtractor, + }, + 'archive': { + 'has_mime_type': is_archive, + 'extractor': ArchiveExtractor, + }, + 'directory': { + 'has_mime_type': is_dir, + 'extractor': DirectoryExtractor, + }, +} diff --git a/cvat/apps/engine/task.py b/cvat/apps/engine/task.py index 5160c7d7f82a..025560607866 100644 --- a/cvat/apps/engine/task.py +++ b/cvat/apps/engine/task.py @@ -13,16 +13,12 @@ from traceback import print_exception from ast import literal_eval -import mimetypes -_SCRIPT_DIR = os.path.realpath(os.path.dirname(__file__)) -_MEDIA_MIMETYPES_FILE = os.path.join(_SCRIPT_DIR, "media.mimetypes") -mimetypes.init(files=[_MEDIA_MIMETYPES_FILE]) +from cvat.apps.engine.mime import get_mime +from cvat.apps.engine.settings import MEDIA_TYPES import django_rq from django.conf import settings from django.db import transaction -from ffmpy import FFmpeg -from pyunpack import Archive from distutils.dir_util import copy_tree from . 
import models @@ -47,39 +43,6 @@ def rq_handler(job, exc_type, exc_value, traceback): ############################# Internal implementation for server API -class _FrameExtractor: - def __init__(self, source_path, compress_quality, flip_flag=False): - # translate inversed range 1:95 to 2:32 - translated_quality = 96 - compress_quality - translated_quality = round((((translated_quality - 1) * (31 - 2)) / (95 - 1)) + 2) - self.output = tempfile.mkdtemp(prefix='cvat-', suffix='.data') - target_path = os.path.join(self.output, '%d.jpg') - output_opts = '-start_number 0 -b:v 10000k -vsync 0 -an -y -q:v ' + str(translated_quality) - if flip_flag: - output_opts += ' -vf "transpose=2,transpose=2"' - ff = FFmpeg( - inputs = {source_path: None}, - outputs = {target_path: output_opts}) - - slogger.glob.info("FFMpeg cmd: {} ".format(ff.cmd)) - ff.run() - - def getframepath(self, k): - return "{0}/{1}.jpg".format(self.output, k) - - def __del__(self): - if self.output: - shutil.rmtree(self.output) - - def __getitem__(self, k): - return self.getframepath(k) - - def __iter__(self): - i = 0 - while os.path.exists(self.getframepath(i)): - yield self[i] - i += 1 - def make_image_meta_cache(db_task): with open(db_task.get_image_meta_cache_path(), 'w') as meta_file: cache = { @@ -97,7 +60,7 @@ def make_image_meta_cache(db_task): filenames = [] for root, _, files in os.walk(db_task.get_upload_dirname()): fullnames = map(lambda f: os.path.join(root, f), files) - images = filter(lambda x: _get_mime(x) == 'image', fullnames) + images = filter(lambda x: get_mime(x) == 'image', fullnames) filenames.extend(images) filenames.sort() @@ -121,31 +84,6 @@ def get_image_meta_cache(db_task): with open(db_task.get_image_meta_cache_path()) as meta_cache_file: return literal_eval(meta_cache_file.read()) - -def _get_mime(name): - mime = mimetypes.guess_type(name) - mime_type = mime[0] - encoding = mime[1] - # zip, rar, tar, tar.gz, tar.bz2, 7z, cpio - supportedArchives = ['application/zip', 
'application/x-rar-compressed', - 'application/x-tar', 'application/x-7z-compressed', 'application/x-cpio', - 'gzip', 'bzip2'] - if mime_type is not None: - if mime_type.startswith('video'): - return 'video' - elif mime_type in supportedArchives or encoding in supportedArchives: - return 'archive' - elif mime_type.startswith('image'): - return 'image' - else: - return 'unknown' - else: - if os.path.isdir(name): - return 'directory' - else: - return 'unknown' - - def _copy_data_from_share(server_files, upload_dir): job = rq.get_current_job() job.meta['status'] = 'Data are being copied from share..' @@ -162,72 +100,6 @@ def _copy_data_from_share(server_files, upload_dir): os.makedirs(target_dir) shutil.copyfile(source_path, target_path) -def _unpack_archive(archive, upload_dir): - job = rq.get_current_job() - job.meta['status'] = 'Archive is being unpacked..' - job.save_meta() - - Archive(archive).extractall(upload_dir) - os.remove(archive) - -def _copy_video_to_task(video, db_task): - job = rq.get_current_job() - job.meta['status'] = 'Video is being extracted..' 
- job.save_meta() - - extractor = _FrameExtractor(video, db_task.image_quality) - for frame, image_orig_path in enumerate(extractor): - image_dest_path = db_task.get_frame_path(frame) - db_task.size += 1 - dirname = os.path.dirname(image_dest_path) - if not os.path.exists(dirname): - os.makedirs(dirname) - shutil.copyfile(image_orig_path, image_dest_path) - - image = Image.open(db_task.get_frame_path(0)) - models.Video.objects.create(task=db_task, path=video, - start_frame=0, stop_frame=db_task.size, step=1, - width=image.width, height=image.height) - image.close() - -def _copy_images_to_task(upload_dir, db_task): - image_paths = [] - for root, _, files in os.walk(upload_dir): - paths = map(lambda f: os.path.join(root, f), files) - paths = filter(lambda x: _get_mime(x) == 'image', paths) - image_paths.extend(paths) - image_paths.sort() - - db_images = [] - if len(image_paths): - job = rq.get_current_job() - for frame, image_orig_path in enumerate(image_paths): - progress = frame * 100 // len(image_paths) - job.meta['status'] = 'Images are being compressed.. {}%'.format(progress) - job.save_meta() - image_dest_path = db_task.get_frame_path(frame) - db_task.size += 1 - dirname = os.path.dirname(image_dest_path) - if not os.path.exists(dirname): - os.makedirs(dirname) - image = Image.open(image_orig_path) - # Ensure image data fits into 8bit per pixel before RGB conversion as PIL clips values on conversion - if image.mode == "I": - # Image mode is 32bit integer pixels. 
- # Autoscale pixels by factor 2**8 / im_data.max() to fit into 8bit - im_data = np.array(image) - im_data = im_data * (2**8 / im_data.max()) - image = Image.fromarray(im_data.astype(np.int32)) - image = image.convert('RGB') - image.save(image_dest_path, quality=db_task.image_quality, optimize=True) - db_images.append(models.Image(task=db_task, path=image_orig_path, - frame=frame, width=image.width, height=image.height)) - image.close() - - models.Image.objects.bulk_create(db_images) - else: - raise ValueError("Image files were not found") - def _save_task_to_db(db_task): job = rq.get_current_job() job.meta['status'] = 'Task is being saved in database' @@ -280,7 +152,7 @@ def _validate_data(data): if '..' in path.split(os.path.sep): raise ValueError("Don't use '..' inside file paths") full_path = os.path.abspath(os.path.join(share_root, path)) - if 'directory' == _get_mime(full_path): + if 'directory' == get_mime(full_path): server_files['dirs'].append(path) else: server_files['files'].append(path) @@ -292,32 +164,25 @@ def _validate_data(data): if not [ f_name for f_name in server_files['files'] if f_name.startswith(dir_name)]] def count_files(file_mapping, counter): - archive = None - video = None for rel_path, full_path in file_mapping.items(): - mime = _get_mime(full_path) - counter[mime] += 1 - if mime == "archive": - archive = rel_path - elif mime == "video": - video = rel_path - return video, archive + mime = get_mime(full_path) + counter[mime].append(rel_path) - counter = {"image": 0, "video": 0, "archive": 0, "directory": 0} + counter = { media_type: [] for media_type in MEDIA_TYPES.keys() } - client_video, client_archive = count_files( + count_files( file_mapping={ f:f for f in data['client_files']}, counter=counter, ) - server_video, server_archive = count_files( + count_files( file_mapping={ f:os.path.abspath(os.path.join(share_root, f)) for f in data['server_files']}, counter=counter, ) - num_videos = counter["video"] - num_archives = 
counter["archive"] - num_images = counter["image"] + counter["directory"] + num_videos = len(counter["video"]) + num_archives = len(counter["archive"]) + num_images = len(counter["image"]) + len(counter["directory"]) if (num_videos > 1 or num_archives > 1 or (num_videos == 1 and num_archives + num_images > 0) or (num_archives == 1 and num_videos + num_images > 0) or @@ -328,7 +193,7 @@ def count_files(file_mapping, counter): archive(s) found".format(counter['image'], counter['directory'], counter['video'], counter['archive'])) - return client_video or server_video, client_archive or server_archive + return counter @transaction.atomic def _create_thread(tid, data): @@ -339,23 +204,52 @@ def _create_thread(tid, data): raise NotImplementedError("Adding more data is not implemented") upload_dir = db_task.get_upload_dirname() - video, archive = _validate_data(data) + media = _validate_data(data) if data['server_files']: _copy_data_from_share(data['server_files'], upload_dir) - if archive: - archive = os.path.join(upload_dir, archive) - _unpack_archive(archive, upload_dir) + job = rq.get_current_job() + + db_images = [] + + db_task.mode = 'interpolation' if media['video'] else 'annotation' + for media_type, media_files in media.items(): + if not media_files: + continue + + extractor = MEDIA_TYPES[media_type]['extractor']( + source_path=[os.path.join(upload_dir, f) for f in media_files], + dest_path=upload_dir, + image_quality=db_task.image_quality, + ) - if video: - db_task.mode = "interpolation" - video = os.path.join(upload_dir, video) - _copy_video_to_task(video, db_task) + for frame, image_orig_path in enumerate(extractor): + image_dest_path = db_task.get_frame_path(frame) + db_task.size += 1 + dirname = os.path.dirname(image_dest_path) + if not os.path.exists(dirname): + os.makedirs(dirname) + if db_task.mode == 'interpolation': + job.meta['status'] = 'Video is being extracted..' 
+ job.save_meta() + extractor.save_image(frame, image_dest_path) + else: + progress = frame * 100 // len(extractor) + job.meta['status'] = 'Images are being compressed.. {}%'.format(progress) + job.save_meta() + width, height = extractor.save_image(frame, image_dest_path) + db_images.append(models.Image(task=db_task, path=image_orig_path, + frame=frame, width=width, height=height)) + + if db_task.mode == 'interpolation': + image = Image.open(db_task.get_frame_path(0)) + models.Video.objects.create(task=db_task, path=media['video'][0], + start_frame=0, stop_frame=db_task.size, step=1, + width=image.width, height=image.height) + image.close() else: - db_task.mode = "annotation" - _copy_images_to_task(upload_dir, db_task) + models.Image.objects.bulk_create(db_images) slogger.glob.info("Founded frames {} for task #{}".format(db_task.size, tid)) _save_task_to_db(db_task) - From 09cdea88a13153e9b198730940fef56d5dc24191 Mon Sep 17 00:00:00 2001 From: Andrey Zhavoronkov Date: Mon, 29 Apr 2019 17:51:51 +0300 Subject: [PATCH 02/12] added configurable mimetypes --- cvat/apps/engine/media_extractors.py | 29 ++++++------ .../0016_attribute_spec_20190217.py | 2 +- cvat/apps/engine/mime.py | 33 -------------- cvat/apps/engine/settings.py | 45 ++++++++++++++++--- cvat/apps/engine/task.py | 9 ++-- 5 files changed, 60 insertions(+), 58 deletions(-) delete mode 100644 cvat/apps/engine/mime.py diff --git a/cvat/apps/engine/media_extractors.py b/cvat/apps/engine/media_extractors.py index eb876fff2e5a..8ef54f749035 100644 --- a/cvat/apps/engine/media_extractors.py +++ b/cvat/apps/engine/media_extractors.py @@ -7,15 +7,14 @@ from pyunpack import Archive from PIL import Image -from cvat.apps.engine.mime import get_mime from .log import slogger class MediaExtractor: - def __init__(self, source_path, dest_path, compress_quality): + def __init__(self, source_path, dest_path, image_quality): self._source_path = source_path self._dest_path = dest_path - self._compress_quality = 
compress_quality + self._image_quality = image_quality def __getitem__(self, k): pass @@ -24,8 +23,8 @@ def __iter__(self): pass class ImageListExtractor(MediaExtractor): - def __init__(self, source_path, dest_path, compress_quality): - return super().__init__(source_path, dest_path, compress_quality) + def __init__(self, source_path, dest_path, image_quality): + return super().__init__(source_path, dest_path, image_quality) def __iter__(self): return iter(self._source_path) @@ -46,40 +45,42 @@ def save_image(self, k, dest_path): im_data = im_data * (2**8 / im_data.max()) image = Image.fromarray(im_data.astype(np.int32)) image = image.convert('RGB') - image.save(dest_path, quality=self._compress_quality, optimize=True) + image.save(dest_path, quality=self._image_quality, optimize=True) height = image.height width = image.width image.close() return width, height class DirectoryExtractor(ImageListExtractor): - def __init__(self, source_path, dest_path, compress_quality): + def __init__(self, source_path, dest_path, image_quality): + from cvat.apps.engine.settings import _get_mime image_paths = [] for root, _, files in os.walk(source_path[0]): paths = [os.path.join(root, f) for f in files] paths = filter(lambda x: get_mime(x) == 'image', paths) image_paths.extend(paths) image_paths.sort() - super().__init__(image_paths, dest_path, compress_quality) + super().__init__(image_paths, dest_path, image_quality) class ArchiveExtractor(ImageListExtractor): - def __init__(self, source_path, dest_path, compress_quality): + def __init__(self, source_path, dest_path, image_quality): + from cvat.apps.engine.settings import _get_mime Archive(source_path[0]).extractall(dest_path) os.remove(source_path[0]) image_paths = [] for root, _, files in os.walk(dest_path): paths = [os.path.join(root, f) for f in files] - paths = filter(lambda x: get_mime(x) == 'image', paths) + paths = filter(lambda x: _get_mime(x) == 'image', paths) image_paths.extend(paths) image_paths.sort() - 
super().__init__(image_paths, dest_path, compress_quality) + super().__init__(image_paths, dest_path, image_quality) class VideoExtractor(MediaExtractor): - def __init__(self, source_path, dest_path, compress_quality): + def __init__(self, source_path, dest_path, image_quality): _dest_path = tempfile.mkdtemp(prefix='cvat-', suffix='.data') - super().__init__(source_path[0], _dest_path, compress_quality) + super().__init__(source_path[0], _dest_path, image_quality) # translate inversed range 1:95 to 2:32 - translated_quality = 96 - self._compress_quality + translated_quality = 96 - self._image_quality translated_quality = round((((translated_quality - 1) * (31 - 2)) / (95 - 1)) + 2) self._tmp_output = tempfile.mkdtemp(prefix='cvat-', suffix='.data') target_path = os.path.join(self._tmp_output, '%d.jpg') diff --git a/cvat/apps/engine/migrations/0016_attribute_spec_20190217.py b/cvat/apps/engine/migrations/0016_attribute_spec_20190217.py index 222095633f61..25dcd61310b0 100644 --- a/cvat/apps/engine/migrations/0016_attribute_spec_20190217.py +++ b/cvat/apps/engine/migrations/0016_attribute_spec_20190217.py @@ -5,7 +5,7 @@ from PIL import Image from django.db import migrations from django.conf import settings -from cvat.apps.engine.mime import get_mime +from cvat.apps.engine.settings import _get_mime def parse_attribute(value): match = re.match(r'^([~@])(\w+)=(\w+):(.+)?$', value) diff --git a/cvat/apps/engine/mime.py b/cvat/apps/engine/mime.py deleted file mode 100644 index 16d41b8f33ba..000000000000 --- a/cvat/apps/engine/mime.py +++ /dev/null @@ -1,33 +0,0 @@ -import os -import mimetypes -_SCRIPT_DIR = os.path.realpath(os.path.dirname(__file__)) -_MEDIA_MIMETYPES_FILE = os.path.join(_SCRIPT_DIR, "media.mimetypes") -mimetypes.init(files=[_MEDIA_MIMETYPES_FILE]) - -def is_archive(path): - mime = mimetypes.guess_type(path) - mime_type = mime[0] - encoding = mime[1] - supportedArchives = ['application/zip', 'application/x-rar-compressed', - 'application/x-tar', 
'application/x-7z-compressed', 'application/x-cpio', - 'gzip', 'bzip2'] - return mime_type in supportedArchives or encoding in supportedArchives - -def is_video(path): - mime = mimetypes.guess_type(path) - return mime[0] is not None and mime[0].startswith('video') - -def is_image(path): - mime = mimetypes.guess_type(path) - return mime[0] is not None and mime[0].startswith('image') - -def is_dir(path): - return os.path.isdir(path) - -def get_mime(name): - from cvat.apps.engine.settings import MEDIA_TYPES - for type_name, type_def in MEDIA_TYPES.items(): - if type_def['has_mime_type'](name): - return type_name - - return 'unknown' diff --git a/cvat/apps/engine/settings.py b/cvat/apps/engine/settings.py index 0512246a144e..cba200fcb135 100644 --- a/cvat/apps/engine/settings.py +++ b/cvat/apps/engine/settings.py @@ -1,23 +1,58 @@ import os -from cvat.apps.engine.mime import is_image, is_video, is_archive, is_dir +import mimetypes from cvat.apps.engine.media_extractors import ImageListExtractor, DirectoryExtractor, \ VideoExtractor, ArchiveExtractor +_SCRIPT_DIR = os.path.realpath(os.path.dirname(__file__)) + +MEDIA_MIMETYPES_FILES = [ + os.path.join(_SCRIPT_DIR, "media.mimetypes") +] + +mimetypes.init(files=MEDIA_MIMETYPES_FILES) + +def _is_archive(path): + mime = mimetypes.guess_type(path) + mime_type = mime[0] + encoding = mime[1] + supportedArchives = ['application/zip', 'application/x-rar-compressed', + 'application/x-tar', 'application/x-7z-compressed', 'application/x-cpio', + 'gzip', 'bzip2'] + return mime_type in supportedArchives or encoding in supportedArchives + +def _is_video(path): + mime = mimetypes.guess_type(path) + return mime[0] is not None and mime[0].startswith('video') + +def _is_image(path): + mime = mimetypes.guess_type(path) + return mime[0] is not None and mime[0].startswith('image') + +def _is_dir(path): + return os.path.isdir(path) MEDIA_TYPES = { 'image': { - 'has_mime_type': is_image, + 'has_mime_type': _is_image, 'extractor': 
ImageListExtractor, }, 'video': { - 'has_mime_type': is_video, + 'has_mime_type': _is_video, 'extractor': VideoExtractor, }, 'archive': { - 'has_mime_type': is_archive, + 'has_mime_type': _is_archive, 'extractor': ArchiveExtractor, }, 'directory': { - 'has_mime_type': is_dir, + 'has_mime_type': _is_dir, 'extractor': DirectoryExtractor, }, } + +def _get_mime(name): + from cvat.apps.engine.settings import MEDIA_TYPES + for type_name, type_def in MEDIA_TYPES.items(): + if type_def['has_mime_type'](name): + return type_name + + return 'unknown' diff --git a/cvat/apps/engine/task.py b/cvat/apps/engine/task.py index 025560607866..5e97ee371d6b 100644 --- a/cvat/apps/engine/task.py +++ b/cvat/apps/engine/task.py @@ -13,8 +13,7 @@ from traceback import print_exception from ast import literal_eval -from cvat.apps.engine.mime import get_mime -from cvat.apps.engine.settings import MEDIA_TYPES +from cvat.apps.engine.settings import _get_mime, MEDIA_TYPES import django_rq from django.conf import settings @@ -60,7 +59,7 @@ def make_image_meta_cache(db_task): filenames = [] for root, _, files in os.walk(db_task.get_upload_dirname()): fullnames = map(lambda f: os.path.join(root, f), files) - images = filter(lambda x: get_mime(x) == 'image', fullnames) + images = filter(lambda x: _get_mime(x) == 'image', fullnames) filenames.extend(images) filenames.sort() @@ -152,7 +151,7 @@ def _validate_data(data): if '..' in path.split(os.path.sep): raise ValueError("Don't use '..' 
inside file paths") full_path = os.path.abspath(os.path.join(share_root, path)) - if 'directory' == get_mime(full_path): + if 'directory' == _get_mime(full_path): server_files['dirs'].append(path) else: server_files['files'].append(path) @@ -165,7 +164,7 @@ def _validate_data(data): def count_files(file_mapping, counter): for rel_path, full_path in file_mapping.items(): - mime = get_mime(full_path) + mime = _get_mime(full_path) counter[mime].append(rel_path) counter = { media_type: [] for media_type in MEDIA_TYPES.keys() } From c916c485768a90370fc293e7fb90c88d956f0309 Mon Sep 17 00:00:00 2001 From: Andrey Zhavoronkov Date: Mon, 29 Apr 2019 20:08:41 +0300 Subject: [PATCH 03/12] minor fix --- cvat/apps/engine/media_extractors.py | 7 +++---- cvat/apps/engine/settings.py | 3 +-- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/cvat/apps/engine/media_extractors.py b/cvat/apps/engine/media_extractors.py index 8ef54f749035..e42c014e6179 100644 --- a/cvat/apps/engine/media_extractors.py +++ b/cvat/apps/engine/media_extractors.py @@ -7,9 +7,6 @@ from pyunpack import Archive from PIL import Image -from .log import slogger - - class MediaExtractor: def __init__(self, source_path, dest_path, image_quality): self._source_path = source_path @@ -24,6 +21,8 @@ def __iter__(self): class ImageListExtractor(MediaExtractor): def __init__(self, source_path, dest_path, image_quality): + if not source_path: + raise Exception('No image found') return super().__init__(source_path, dest_path, image_quality) def __iter__(self): @@ -57,7 +56,7 @@ def __init__(self, source_path, dest_path, image_quality): image_paths = [] for root, _, files in os.walk(source_path[0]): paths = [os.path.join(root, f) for f in files] - paths = filter(lambda x: get_mime(x) == 'image', paths) + paths = filter(lambda x: _get_mime(x) == 'image', paths) image_paths.extend(paths) image_paths.sort() super().__init__(image_paths, dest_path, image_quality) diff --git a/cvat/apps/engine/settings.py 
b/cvat/apps/engine/settings.py index cba200fcb135..7fe1874e542b 100644 --- a/cvat/apps/engine/settings.py +++ b/cvat/apps/engine/settings.py @@ -5,7 +5,7 @@ _SCRIPT_DIR = os.path.realpath(os.path.dirname(__file__)) MEDIA_MIMETYPES_FILES = [ - os.path.join(_SCRIPT_DIR, "media.mimetypes") + os.path.join(_SCRIPT_DIR, "media.mimetypes"), ] mimetypes.init(files=MEDIA_MIMETYPES_FILES) @@ -50,7 +50,6 @@ def _is_dir(path): } def _get_mime(name): - from cvat.apps.engine.settings import MEDIA_TYPES for type_name, type_def in MEDIA_TYPES.items(): if type_def['has_mime_type'](name): return type_name From 00cdf893dbb9a35bc90e35fb2b55cfb6f1436970 Mon Sep 17 00:00:00 2001 From: Andrey Zhavoronkov Date: Mon, 29 Apr 2019 20:30:18 +0300 Subject: [PATCH 04/12] fixed logging --- cvat/apps/engine/media_extractors.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/cvat/apps/engine/media_extractors.py b/cvat/apps/engine/media_extractors.py index e42c014e6179..6f817e09d99d 100644 --- a/cvat/apps/engine/media_extractors.py +++ b/cvat/apps/engine/media_extractors.py @@ -13,12 +13,6 @@ def __init__(self, source_path, dest_path, image_quality): self._dest_path = dest_path self._image_quality = image_quality - def __getitem__(self, k): - pass - - def __iter__(self): - pass - class ImageListExtractor(MediaExtractor): def __init__(self, source_path, dest_path, image_quality): if not source_path: @@ -76,6 +70,7 @@ def __init__(self, source_path, dest_path, image_quality): class VideoExtractor(MediaExtractor): def __init__(self, source_path, dest_path, image_quality): + from cvat.apps.engine.log import slogger _dest_path = tempfile.mkdtemp(prefix='cvat-', suffix='.data') super().__init__(source_path[0], _dest_path, image_quality) # translate inversed range 1:95 to 2:32 From a94327c97fcc087d503b7cdaf892b1cad9565394 Mon Sep 17 00:00:00 2001 From: Andrey Zhavoronkov Date: Mon, 29 Apr 2019 22:15:49 +0300 Subject: [PATCH 05/12] fixed directory extractor --- 
cvat/apps/engine/media_extractors.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/cvat/apps/engine/media_extractors.py b/cvat/apps/engine/media_extractors.py index 6f817e09d99d..094478db5a4f 100644 --- a/cvat/apps/engine/media_extractors.py +++ b/cvat/apps/engine/media_extractors.py @@ -48,10 +48,11 @@ class DirectoryExtractor(ImageListExtractor): def __init__(self, source_path, dest_path, image_quality): from cvat.apps.engine.settings import _get_mime image_paths = [] - for root, _, files in os.walk(source_path[0]): - paths = [os.path.join(root, f) for f in files] - paths = filter(lambda x: _get_mime(x) == 'image', paths) - image_paths.extend(paths) + for source in source_path: + for root, _, files in os.walk(source): + paths = [os.path.join(root, f) for f in files] + paths = filter(lambda x: _get_mime(x) == 'image', paths) + image_paths.extend(paths) image_paths.sort() super().__init__(image_paths, dest_path, image_quality) From f7d43e17a49fb8fcf2cb7106e256f79a348d53ef Mon Sep 17 00:00:00 2001 From: Andrey Zhavoronkov Date: Tue, 30 Apr 2019 09:27:20 +0300 Subject: [PATCH 06/12] fixed codacy issues --- cvat/apps/engine/media_extractors.py | 2 +- cvat/apps/engine/task.py | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/cvat/apps/engine/media_extractors.py b/cvat/apps/engine/media_extractors.py index 094478db5a4f..6512c5e9c8b0 100644 --- a/cvat/apps/engine/media_extractors.py +++ b/cvat/apps/engine/media_extractors.py @@ -17,7 +17,7 @@ class ImageListExtractor(MediaExtractor): def __init__(self, source_path, dest_path, image_quality): if not source_path: raise Exception('No image found') - return super().__init__(source_path, dest_path, image_quality) + super().__init__(source_path, dest_path, image_quality) def __iter__(self): return iter(self._source_path) diff --git a/cvat/apps/engine/task.py b/cvat/apps/engine/task.py index 5e97ee371d6b..3fffb9e81b73 100644 --- a/cvat/apps/engine/task.py +++ 
b/cvat/apps/engine/task.py @@ -7,8 +7,6 @@ import sys import rq import shutil -import tempfile -import numpy as np from PIL import Image from traceback import print_exception from ast import literal_eval From 90cebc3bd343b2c520d785f385f6634a6f09377a Mon Sep 17 00:00:00 2001 From: Andrey Zhavoronkov Date: Tue, 30 Apr 2019 12:56:26 +0300 Subject: [PATCH 07/12] added task mode settings --- cvat/apps/engine/settings.py | 4 ++++ cvat/apps/engine/task.py | 19 +++++++++---------- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/cvat/apps/engine/settings.py b/cvat/apps/engine/settings.py index 7fe1874e542b..7eb19a644f6d 100644 --- a/cvat/apps/engine/settings.py +++ b/cvat/apps/engine/settings.py @@ -34,18 +34,22 @@ def _is_dir(path): 'image': { 'has_mime_type': _is_image, 'extractor': ImageListExtractor, + 'mode': 'annotation', }, 'video': { 'has_mime_type': _is_video, 'extractor': VideoExtractor, + 'mode': 'interpolation', }, 'archive': { 'has_mime_type': _is_archive, 'extractor': ArchiveExtractor, + 'mode': 'annotation', }, 'directory': { 'has_mime_type': _is_dir, 'extractor': DirectoryExtractor, + 'mode': 'annotation', }, } diff --git a/cvat/apps/engine/task.py b/cvat/apps/engine/task.py index 3fffb9e81b73..323656a51f72 100644 --- a/cvat/apps/engine/task.py +++ b/cvat/apps/engine/task.py @@ -207,10 +207,7 @@ def _create_thread(tid, data): _copy_data_from_share(data['server_files'], upload_dir) job = rq.get_current_job() - db_images = [] - - db_task.mode = 'interpolation' if media['video'] else 'annotation' for media_type, media_files in media.items(): if not media_files: continue @@ -221,6 +218,7 @@ def _create_thread(tid, data): image_quality=db_task.image_quality, ) + db_task.mode = MEDIA_TYPES[media_type]['mode'] for frame, image_orig_path in enumerate(extractor): image_dest_path = db_task.get_frame_path(frame) db_task.size += 1 @@ -239,13 +237,14 @@ def _create_thread(tid, data): db_images.append(models.Image(task=db_task, path=image_orig_path, 
frame=frame, width=width, height=height)) - if db_task.mode == 'interpolation': - image = Image.open(db_task.get_frame_path(0)) - models.Video.objects.create(task=db_task, path=media['video'][0], - start_frame=0, stop_frame=db_task.size, step=1, - width=image.width, height=image.height) - image.close() - else: + if db_task.mode == 'interpolation': + image = Image.open(db_task.get_frame_path(0)) + models.Video.objects.create(task=db_task, path=media[media_type][0], + start_frame=0, stop_frame=db_task.size, step=1, + width=image.width, height=image.height) + image.close() + + if db_task.mode == 'annotation': models.Image.objects.bulk_create(db_images) slogger.glob.info("Founded frames {} for task #{}".format(db_task.size, tid)) From 1b2506aedad0db4c18d17f57a5ee0ab44f4b7c33 Mon Sep 17 00:00:00 2001 From: Andrey Zhavoronkov Date: Tue, 30 Apr 2019 18:29:14 +0300 Subject: [PATCH 08/12] improved media data validation fixed bugs --- cvat/apps/engine/media_extractors.py | 3 + cvat/apps/engine/settings.py | 12 ++++ cvat/apps/engine/task.py | 82 +++++++++++++++++----------- 3 files changed, 66 insertions(+), 31 deletions(-) diff --git a/cvat/apps/engine/media_extractors.py b/cvat/apps/engine/media_extractors.py index 6512c5e9c8b0..32aa3c78fca1 100644 --- a/cvat/apps/engine/media_extractors.py +++ b/cvat/apps/engine/media_extractors.py @@ -13,6 +13,9 @@ def __init__(self, source_path, dest_path, image_quality): self._dest_path = dest_path self._image_quality = image_quality + def get_source_name(self): + return self._source_path + class ImageListExtractor(MediaExtractor): def __init__(self, source_path, dest_path, image_quality): if not source_path: diff --git a/cvat/apps/engine/settings.py b/cvat/apps/engine/settings.py index 7eb19a644f6d..6d321ba4d0d5 100644 --- a/cvat/apps/engine/settings.py +++ b/cvat/apps/engine/settings.py @@ -30,26 +30,38 @@ def _is_image(path): def _is_dir(path): return os.path.isdir(path) +# 'has_mime_type': function receives 1 argument - path to 
file. +# Should return True if file has specified media type. +# 'extractor': class that extracts images from specified media. +# 'mode': 'annotation' or 'interpolation' - mode of task that should be created. +# 'unique': True or False - describes how the type can be combined with other. +# True - only one item of this type and no other is allowed +# False - this media types can be combined with other which have unique == False + MEDIA_TYPES = { 'image': { 'has_mime_type': _is_image, 'extractor': ImageListExtractor, 'mode': 'annotation', + 'unique': False, }, 'video': { 'has_mime_type': _is_video, 'extractor': VideoExtractor, 'mode': 'interpolation', + 'unique': True, }, 'archive': { 'has_mime_type': _is_archive, 'extractor': ArchiveExtractor, 'mode': 'annotation', + 'unique': True, }, 'directory': { 'has_mime_type': _is_dir, 'extractor': DirectoryExtractor, 'mode': 'annotation', + 'unique': False, }, } diff --git a/cvat/apps/engine/task.py b/cvat/apps/engine/task.py index 323656a51f72..ab7e689182b2 100644 --- a/cvat/apps/engine/task.py +++ b/cvat/apps/engine/task.py @@ -177,18 +177,24 @@ def count_files(file_mapping, counter): counter=counter, ) - num_videos = len(counter["video"]) - num_archives = len(counter["archive"]) - num_images = len(counter["image"]) + len(counter["directory"]) - if (num_videos > 1 or num_archives > 1 or - (num_videos == 1 and num_archives + num_images > 0) or - (num_archives == 1 and num_videos + num_images > 0) or - (num_images > 0 and num_archives + num_videos > 0)): - - raise ValueError("Only one archive, one video or many images can be \ - dowloaded simultaneously. 
{} image(s), {} dir(s), {} video(s), {} \ - archive(s) found".format(counter['image'], counter['directory'], - counter['video'], counter['archive'])) + unique_entries = 0 + multiple_entries = 0 + for media_type, media_config in MEDIA_TYPES.items(): + if counter[media_type]: + if media_config['unique']: + unique_entries += len(counter[media_type]) + else: + multiple_entries += len(counter[media_type]) + + if unique_entries == 1 and multiple_entries > 0 or unique_entries > 1: + unique_types = ', '.join([k for k, v in MEDIA_TYPES.items() if v['unique']]) + multiply_types = ', '.join([k for k, v in MEDIA_TYPES.items() if not v['unique']]) + count = ', '.join(['{} {}(s)'.format(len(v), k) for k, v in counter.items()]) + raise ValueError('Only one {} or many {} can be used simultaneously, \ + but {} found.'.format(unique_types, multiply_types, count)) + + if unique_entries == 0 and multiple_entries == 0: + raise ValueError('No media data found') return counter @@ -207,7 +213,12 @@ def _create_thread(tid, data): _copy_data_from_share(data['server_files'], upload_dir) job = rq.get_current_job() + job.meta['status'] = 'Media files is being extracted...' + job.save_meta() + db_images = [] + extractors = [] + length = 0 for media_type, media_files in media.items(): if not media_files: continue @@ -217,34 +228,43 @@ def _create_thread(tid, data): dest_path=upload_dir, image_quality=db_task.image_quality, ) - + length += len(extractor) db_task.mode = MEDIA_TYPES[media_type]['mode'] + extractors.append(extractor) + + for extractor in extractors: for frame, image_orig_path in enumerate(extractor): - image_dest_path = db_task.get_frame_path(frame) - db_task.size += 1 + image_dest_path = db_task.get_frame_path(db_task.size) dirname = os.path.dirname(image_dest_path) + if not os.path.exists(dirname): - os.makedirs(dirname) + os.makedirs(dirname) + if db_task.mode == 'interpolation': - job.meta['status'] = 'Video is being extracted..' 
- job.save_meta() extractor.save_image(frame, image_dest_path) else: - progress = frame * 100 // len(extractor) - job.meta['status'] = 'Images are being compressed.. {}%'.format(progress) - job.save_meta() width, height = extractor.save_image(frame, image_dest_path) - db_images.append(models.Image(task=db_task, path=image_orig_path, - frame=frame, width=width, height=height)) + db_images.append(models.Image( + task=db_task, + path=os.path.relpath(image_orig_path, upload_dir), + frame=db_task.size, + width=width, height=height)) - if db_task.mode == 'interpolation': - image = Image.open(db_task.get_frame_path(0)) - models.Video.objects.create(task=db_task, path=media[media_type][0], - start_frame=0, stop_frame=db_task.size, step=1, - width=image.width, height=image.height) - image.close() - - if db_task.mode == 'annotation': + db_task.size += 1 + progress = frame * 100 // length + job.meta['status'] = 'Images are being compressed... {}%'.format(progress) + job.save_meta() + + if db_task.mode == 'interpolation': + image = Image.open(db_task.get_frame_path(0)) + models.Video.objects.create( + task=db_task, + path=os.path.relpath(extractors[0].get_source_name(), upload_dir), + start_frame=0, stop_frame=db_task.size, + step=1, + width=image.width, height=image.height) + image.close() + else: models.Image.objects.bulk_create(db_images) slogger.glob.info("Founded frames {} for task #{}".format(db_task.size, tid)) From 392e2023ab906fc70c380f340f27035cdc28075c Mon Sep 17 00:00:00 2001 From: Andrey Zhavoronkov Date: Wed, 5 Jun 2019 16:34:45 +0300 Subject: [PATCH 09/12] Adopt changes from develop --- cvat/apps/engine/media_extractors.py | 65 +++++++++++++++++++++++----- cvat/apps/engine/task.py | 11 +++-- 2 files changed, 60 insertions(+), 16 deletions(-) diff --git a/cvat/apps/engine/media_extractors.py b/cvat/apps/engine/media_extractors.py index 32aa3c78fca1..78807df21685 100644 --- a/cvat/apps/engine/media_extractors.py +++ b/cvat/apps/engine/media_extractors.py @@ -8,19 
+8,30 @@ from PIL import Image class MediaExtractor: - def __init__(self, source_path, dest_path, image_quality): + def __init__(self, source_path, dest_path, image_quality, step, start, stop): self._source_path = source_path self._dest_path = dest_path self._image_quality = image_quality + self._step = step + self._start = start + self._stop = stop def get_source_name(self): return self._source_path +#Note step, start, stop have no affect class ImageListExtractor(MediaExtractor): - def __init__(self, source_path, dest_path, image_quality): + def __init__(self, source_path, dest_path, image_quality, step=1, start=0, stop=0): if not source_path: raise Exception('No image found') - super().__init__(source_path, dest_path, image_quality) + super().__init__( + source_path=sorted(source_path), + dest_path=dest_path, + image_quality=image_quality, + step=1, + start=0, + stop=0, + ) def __iter__(self): return iter(self._source_path) @@ -47,8 +58,9 @@ def save_image(self, k, dest_path): image.close() return width, height +#Note step, start, stop have no affect class DirectoryExtractor(ImageListExtractor): - def __init__(self, source_path, dest_path, image_quality): + def __init__(self, source_path, dest_path, image_quality, step=1, start=0, stop=0): from cvat.apps.engine.settings import _get_mime image_paths = [] for source in source_path: @@ -56,11 +68,18 @@ def __init__(self, source_path, dest_path, image_quality): paths = [os.path.join(root, f) for f in files] paths = filter(lambda x: _get_mime(x) == 'image', paths) image_paths.extend(paths) - image_paths.sort() - super().__init__(image_paths, dest_path, image_quality) - + super().__init__( + source_path=sorted(source_path), + dest_path=dest_path, + image_quality=image_quality, + step=1, + start=0, + stop=0, + ) + +#Note step, start, stop have no affect class ArchiveExtractor(ImageListExtractor): - def __init__(self, source_path, dest_path, image_quality): + def __init__(self, source_path, dest_path, image_quality, 
step=1, start=0, stop=0): from cvat.apps.engine.settings import _get_mime Archive(source_path[0]).extractall(dest_path) os.remove(source_path[0]) @@ -69,20 +88,42 @@ def __init__(self, source_path, dest_path, image_quality): paths = [os.path.join(root, f) for f in files] paths = filter(lambda x: _get_mime(x) == 'image', paths) image_paths.extend(paths) - image_paths.sort() - super().__init__(image_paths, dest_path, image_quality) + super().__init__( + source_path=sorted(source_path), + dest_path=dest_path, + image_quality=image_quality, + step=1, + start=0, + stop=0, + ) class VideoExtractor(MediaExtractor): - def __init__(self, source_path, dest_path, image_quality): + def __init__(self, source_path, dest_path, image_quality, step=1, start=0, stop=0): from cvat.apps.engine.log import slogger _dest_path = tempfile.mkdtemp(prefix='cvat-', suffix='.data') - super().__init__(source_path[0], _dest_path, image_quality) + super().__init__( + source_path=source_path[0], + dest_path=_dest_path, + image_quality=image_quality, + step=step, + start=start, + stop=stop, + ) # translate inversed range 1:95 to 2:32 translated_quality = 96 - self._image_quality translated_quality = round((((translated_quality - 1) * (31 - 2)) / (95 - 1)) + 2) self._tmp_output = tempfile.mkdtemp(prefix='cvat-', suffix='.data') target_path = os.path.join(self._tmp_output, '%d.jpg') output_opts = '-start_number 0 -b:v 10000k -vsync 0 -an -y -q:v ' + str(translated_quality) + filters = '' + if self._stop > 0: + filters = 'between(n,' + str(self._start) + ',' + str(self._stop) + ')' + elif self._start > 0: + filters = 'gte(n,' + str(self._start) + ')' + if self._step > 1: + filters += ('*' if filters else '') + 'not(mod(n-' + str(self._start) + ',' + str(self._step) + '))' + if filters: + output_opts += " -vf select=\"'" + filters + "'\"" ff = FFmpeg( inputs = {self._source_path: None}, diff --git a/cvat/apps/engine/task.py b/cvat/apps/engine/task.py index ab7e689182b2..82690daf1a18 100644 --- 
a/cvat/apps/engine/task.py +++ b/cvat/apps/engine/task.py @@ -227,6 +227,9 @@ def _create_thread(tid, data): source_path=[os.path.join(upload_dir, f) for f in media_files], dest_path=upload_dir, image_quality=db_task.image_quality, + step=db_task.get_frame_step(), + start=db_task.start_frame, + stop=db_task.stop_frame, ) length += len(extractor) db_task.mode = MEDIA_TYPES[media_type]['mode'] @@ -246,7 +249,7 @@ def _create_thread(tid, data): width, height = extractor.save_image(frame, image_dest_path) db_images.append(models.Image( task=db_task, - path=os.path.relpath(image_orig_path, upload_dir), + path=image_orig_path, frame=db_task.size, width=width, height=height)) @@ -259,11 +262,11 @@ def _create_thread(tid, data): image = Image.open(db_task.get_frame_path(0)) models.Video.objects.create( task=db_task, - path=os.path.relpath(extractors[0].get_source_name(), upload_dir), - start_frame=0, stop_frame=db_task.size, - step=1, + path=extractors[0].get_source_name(), width=image.width, height=image.height) image.close() + if db_task.stop_frame == 0: + db_task.stop_frame = db_task.start_frame + (db_task.size - 1) * db_task.get_frame_step() else: models.Image.objects.bulk_create(db_images) From 97a1c7b54de89d9acb55847e0ab9ad94346c6102 Mon Sep 17 00:00:00 2001 From: Andrey Zhavoronkov Date: Mon, 8 Jul 2019 09:52:02 +0300 Subject: [PATCH 10/12] moved engine.settings to engine.media_extractors module --- cvat/apps/engine/media_extractors.py | 79 +++++++++++++++++-- .../0016_attribute_spec_20190217.py | 6 +- cvat/apps/engine/settings.py | 73 ----------------- cvat/apps/engine/task.py | 8 +- 4 files changed, 80 insertions(+), 86 deletions(-) delete mode 100644 cvat/apps/engine/settings.py diff --git a/cvat/apps/engine/media_extractors.py b/cvat/apps/engine/media_extractors.py index 78807df21685..f8ebea777c90 100644 --- a/cvat/apps/engine/media_extractors.py +++ b/cvat/apps/engine/media_extractors.py @@ -7,6 +7,20 @@ from pyunpack import Archive from PIL import Image 
+import mimetypes +_SCRIPT_DIR = os.path.realpath(os.path.dirname(__file__)) +MEDIA_MIMETYPES_FILES = [ + os.path.join(_SCRIPT_DIR, "media.mimetypes"), +] +mimetypes.init(files=MEDIA_MIMETYPES_FILES) + +def get_mime(name): + for type_name, type_def in MEDIA_TYPES.items(): + if type_def['has_mime_type'](name): + return type_name + + return 'unknown' + class MediaExtractor: def __init__(self, source_path, dest_path, image_quality, step, start, stop): self._source_path = source_path @@ -61,15 +75,14 @@ def save_image(self, k, dest_path): #Note step, start, stop have no affect class DirectoryExtractor(ImageListExtractor): def __init__(self, source_path, dest_path, image_quality, step=1, start=0, stop=0): - from cvat.apps.engine.settings import _get_mime image_paths = [] for source in source_path: for root, _, files in os.walk(source): paths = [os.path.join(root, f) for f in files] - paths = filter(lambda x: _get_mime(x) == 'image', paths) + paths = filter(lambda x: get_mime(x) == 'image', paths) image_paths.extend(paths) super().__init__( - source_path=sorted(source_path), + source_path=sorted(image_paths), dest_path=dest_path, image_quality=image_quality, step=1, @@ -80,16 +93,15 @@ def __init__(self, source_path, dest_path, image_quality, step=1, start=0, stop= #Note step, start, stop have no affect class ArchiveExtractor(ImageListExtractor): def __init__(self, source_path, dest_path, image_quality, step=1, start=0, stop=0): - from cvat.apps.engine.settings import _get_mime Archive(source_path[0]).extractall(dest_path) os.remove(source_path[0]) image_paths = [] for root, _, files in os.walk(dest_path): paths = [os.path.join(root, f) for f in files] - paths = filter(lambda x: _get_mime(x) == 'image', paths) + paths = filter(lambda x: get_mime(x) == 'image', paths) image_paths.extend(paths) super().__init__( - source_path=sorted(source_path), + source_path=sorted(image_paths), dest_path=dest_path, image_quality=image_quality, step=1, @@ -153,3 +165,58 @@ def 
__len__(self): def save_image(self, k, dest_path): shutil.copyfile(self[k], dest_path) + +def _is_archive(path): + mime = mimetypes.guess_type(path) + mime_type = mime[0] + encoding = mime[1] + supportedArchives = ['application/zip', 'application/x-rar-compressed', + 'application/x-tar', 'application/x-7z-compressed', 'application/x-cpio', + 'gzip', 'bzip2'] + return mime_type in supportedArchives or encoding in supportedArchives + +def _is_video(path): + mime = mimetypes.guess_type(path) + return mime[0] is not None and mime[0].startswith('video') + +def _is_image(path): + mime = mimetypes.guess_type(path) + return mime[0] is not None and mime[0].startswith('image') + +def _is_dir(path): + return os.path.isdir(path) + +# 'has_mime_type': function receives 1 argument - path to file. +# Should return True if file has specified media type. +# 'extractor': class that extracts images from specified media. +# 'mode': 'annotation' or 'interpolation' - mode of task that should be created. +# 'unique': True or False - describes how the type can be combined with other. 
+# True - only one item of this type and no other is allowed +# False - this media types can be combined with other which have unique == False + +MEDIA_TYPES = { + 'image': { + 'has_mime_type': _is_image, + 'extractor': ImageListExtractor, + 'mode': 'annotation', + 'unique': False, + }, + 'video': { + 'has_mime_type': _is_video, + 'extractor': VideoExtractor, + 'mode': 'interpolation', + 'unique': True, + }, + 'archive': { + 'has_mime_type': _is_archive, + 'extractor': ArchiveExtractor, + 'mode': 'annotation', + 'unique': True, + }, + 'directory': { + 'has_mime_type': _is_dir, + 'extractor': DirectoryExtractor, + 'mode': 'annotation', + 'unique': False, + }, +} diff --git a/cvat/apps/engine/migrations/0016_attribute_spec_20190217.py b/cvat/apps/engine/migrations/0016_attribute_spec_20190217.py index 25dcd61310b0..27d273af2790 100644 --- a/cvat/apps/engine/migrations/0016_attribute_spec_20190217.py +++ b/cvat/apps/engine/migrations/0016_attribute_spec_20190217.py @@ -5,7 +5,7 @@ from PIL import Image from django.db import migrations from django.conf import settings -from cvat.apps.engine.settings import _get_mime +from cvat.apps.engine.media_extractors import get_mime def parse_attribute(value): match = re.match(r'^([~@])(\w+)=(\w+):(.+)?$', value) @@ -81,7 +81,7 @@ def fill_task_meta_data_forward(apps, schema_editor): video = "" for root, _, files in os.walk(_get_upload_dirname(db_task)): fullnames = map(lambda f: os.path.join(root, f), files) - videos = list(filter(lambda x: _get_mime(x) == 'video', fullnames)) + videos = list(filter(lambda x: get_mime(x) == 'video', fullnames)) if len(videos): video = videos[0] break @@ -100,7 +100,7 @@ def fill_task_meta_data_forward(apps, schema_editor): filenames = [] for root, _, files in os.walk(_get_upload_dirname(db_task)): fullnames = map(lambda f: os.path.join(root, f), files) - images = filter(lambda x: _get_mime(x) == 'image', fullnames) + images = filter(lambda x: get_mime(x) == 'image', fullnames) 
filenames.extend(images) filenames.sort() diff --git a/cvat/apps/engine/settings.py b/cvat/apps/engine/settings.py deleted file mode 100644 index 6d321ba4d0d5..000000000000 --- a/cvat/apps/engine/settings.py +++ /dev/null @@ -1,73 +0,0 @@ -import os -import mimetypes -from cvat.apps.engine.media_extractors import ImageListExtractor, DirectoryExtractor, \ - VideoExtractor, ArchiveExtractor -_SCRIPT_DIR = os.path.realpath(os.path.dirname(__file__)) - -MEDIA_MIMETYPES_FILES = [ - os.path.join(_SCRIPT_DIR, "media.mimetypes"), -] - -mimetypes.init(files=MEDIA_MIMETYPES_FILES) - -def _is_archive(path): - mime = mimetypes.guess_type(path) - mime_type = mime[0] - encoding = mime[1] - supportedArchives = ['application/zip', 'application/x-rar-compressed', - 'application/x-tar', 'application/x-7z-compressed', 'application/x-cpio', - 'gzip', 'bzip2'] - return mime_type in supportedArchives or encoding in supportedArchives - -def _is_video(path): - mime = mimetypes.guess_type(path) - return mime[0] is not None and mime[0].startswith('video') - -def _is_image(path): - mime = mimetypes.guess_type(path) - return mime[0] is not None and mime[0].startswith('image') - -def _is_dir(path): - return os.path.isdir(path) - -# 'has_mime_type': function receives 1 argument - path to file. -# Should return True if file has specified media type. -# 'extractor': class that extracts images from specified media. -# 'mode': 'annotation' or 'interpolation' - mode of task that should be created. -# 'unique': True or False - describes how the type can be combined with other. 
-# True - only one item of this type and no other is allowed -# False - this media types can be combined with other which have unique == False - -MEDIA_TYPES = { - 'image': { - 'has_mime_type': _is_image, - 'extractor': ImageListExtractor, - 'mode': 'annotation', - 'unique': False, - }, - 'video': { - 'has_mime_type': _is_video, - 'extractor': VideoExtractor, - 'mode': 'interpolation', - 'unique': True, - }, - 'archive': { - 'has_mime_type': _is_archive, - 'extractor': ArchiveExtractor, - 'mode': 'annotation', - 'unique': True, - }, - 'directory': { - 'has_mime_type': _is_dir, - 'extractor': DirectoryExtractor, - 'mode': 'annotation', - 'unique': False, - }, -} - -def _get_mime(name): - for type_name, type_def in MEDIA_TYPES.items(): - if type_def['has_mime_type'](name): - return type_name - - return 'unknown' diff --git a/cvat/apps/engine/task.py b/cvat/apps/engine/task.py index c3fe3a10cff8..7d1fbd188953 100644 --- a/cvat/apps/engine/task.py +++ b/cvat/apps/engine/task.py @@ -14,7 +14,7 @@ from urllib import parse as urlparse from urllib import request as urlrequest -from cvat.apps.engine.settings import _get_mime, MEDIA_TYPES +from cvat.apps.engine.media_extractors import get_mime, MEDIA_TYPES import django_rq from django.conf import settings @@ -60,7 +60,7 @@ def make_image_meta_cache(db_task): filenames = [] for root, _, files in os.walk(db_task.get_upload_dirname()): fullnames = map(lambda f: os.path.join(root, f), files) - images = filter(lambda x: _get_mime(x) == 'image', fullnames) + images = filter(lambda x: get_mime(x) == 'image', fullnames) filenames.extend(images) filenames.sort() @@ -152,7 +152,7 @@ def _validate_data(data): if '..' in path.split(os.path.sep): raise ValueError("Don't use '..' 
inside file paths") full_path = os.path.abspath(os.path.join(share_root, path)) - if 'directory' == _get_mime(full_path): + if 'directory' == get_mime(full_path): server_files['dirs'].append(path) else: server_files['files'].append(path) @@ -165,7 +165,7 @@ def _validate_data(data): def count_files(file_mapping, counter): for rel_path, full_path in file_mapping.items(): - mime = _get_mime(full_path) + mime = get_mime(full_path) counter[mime].append(rel_path) counter = { media_type: [] for media_type in MEDIA_TYPES.keys() } From 59e8634fe5b1904df1fe803a26a93d9327e14bd3 Mon Sep 17 00:00:00 2001 From: Andrey Zhavoronkov Date: Mon, 8 Jul 2019 10:55:18 +0300 Subject: [PATCH 11/12] removed code duplication --- cvat/apps/engine/media_extractors.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/cvat/apps/engine/media_extractors.py b/cvat/apps/engine/media_extractors.py index f8ebea777c90..bc4424ba80ac 100644 --- a/cvat/apps/engine/media_extractors.py +++ b/cvat/apps/engine/media_extractors.py @@ -91,17 +91,11 @@ def __init__(self, source_path, dest_path, image_quality, step=1, start=0, stop= ) #Note step, start, stop have no affect -class ArchiveExtractor(ImageListExtractor): +class ArchiveExtractor(DirectoryExtractor): def __init__(self, source_path, dest_path, image_quality, step=1, start=0, stop=0): Archive(source_path[0]).extractall(dest_path) - os.remove(source_path[0]) - image_paths = [] - for root, _, files in os.walk(dest_path): - paths = [os.path.join(root, f) for f in files] - paths = filter(lambda x: get_mime(x) == 'image', paths) - image_paths.extend(paths) super().__init__( - source_path=sorted(image_paths), + source_path=[dest_path], dest_path=dest_path, image_quality=image_quality, step=1, From 18e9ef698dc6214def12cf658452c378b71bdfbb Mon Sep 17 00:00:00 2001 From: Andrey Zhavoronkov Date: Mon, 8 Jul 2019 11:03:48 +0300 Subject: [PATCH 12/12] added a note to changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff 
--git a/CHANGELOG.md b/CHANGELOG.md index db44d5423858..5d746e0bb129 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Video frame filter - Admins are no longer limited to a subset of python commands in the auto annotation application - Remote data source (list of URLs to create an annotation task) +- Ability to create custom extractors for unsupported media types ### Changed - Outside and keyframe buttons in the side panel for all interpolation shapes (they were only for boxes before)