From e8725cfde88348e4a3cc8bed8d1755ffa6714aab Mon Sep 17 00:00:00 2001
From: Andrey Zhavoronkov <andrey.zhavoronkov@intel.com>
Date: Mon, 29 Apr 2019 17:20:03 +0300
Subject: [PATCH 01/12] ability to add custom extractors

---
 cvat/apps/engine/media_extractors.py          | 115 ++++++++++
 .../0016_attribute_spec_20190217.py           |   2 +-
 cvat/apps/engine/mime.py                      |  33 +++
 cvat/apps/engine/settings.py                  |  23 ++
 cvat/apps/engine/task.py                      | 212 +++++-------------
 5 files changed, 225 insertions(+), 160 deletions(-)
 create mode 100644 cvat/apps/engine/media_extractors.py
 create mode 100644 cvat/apps/engine/mime.py
 create mode 100644 cvat/apps/engine/settings.py

diff --git a/cvat/apps/engine/media_extractors.py b/cvat/apps/engine/media_extractors.py
new file mode 100644
index 000000000000..eb876fff2e5a
--- /dev/null
+++ b/cvat/apps/engine/media_extractors.py
@@ -0,0 +1,115 @@
+import os
+import tempfile
+import shutil
+import numpy as np
+
+from ffmpy import FFmpeg
+from pyunpack import Archive
+from PIL import Image
+
+from cvat.apps.engine.mime import get_mime
+from .log import slogger
+
+
+class MediaExtractor:
+    def __init__(self, source_path, dest_path, compress_quality):
+        self._source_path = source_path
+        self._dest_path = dest_path
+        self._compress_quality = compress_quality
+
+    def __getitem__(self, k):
+        pass
+
+    def __iter__(self):
+        pass
+
+class ImageListExtractor(MediaExtractor):
+    def __init__(self, source_path, dest_path, compress_quality):
+        return super().__init__(source_path, dest_path, compress_quality)
+
+    def __iter__(self):
+        return iter(self._source_path)
+
+    def __getitem__(self, k):
+        return self._source_path[k]
+
+    def __len__(self):
+        return len(self._source_path)
+
+    def save_image(self, k, dest_path):
+        image = Image.open(self[k])
+        # Ensure image data fits into 8bit per pixel before RGB conversion as PIL clips values on conversion
+        if image.mode == "I":
+            # Image mode is 32bit integer pixels.
+            # Autoscale pixels by factor 2**8 / im_data.max() to fit into 8bit
+            im_data = np.array(image)
+            im_data = im_data * (2**8 / im_data.max())
+            image = Image.fromarray(im_data.astype(np.int32))
+        image = image.convert('RGB')
+        image.save(dest_path, quality=self._compress_quality, optimize=True)
+        height = image.height
+        width = image.width
+        image.close()
+        return width, height
+
+class DirectoryExtractor(ImageListExtractor):
+    def __init__(self, source_path, dest_path, compress_quality):
+        image_paths = []
+        for root, _, files in os.walk(source_path[0]):
+            paths = [os.path.join(root, f) for f in files]
+            paths = filter(lambda x: get_mime(x) == 'image', paths)
+            image_paths.extend(paths)
+        image_paths.sort()
+        super().__init__(image_paths, dest_path, compress_quality)
+
+class ArchiveExtractor(ImageListExtractor):
+    def __init__(self, source_path, dest_path, compress_quality):
+        Archive(source_path[0]).extractall(dest_path)
+        os.remove(source_path[0])
+        image_paths = []
+        for root, _, files in os.walk(dest_path):
+            paths = [os.path.join(root, f) for f in files]
+            paths = filter(lambda x: get_mime(x) == 'image', paths)
+            image_paths.extend(paths)
+        image_paths.sort()
+        super().__init__(image_paths, dest_path, compress_quality)
+
+class VideoExtractor(MediaExtractor):
+    def __init__(self, source_path, dest_path, compress_quality):
+        _dest_path = tempfile.mkdtemp(prefix='cvat-', suffix='.data')
+        super().__init__(source_path[0], _dest_path, compress_quality)
+        # translate inversed range 1:95 to 2:32
+        translated_quality = 96 - self._compress_quality
+        translated_quality = round((((translated_quality - 1) * (31 - 2)) / (95 - 1)) + 2)
+        self._tmp_output = tempfile.mkdtemp(prefix='cvat-', suffix='.data')
+        target_path = os.path.join(self._tmp_output, '%d.jpg')
+        output_opts = '-start_number 0 -b:v 10000k -vsync 0 -an -y -q:v ' + str(translated_quality)
+
+        ff = FFmpeg(
+            inputs  = {self._source_path: None},
+            outputs = {target_path: output_opts})
+
+        slogger.glob.info("FFMpeg cmd: {} ".format(ff.cmd))
+        ff.run()
+
+    def _getframepath(self, k):
+        return "{0}/{1}.jpg".format(self._tmp_output, k)
+
+    def __iter__(self):
+        i = 0
+        while os.path.exists(self._getframepath(i)):
+            yield self._getframepath(i)
+            i += 1
+
+    def __del__(self):
+        if self._tmp_output:
+            shutil.rmtree(self._tmp_output)
+
+    def __getitem__(self, k):
+        return self._getframepath(k)
+
+    def __len__(self):
+        return len(os.listdir(self._tmp_output))
+
+    def save_image(self, k, dest_path):
+        shutil.copyfile(self[k], dest_path)
diff --git a/cvat/apps/engine/migrations/0016_attribute_spec_20190217.py b/cvat/apps/engine/migrations/0016_attribute_spec_20190217.py
index dfb84fff98fd..222095633f61 100644
--- a/cvat/apps/engine/migrations/0016_attribute_spec_20190217.py
+++ b/cvat/apps/engine/migrations/0016_attribute_spec_20190217.py
@@ -5,7 +5,7 @@
 from PIL import Image
 from django.db import migrations
 from django.conf import settings
-from cvat.apps.engine.task import _get_mime
+from cvat.apps.engine.mime import get_mime
 
 def parse_attribute(value):
     match = re.match(r'^([~@])(\w+)=(\w+):(.+)?$', value)
diff --git a/cvat/apps/engine/mime.py b/cvat/apps/engine/mime.py
new file mode 100644
index 000000000000..16d41b8f33ba
--- /dev/null
+++ b/cvat/apps/engine/mime.py
@@ -0,0 +1,33 @@
+import os
+import mimetypes
+_SCRIPT_DIR = os.path.realpath(os.path.dirname(__file__))
+_MEDIA_MIMETYPES_FILE = os.path.join(_SCRIPT_DIR, "media.mimetypes")
+mimetypes.init(files=[_MEDIA_MIMETYPES_FILE])
+
+def is_archive(path):
+    mime = mimetypes.guess_type(path)
+    mime_type = mime[0]
+    encoding = mime[1]
+    supportedArchives = ['application/zip', 'application/x-rar-compressed',
+        'application/x-tar', 'application/x-7z-compressed', 'application/x-cpio',
+        'gzip', 'bzip2']
+    return mime_type in supportedArchives or encoding in supportedArchives
+
+def is_video(path):
+    mime = mimetypes.guess_type(path)
+    return mime[0] is not None and mime[0].startswith('video')
+
+def is_image(path):
+    mime = mimetypes.guess_type(path)
+    return mime[0] is not None and mime[0].startswith('image')
+
+def is_dir(path):
+    return os.path.isdir(path)
+
+def get_mime(name):
+    from cvat.apps.engine.settings import MEDIA_TYPES
+    for type_name, type_def in MEDIA_TYPES.items():
+        if type_def['has_mime_type'](name):
+            return type_name
+
+    return 'unknown'
diff --git a/cvat/apps/engine/settings.py b/cvat/apps/engine/settings.py
new file mode 100644
index 000000000000..0512246a144e
--- /dev/null
+++ b/cvat/apps/engine/settings.py
@@ -0,0 +1,23 @@
+import os
+from cvat.apps.engine.mime import is_image, is_video, is_archive, is_dir
+from cvat.apps.engine.media_extractors import ImageListExtractor, DirectoryExtractor, \
+    VideoExtractor, ArchiveExtractor
+
+MEDIA_TYPES = {
+    'image': {
+        'has_mime_type': is_image,
+        'extractor': ImageListExtractor,
+    },
+    'video': {
+        'has_mime_type': is_video,
+        'extractor': VideoExtractor,
+    },
+    'archive': {
+        'has_mime_type': is_archive,
+        'extractor': ArchiveExtractor,
+    },
+    'directory': {
+        'has_mime_type': is_dir,
+        'extractor': DirectoryExtractor,
+    },
+}
diff --git a/cvat/apps/engine/task.py b/cvat/apps/engine/task.py
index 5160c7d7f82a..025560607866 100644
--- a/cvat/apps/engine/task.py
+++ b/cvat/apps/engine/task.py
@@ -13,16 +13,12 @@
 from traceback import print_exception
 from ast import literal_eval
 
-import mimetypes
-_SCRIPT_DIR = os.path.realpath(os.path.dirname(__file__))
-_MEDIA_MIMETYPES_FILE = os.path.join(_SCRIPT_DIR, "media.mimetypes")
-mimetypes.init(files=[_MEDIA_MIMETYPES_FILE])
+from cvat.apps.engine.mime import get_mime
+from cvat.apps.engine.settings import MEDIA_TYPES
 
 import django_rq
 from django.conf import settings
 from django.db import transaction
-from ffmpy import FFmpeg
-from pyunpack import Archive
 from distutils.dir_util import copy_tree
 
 from . import models
@@ -47,39 +43,6 @@ def rq_handler(job, exc_type, exc_value, traceback):
 
 ############################# Internal implementation for server API
 
-class _FrameExtractor:
-    def __init__(self, source_path, compress_quality, flip_flag=False):
-        # translate inversed range 1:95 to 2:32
-        translated_quality = 96 - compress_quality
-        translated_quality = round((((translated_quality - 1) * (31 - 2)) / (95 - 1)) + 2)
-        self.output = tempfile.mkdtemp(prefix='cvat-', suffix='.data')
-        target_path = os.path.join(self.output, '%d.jpg')
-        output_opts = '-start_number 0 -b:v 10000k -vsync 0 -an -y -q:v ' + str(translated_quality)
-        if flip_flag:
-            output_opts += ' -vf "transpose=2,transpose=2"'
-        ff = FFmpeg(
-            inputs  = {source_path: None},
-            outputs = {target_path: output_opts})
-
-        slogger.glob.info("FFMpeg cmd: {} ".format(ff.cmd))
-        ff.run()
-
-    def getframepath(self, k):
-        return "{0}/{1}.jpg".format(self.output, k)
-
-    def __del__(self):
-        if self.output:
-            shutil.rmtree(self.output)
-
-    def __getitem__(self, k):
-        return self.getframepath(k)
-
-    def __iter__(self):
-        i = 0
-        while os.path.exists(self.getframepath(i)):
-            yield self[i]
-            i += 1
-
 def make_image_meta_cache(db_task):
     with open(db_task.get_image_meta_cache_path(), 'w') as meta_file:
         cache = {
@@ -97,7 +60,7 @@ def make_image_meta_cache(db_task):
             filenames = []
             for root, _, files in os.walk(db_task.get_upload_dirname()):
                 fullnames = map(lambda f: os.path.join(root, f), files)
-                images = filter(lambda x: _get_mime(x) == 'image', fullnames)
+                images = filter(lambda x: get_mime(x) == 'image', fullnames)
                 filenames.extend(images)
             filenames.sort()
 
@@ -121,31 +84,6 @@ def get_image_meta_cache(db_task):
         with open(db_task.get_image_meta_cache_path()) as meta_cache_file:
             return literal_eval(meta_cache_file.read())
 
-
-def _get_mime(name):
-    mime = mimetypes.guess_type(name)
-    mime_type = mime[0]
-    encoding = mime[1]
-    # zip, rar, tar, tar.gz, tar.bz2, 7z, cpio
-    supportedArchives = ['application/zip', 'application/x-rar-compressed',
-        'application/x-tar', 'application/x-7z-compressed', 'application/x-cpio',
-        'gzip', 'bzip2']
-    if mime_type is not None:
-        if mime_type.startswith('video'):
-            return 'video'
-        elif mime_type in supportedArchives or encoding in supportedArchives:
-            return 'archive'
-        elif mime_type.startswith('image'):
-            return 'image'
-        else:
-            return 'unknown'
-    else:
-        if os.path.isdir(name):
-            return 'directory'
-        else:
-            return 'unknown'
-
-
 def _copy_data_from_share(server_files, upload_dir):
     job = rq.get_current_job()
     job.meta['status'] = 'Data are being copied from share..'
@@ -162,72 +100,6 @@ def _copy_data_from_share(server_files, upload_dir):
                 os.makedirs(target_dir)
             shutil.copyfile(source_path, target_path)
 
-def _unpack_archive(archive, upload_dir):
-    job = rq.get_current_job()
-    job.meta['status'] = 'Archive is being unpacked..'
-    job.save_meta()
-
-    Archive(archive).extractall(upload_dir)
-    os.remove(archive)
-
-def _copy_video_to_task(video, db_task):
-    job = rq.get_current_job()
-    job.meta['status'] = 'Video is being extracted..'
-    job.save_meta()
-
-    extractor = _FrameExtractor(video, db_task.image_quality)
-    for frame, image_orig_path in enumerate(extractor):
-        image_dest_path = db_task.get_frame_path(frame)
-        db_task.size += 1
-        dirname = os.path.dirname(image_dest_path)
-        if not os.path.exists(dirname):
-            os.makedirs(dirname)
-        shutil.copyfile(image_orig_path, image_dest_path)
-
-    image = Image.open(db_task.get_frame_path(0))
-    models.Video.objects.create(task=db_task, path=video,
-        start_frame=0, stop_frame=db_task.size, step=1,
-        width=image.width, height=image.height)
-    image.close()
-
-def _copy_images_to_task(upload_dir, db_task):
-    image_paths = []
-    for root, _, files in os.walk(upload_dir):
-        paths = map(lambda f: os.path.join(root, f), files)
-        paths = filter(lambda x: _get_mime(x) == 'image', paths)
-        image_paths.extend(paths)
-    image_paths.sort()
-
-    db_images = []
-    if len(image_paths):
-        job = rq.get_current_job()
-        for frame, image_orig_path in enumerate(image_paths):
-            progress = frame * 100 // len(image_paths)
-            job.meta['status'] = 'Images are being compressed.. {}%'.format(progress)
-            job.save_meta()
-            image_dest_path = db_task.get_frame_path(frame)
-            db_task.size += 1
-            dirname = os.path.dirname(image_dest_path)
-            if not os.path.exists(dirname):
-                os.makedirs(dirname)
-            image = Image.open(image_orig_path)
-            # Ensure image data fits into 8bit per pixel before RGB conversion as PIL clips values on conversion
-            if image.mode == "I":
-                # Image mode is 32bit integer pixels.
-                # Autoscale pixels by factor 2**8 / im_data.max() to fit into 8bit
-                im_data = np.array(image)
-                im_data = im_data * (2**8 / im_data.max())
-                image = Image.fromarray(im_data.astype(np.int32))
-            image = image.convert('RGB')
-            image.save(image_dest_path, quality=db_task.image_quality, optimize=True)
-            db_images.append(models.Image(task=db_task, path=image_orig_path,
-                frame=frame, width=image.width, height=image.height))
-            image.close()
-
-        models.Image.objects.bulk_create(db_images)
-    else:
-        raise ValueError("Image files were not found")
-
 def _save_task_to_db(db_task):
     job = rq.get_current_job()
     job.meta['status'] = 'Task is being saved in database'
@@ -280,7 +152,7 @@ def _validate_data(data):
         if '..' in path.split(os.path.sep):
             raise ValueError("Don't use '..' inside file paths")
         full_path = os.path.abspath(os.path.join(share_root, path))
-        if 'directory' == _get_mime(full_path):
+        if 'directory' == get_mime(full_path):
             server_files['dirs'].append(path)
         else:
             server_files['files'].append(path)
@@ -292,32 +164,25 @@ def _validate_data(data):
         if not [ f_name for f_name in server_files['files'] if f_name.startswith(dir_name)]]
 
     def count_files(file_mapping, counter):
-        archive = None
-        video = None
         for rel_path, full_path in file_mapping.items():
-            mime = _get_mime(full_path)
-            counter[mime] += 1
-            if mime == "archive":
-                archive = rel_path
-            elif mime == "video":
-                video = rel_path
-        return video, archive
+            mime = get_mime(full_path)
+            counter[mime].append(rel_path)
 
-    counter = {"image": 0, "video": 0, "archive": 0, "directory": 0}
+    counter = { media_type: [] for media_type in MEDIA_TYPES.keys() }
 
-    client_video, client_archive = count_files(
+    count_files(
         file_mapping={ f:f for f in data['client_files']},
         counter=counter,
     )
 
-    server_video, server_archive = count_files(
+    count_files(
         file_mapping={ f:os.path.abspath(os.path.join(share_root, f)) for f in data['server_files']},
         counter=counter,
     )
 
-    num_videos = counter["video"]
-    num_archives = counter["archive"]
-    num_images = counter["image"] + counter["directory"]
+    num_videos = len(counter["video"])
+    num_archives = len(counter["archive"])
+    num_images = len(counter["image"]) + len(counter["directory"])
     if (num_videos > 1 or num_archives > 1 or
         (num_videos == 1 and num_archives + num_images > 0) or
         (num_archives == 1 and num_videos + num_images > 0) or
@@ -328,7 +193,7 @@ def count_files(file_mapping, counter):
             archive(s) found".format(counter['image'], counter['directory'],
                 counter['video'], counter['archive']))
 
-    return client_video or server_video, client_archive or server_archive
+    return counter
 
 @transaction.atomic
 def _create_thread(tid, data):
@@ -339,23 +204,52 @@ def _create_thread(tid, data):
         raise NotImplementedError("Adding more data is not implemented")
 
     upload_dir = db_task.get_upload_dirname()
-    video, archive = _validate_data(data)
+    media = _validate_data(data)
 
     if data['server_files']:
         _copy_data_from_share(data['server_files'], upload_dir)
 
-    if archive:
-        archive = os.path.join(upload_dir, archive)
-        _unpack_archive(archive, upload_dir)
+    job = rq.get_current_job()
+
+    db_images = []
+
+    db_task.mode = 'interpolation' if media['video'] else 'annotation'
+    for media_type, media_files in media.items():
+        if not media_files:
+            continue
+
+        extractor = MEDIA_TYPES[media_type]['extractor'](
+            source_path=[os.path.join(upload_dir, f) for f in media_files],
+            dest_path=upload_dir,
+            image_quality=db_task.image_quality,
+        )
 
-    if video:
-        db_task.mode = "interpolation"
-        video = os.path.join(upload_dir, video)
-        _copy_video_to_task(video, db_task)
+        for frame, image_orig_path in enumerate(extractor):
+            image_dest_path = db_task.get_frame_path(frame)
+            db_task.size += 1
+            dirname = os.path.dirname(image_dest_path)
+            if not os.path.exists(dirname):
+                    os.makedirs(dirname)
+            if db_task.mode == 'interpolation':
+                job.meta['status'] = 'Video is being extracted..'
+                job.save_meta()
+                extractor.save_image(frame, image_dest_path)
+            else:
+                progress = frame * 100 // len(extractor)
+                job.meta['status'] = 'Images are being compressed.. {}%'.format(progress)
+                job.save_meta()
+                width, height = extractor.save_image(frame, image_dest_path)
+                db_images.append(models.Image(task=db_task, path=image_orig_path,
+                    frame=frame, width=width, height=height))
+
+    if db_task.mode == 'interpolation':
+        image = Image.open(db_task.get_frame_path(0))
+        models.Video.objects.create(task=db_task, path=media['video'][0],
+            start_frame=0, stop_frame=db_task.size, step=1,
+            width=image.width, height=image.height)
+        image.close()
     else:
-        db_task.mode = "annotation"
-        _copy_images_to_task(upload_dir, db_task)
+        models.Image.objects.bulk_create(db_images)
 
     slogger.glob.info("Founded frames {} for task #{}".format(db_task.size, tid))
     _save_task_to_db(db_task)
-

From 09cdea88a13153e9b198730940fef56d5dc24191 Mon Sep 17 00:00:00 2001
From: Andrey Zhavoronkov <andrey.zhavoronkov@intel.com>
Date: Mon, 29 Apr 2019 17:51:51 +0300
Subject: [PATCH 02/12] added configurable mimetypes

---
 cvat/apps/engine/media_extractors.py          | 29 ++++++------
 .../0016_attribute_spec_20190217.py           |  2 +-
 cvat/apps/engine/mime.py                      | 33 --------------
 cvat/apps/engine/settings.py                  | 45 ++++++++++++++++---
 cvat/apps/engine/task.py                      |  9 ++--
 5 files changed, 60 insertions(+), 58 deletions(-)
 delete mode 100644 cvat/apps/engine/mime.py

diff --git a/cvat/apps/engine/media_extractors.py b/cvat/apps/engine/media_extractors.py
index eb876fff2e5a..8ef54f749035 100644
--- a/cvat/apps/engine/media_extractors.py
+++ b/cvat/apps/engine/media_extractors.py
@@ -7,15 +7,14 @@
 from pyunpack import Archive
 from PIL import Image
 
-from cvat.apps.engine.mime import get_mime
 from .log import slogger
 
 
 class MediaExtractor:
-    def __init__(self, source_path, dest_path, compress_quality):
+    def __init__(self, source_path, dest_path, image_quality):
         self._source_path = source_path
         self._dest_path = dest_path
-        self._compress_quality = compress_quality
+        self._image_quality = image_quality
 
     def __getitem__(self, k):
         pass
@@ -24,8 +23,8 @@ def __iter__(self):
         pass
 
 class ImageListExtractor(MediaExtractor):
-    def __init__(self, source_path, dest_path, compress_quality):
-        return super().__init__(source_path, dest_path, compress_quality)
+    def __init__(self, source_path, dest_path, image_quality):
+        return super().__init__(source_path, dest_path, image_quality)
 
     def __iter__(self):
         return iter(self._source_path)
@@ -46,40 +45,42 @@ def save_image(self, k, dest_path):
             im_data = im_data * (2**8 / im_data.max())
             image = Image.fromarray(im_data.astype(np.int32))
         image = image.convert('RGB')
-        image.save(dest_path, quality=self._compress_quality, optimize=True)
+        image.save(dest_path, quality=self._image_quality, optimize=True)
         height = image.height
         width = image.width
         image.close()
         return width, height
 
 class DirectoryExtractor(ImageListExtractor):
-    def __init__(self, source_path, dest_path, compress_quality):
+    def __init__(self, source_path, dest_path, image_quality):
+        from cvat.apps.engine.settings import _get_mime
         image_paths = []
         for root, _, files in os.walk(source_path[0]):
             paths = [os.path.join(root, f) for f in files]
             paths = filter(lambda x: get_mime(x) == 'image', paths)
             image_paths.extend(paths)
         image_paths.sort()
-        super().__init__(image_paths, dest_path, compress_quality)
+        super().__init__(image_paths, dest_path, image_quality)
 
 class ArchiveExtractor(ImageListExtractor):
-    def __init__(self, source_path, dest_path, compress_quality):
+    def __init__(self, source_path, dest_path, image_quality):
+        from cvat.apps.engine.settings import _get_mime
         Archive(source_path[0]).extractall(dest_path)
         os.remove(source_path[0])
         image_paths = []
         for root, _, files in os.walk(dest_path):
             paths = [os.path.join(root, f) for f in files]
-            paths = filter(lambda x: get_mime(x) == 'image', paths)
+            paths = filter(lambda x: _get_mime(x) == 'image', paths)
             image_paths.extend(paths)
         image_paths.sort()
-        super().__init__(image_paths, dest_path, compress_quality)
+        super().__init__(image_paths, dest_path, image_quality)
 
 class VideoExtractor(MediaExtractor):
-    def __init__(self, source_path, dest_path, compress_quality):
+    def __init__(self, source_path, dest_path, image_quality):
         _dest_path = tempfile.mkdtemp(prefix='cvat-', suffix='.data')
-        super().__init__(source_path[0], _dest_path, compress_quality)
+        super().__init__(source_path[0], _dest_path, image_quality)
         # translate inversed range 1:95 to 2:32
-        translated_quality = 96 - self._compress_quality
+        translated_quality = 96 - self._image_quality
         translated_quality = round((((translated_quality - 1) * (31 - 2)) / (95 - 1)) + 2)
         self._tmp_output = tempfile.mkdtemp(prefix='cvat-', suffix='.data')
         target_path = os.path.join(self._tmp_output, '%d.jpg')
diff --git a/cvat/apps/engine/migrations/0016_attribute_spec_20190217.py b/cvat/apps/engine/migrations/0016_attribute_spec_20190217.py
index 222095633f61..25dcd61310b0 100644
--- a/cvat/apps/engine/migrations/0016_attribute_spec_20190217.py
+++ b/cvat/apps/engine/migrations/0016_attribute_spec_20190217.py
@@ -5,7 +5,7 @@
 from PIL import Image
 from django.db import migrations
 from django.conf import settings
-from cvat.apps.engine.mime import get_mime
+from cvat.apps.engine.settings import _get_mime
 
 def parse_attribute(value):
     match = re.match(r'^([~@])(\w+)=(\w+):(.+)?$', value)
diff --git a/cvat/apps/engine/mime.py b/cvat/apps/engine/mime.py
deleted file mode 100644
index 16d41b8f33ba..000000000000
--- a/cvat/apps/engine/mime.py
+++ /dev/null
@@ -1,33 +0,0 @@
-import os
-import mimetypes
-_SCRIPT_DIR = os.path.realpath(os.path.dirname(__file__))
-_MEDIA_MIMETYPES_FILE = os.path.join(_SCRIPT_DIR, "media.mimetypes")
-mimetypes.init(files=[_MEDIA_MIMETYPES_FILE])
-
-def is_archive(path):
-    mime = mimetypes.guess_type(path)
-    mime_type = mime[0]
-    encoding = mime[1]
-    supportedArchives = ['application/zip', 'application/x-rar-compressed',
-        'application/x-tar', 'application/x-7z-compressed', 'application/x-cpio',
-        'gzip', 'bzip2']
-    return mime_type in supportedArchives or encoding in supportedArchives
-
-def is_video(path):
-    mime = mimetypes.guess_type(path)
-    return mime[0] is not None and mime[0].startswith('video')
-
-def is_image(path):
-    mime = mimetypes.guess_type(path)
-    return mime[0] is not None and mime[0].startswith('image')
-
-def is_dir(path):
-    return os.path.isdir(path)
-
-def get_mime(name):
-    from cvat.apps.engine.settings import MEDIA_TYPES
-    for type_name, type_def in MEDIA_TYPES.items():
-        if type_def['has_mime_type'](name):
-            return type_name
-
-    return 'unknown'
diff --git a/cvat/apps/engine/settings.py b/cvat/apps/engine/settings.py
index 0512246a144e..cba200fcb135 100644
--- a/cvat/apps/engine/settings.py
+++ b/cvat/apps/engine/settings.py
@@ -1,23 +1,58 @@
 import os
-from cvat.apps.engine.mime import is_image, is_video, is_archive, is_dir
+import mimetypes
 from cvat.apps.engine.media_extractors import ImageListExtractor, DirectoryExtractor, \
     VideoExtractor, ArchiveExtractor
+_SCRIPT_DIR = os.path.realpath(os.path.dirname(__file__))
+
+MEDIA_MIMETYPES_FILES = [
+    os.path.join(_SCRIPT_DIR, "media.mimetypes")
+]
+
+mimetypes.init(files=MEDIA_MIMETYPES_FILES)
+
+def _is_archive(path):
+    mime = mimetypes.guess_type(path)
+    mime_type = mime[0]
+    encoding = mime[1]
+    supportedArchives = ['application/zip', 'application/x-rar-compressed',
+        'application/x-tar', 'application/x-7z-compressed', 'application/x-cpio',
+        'gzip', 'bzip2']
+    return mime_type in supportedArchives or encoding in supportedArchives
+
+def _is_video(path):
+    mime = mimetypes.guess_type(path)
+    return mime[0] is not None and mime[0].startswith('video')
+
+def _is_image(path):
+    mime = mimetypes.guess_type(path)
+    return mime[0] is not None and mime[0].startswith('image')
+
+def _is_dir(path):
+    return os.path.isdir(path)
 
 MEDIA_TYPES = {
     'image': {
-        'has_mime_type': is_image,
+        'has_mime_type': _is_image,
         'extractor': ImageListExtractor,
     },
     'video': {
-        'has_mime_type': is_video,
+        'has_mime_type': _is_video,
         'extractor': VideoExtractor,
     },
     'archive': {
-        'has_mime_type': is_archive,
+        'has_mime_type': _is_archive,
         'extractor': ArchiveExtractor,
     },
     'directory': {
-        'has_mime_type': is_dir,
+        'has_mime_type': _is_dir,
         'extractor': DirectoryExtractor,
     },
 }
+
+def _get_mime(name):
+    from cvat.apps.engine.settings import MEDIA_TYPES
+    for type_name, type_def in MEDIA_TYPES.items():
+        if type_def['has_mime_type'](name):
+            return type_name
+
+    return 'unknown'
diff --git a/cvat/apps/engine/task.py b/cvat/apps/engine/task.py
index 025560607866..5e97ee371d6b 100644
--- a/cvat/apps/engine/task.py
+++ b/cvat/apps/engine/task.py
@@ -13,8 +13,7 @@
 from traceback import print_exception
 from ast import literal_eval
 
-from cvat.apps.engine.mime import get_mime
-from cvat.apps.engine.settings import MEDIA_TYPES
+from cvat.apps.engine.settings import _get_mime, MEDIA_TYPES
 
 import django_rq
 from django.conf import settings
@@ -60,7 +59,7 @@ def make_image_meta_cache(db_task):
             filenames = []
             for root, _, files in os.walk(db_task.get_upload_dirname()):
                 fullnames = map(lambda f: os.path.join(root, f), files)
-                images = filter(lambda x: get_mime(x) == 'image', fullnames)
+                images = filter(lambda x: _get_mime(x) == 'image', fullnames)
                 filenames.extend(images)
             filenames.sort()
 
@@ -152,7 +151,7 @@ def _validate_data(data):
         if '..' in path.split(os.path.sep):
             raise ValueError("Don't use '..' inside file paths")
         full_path = os.path.abspath(os.path.join(share_root, path))
-        if 'directory' == get_mime(full_path):
+        if 'directory' == _get_mime(full_path):
             server_files['dirs'].append(path)
         else:
             server_files['files'].append(path)
@@ -165,7 +164,7 @@ def _validate_data(data):
 
     def count_files(file_mapping, counter):
         for rel_path, full_path in file_mapping.items():
-            mime = get_mime(full_path)
+            mime = _get_mime(full_path)
             counter[mime].append(rel_path)
 
     counter = { media_type: [] for media_type in MEDIA_TYPES.keys() }

From c916c485768a90370fc293e7fb90c88d956f0309 Mon Sep 17 00:00:00 2001
From: Andrey Zhavoronkov <andrey.zhavoronkov@intel.com>
Date: Mon, 29 Apr 2019 20:08:41 +0300
Subject: [PATCH 03/12] fix _get_mime call in DirectoryExtractor, reject empty image lists

---
 cvat/apps/engine/media_extractors.py | 7 +++----
 cvat/apps/engine/settings.py         | 3 +--
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/cvat/apps/engine/media_extractors.py b/cvat/apps/engine/media_extractors.py
index 8ef54f749035..e42c014e6179 100644
--- a/cvat/apps/engine/media_extractors.py
+++ b/cvat/apps/engine/media_extractors.py
@@ -7,9 +7,6 @@
 from pyunpack import Archive
 from PIL import Image
 
-from .log import slogger
-
-
 class MediaExtractor:
     def __init__(self, source_path, dest_path, image_quality):
         self._source_path = source_path
@@ -24,6 +21,8 @@ def __iter__(self):
 
 class ImageListExtractor(MediaExtractor):
     def __init__(self, source_path, dest_path, image_quality):
+        if not source_path:
+            raise Exception('No image found')
         return super().__init__(source_path, dest_path, image_quality)
 
     def __iter__(self):
@@ -57,7 +56,7 @@ def __init__(self, source_path, dest_path, image_quality):
         image_paths = []
         for root, _, files in os.walk(source_path[0]):
             paths = [os.path.join(root, f) for f in files]
-            paths = filter(lambda x: get_mime(x) == 'image', paths)
+            paths = filter(lambda x: _get_mime(x) == 'image', paths)
             image_paths.extend(paths)
         image_paths.sort()
         super().__init__(image_paths, dest_path, image_quality)
diff --git a/cvat/apps/engine/settings.py b/cvat/apps/engine/settings.py
index cba200fcb135..7fe1874e542b 100644
--- a/cvat/apps/engine/settings.py
+++ b/cvat/apps/engine/settings.py
@@ -5,7 +5,7 @@
 _SCRIPT_DIR = os.path.realpath(os.path.dirname(__file__))
 
 MEDIA_MIMETYPES_FILES = [
-    os.path.join(_SCRIPT_DIR, "media.mimetypes")
+    os.path.join(_SCRIPT_DIR, "media.mimetypes"),
 ]
 
 mimetypes.init(files=MEDIA_MIMETYPES_FILES)
@@ -50,7 +50,6 @@ def _is_dir(path):
 }
 
 def _get_mime(name):
-    from cvat.apps.engine.settings import MEDIA_TYPES
     for type_name, type_def in MEDIA_TYPES.items():
         if type_def['has_mime_type'](name):
             return type_name

From 00cdf893dbb9a35bc90e35fb2b55cfb6f1436970 Mon Sep 17 00:00:00 2001
From: Andrey Zhavoronkov <andrey.zhavoronkov@intel.com>
Date: Mon, 29 Apr 2019 20:30:18 +0300
Subject: [PATCH 04/12] fixed logging

---
 cvat/apps/engine/media_extractors.py | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/cvat/apps/engine/media_extractors.py b/cvat/apps/engine/media_extractors.py
index e42c014e6179..6f817e09d99d 100644
--- a/cvat/apps/engine/media_extractors.py
+++ b/cvat/apps/engine/media_extractors.py
@@ -13,12 +13,6 @@ def __init__(self, source_path, dest_path, image_quality):
         self._dest_path = dest_path
         self._image_quality = image_quality
 
-    def __getitem__(self, k):
-        pass
-
-    def __iter__(self):
-        pass
-
 class ImageListExtractor(MediaExtractor):
     def __init__(self, source_path, dest_path, image_quality):
         if not source_path:
@@ -76,6 +70,7 @@ def __init__(self, source_path, dest_path, image_quality):
 
 class VideoExtractor(MediaExtractor):
     def __init__(self, source_path, dest_path, image_quality):
+        from cvat.apps.engine.log import slogger
         _dest_path = tempfile.mkdtemp(prefix='cvat-', suffix='.data')
         super().__init__(source_path[0], _dest_path, image_quality)
         # translate inversed range 1:95 to 2:32

From a94327c97fcc087d503b7cdaf892b1cad9565394 Mon Sep 17 00:00:00 2001
From: Andrey Zhavoronkov <andrey.zhavoronkov@intel.com>
Date: Mon, 29 Apr 2019 22:15:49 +0300
Subject: [PATCH 05/12] fixed directory extractor

---
 cvat/apps/engine/media_extractors.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/cvat/apps/engine/media_extractors.py b/cvat/apps/engine/media_extractors.py
index 6f817e09d99d..094478db5a4f 100644
--- a/cvat/apps/engine/media_extractors.py
+++ b/cvat/apps/engine/media_extractors.py
@@ -48,10 +48,11 @@ class DirectoryExtractor(ImageListExtractor):
     def __init__(self, source_path, dest_path, image_quality):
         from cvat.apps.engine.settings import _get_mime
         image_paths = []
-        for root, _, files in os.walk(source_path[0]):
-            paths = [os.path.join(root, f) for f in files]
-            paths = filter(lambda x: _get_mime(x) == 'image', paths)
-            image_paths.extend(paths)
+        for source in source_path:
+            for root, _, files in os.walk(source):
+                paths = [os.path.join(root, f) for f in files]
+                paths = filter(lambda x: _get_mime(x) == 'image', paths)
+                image_paths.extend(paths)
         image_paths.sort()
         super().__init__(image_paths, dest_path, image_quality)
 

From f7d43e17a49fb8fcf2cb7106e256f79a348d53ef Mon Sep 17 00:00:00 2001
From: Andrey Zhavoronkov <andrey.zhavoronkov@intel.com>
Date: Tue, 30 Apr 2019 09:27:20 +0300
Subject: [PATCH 06/12] fixed codacy issues

---
 cvat/apps/engine/media_extractors.py | 2 +-
 cvat/apps/engine/task.py             | 2 --
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/cvat/apps/engine/media_extractors.py b/cvat/apps/engine/media_extractors.py
index 094478db5a4f..6512c5e9c8b0 100644
--- a/cvat/apps/engine/media_extractors.py
+++ b/cvat/apps/engine/media_extractors.py
@@ -17,7 +17,7 @@ class ImageListExtractor(MediaExtractor):
     def __init__(self, source_path, dest_path, image_quality):
         if not source_path:
             raise Exception('No image found')
-        return super().__init__(source_path, dest_path, image_quality)
+        super().__init__(source_path, dest_path, image_quality)
 
     def __iter__(self):
         return iter(self._source_path)
diff --git a/cvat/apps/engine/task.py b/cvat/apps/engine/task.py
index 5e97ee371d6b..3fffb9e81b73 100644
--- a/cvat/apps/engine/task.py
+++ b/cvat/apps/engine/task.py
@@ -7,8 +7,6 @@
 import sys
 import rq
 import shutil
-import tempfile
-import numpy as np
 from PIL import Image
 from traceback import print_exception
 from ast import literal_eval

From 90cebc3bd343b2c520d785f385f6634a6f09377a Mon Sep 17 00:00:00 2001
From: Andrey Zhavoronkov <andrey.zhavoronkov@intel.com>
Date: Tue, 30 Apr 2019 12:56:26 +0300
Subject: [PATCH 07/12] added task mode settings

---
 cvat/apps/engine/settings.py |  4 ++++
 cvat/apps/engine/task.py     | 19 +++++++++----------
 2 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/cvat/apps/engine/settings.py b/cvat/apps/engine/settings.py
index 7fe1874e542b..7eb19a644f6d 100644
--- a/cvat/apps/engine/settings.py
+++ b/cvat/apps/engine/settings.py
@@ -34,18 +34,22 @@ def _is_dir(path):
     'image': {
         'has_mime_type': _is_image,
         'extractor': ImageListExtractor,
+        'mode': 'annotation',
     },
     'video': {
         'has_mime_type': _is_video,
         'extractor': VideoExtractor,
+        'mode': 'interpolation',
     },
     'archive': {
         'has_mime_type': _is_archive,
         'extractor': ArchiveExtractor,
+        'mode': 'annotation',
     },
     'directory': {
         'has_mime_type': _is_dir,
         'extractor': DirectoryExtractor,
+        'mode': 'annotation',
     },
 }
 
diff --git a/cvat/apps/engine/task.py b/cvat/apps/engine/task.py
index 3fffb9e81b73..323656a51f72 100644
--- a/cvat/apps/engine/task.py
+++ b/cvat/apps/engine/task.py
@@ -207,10 +207,7 @@ def _create_thread(tid, data):
         _copy_data_from_share(data['server_files'], upload_dir)
 
     job = rq.get_current_job()
-
     db_images = []
-
-    db_task.mode = 'interpolation' if media['video'] else 'annotation'
     for media_type, media_files in media.items():
         if not media_files:
             continue
@@ -221,6 +218,7 @@ def _create_thread(tid, data):
             image_quality=db_task.image_quality,
         )
 
+        db_task.mode = MEDIA_TYPES[media_type]['mode']
         for frame, image_orig_path in enumerate(extractor):
             image_dest_path = db_task.get_frame_path(frame)
             db_task.size += 1
@@ -239,13 +237,14 @@ def _create_thread(tid, data):
                 db_images.append(models.Image(task=db_task, path=image_orig_path,
                     frame=frame, width=width, height=height))
 
-    if db_task.mode == 'interpolation':
-        image = Image.open(db_task.get_frame_path(0))
-        models.Video.objects.create(task=db_task, path=media['video'][0],
-            start_frame=0, stop_frame=db_task.size, step=1,
-            width=image.width, height=image.height)
-        image.close()
-    else:
+        if db_task.mode == 'interpolation':
+            image = Image.open(db_task.get_frame_path(0))
+            models.Video.objects.create(task=db_task, path=media[media_type][0],
+                start_frame=0, stop_frame=db_task.size, step=1,
+                width=image.width, height=image.height)
+            image.close()
+
+    if db_task.mode == 'annotation':
         models.Image.objects.bulk_create(db_images)
 
     slogger.glob.info("Founded frames {} for task #{}".format(db_task.size, tid))

From 1b2506aedad0db4c18d17f57a5ee0ab44f4b7c33 Mon Sep 17 00:00:00 2001
From: Andrey Zhavoronkov <andrey.zhavoronkov@intel.com>
Date: Tue, 30 Apr 2019 18:29:14 +0300
Subject: [PATCH 08/12] improved media data validation fixed bugs

---
 cvat/apps/engine/media_extractors.py |  3 +
 cvat/apps/engine/settings.py         | 12 ++++
 cvat/apps/engine/task.py             | 82 +++++++++++++++++-----------
 3 files changed, 66 insertions(+), 31 deletions(-)

diff --git a/cvat/apps/engine/media_extractors.py b/cvat/apps/engine/media_extractors.py
index 6512c5e9c8b0..32aa3c78fca1 100644
--- a/cvat/apps/engine/media_extractors.py
+++ b/cvat/apps/engine/media_extractors.py
@@ -13,6 +13,9 @@ def __init__(self, source_path, dest_path, image_quality):
         self._dest_path = dest_path
         self._image_quality = image_quality
 
+    def get_source_name(self):
+        return self._source_path
+
 class ImageListExtractor(MediaExtractor):
     def __init__(self, source_path, dest_path, image_quality):
         if not source_path:
diff --git a/cvat/apps/engine/settings.py b/cvat/apps/engine/settings.py
index 7eb19a644f6d..6d321ba4d0d5 100644
--- a/cvat/apps/engine/settings.py
+++ b/cvat/apps/engine/settings.py
@@ -30,26 +30,38 @@ def _is_image(path):
 def _is_dir(path):
     return os.path.isdir(path)
 
+# 'has_mime_type': function receives 1 argument - path to file.
+#                  Should return True if file has specified media type.
+# 'extractor': class that extracts images from specified media.
+# 'mode': 'annotation' or 'interpolation' - mode of task that should be created.
+# 'unique': True or False - describes how the type can be combined with other.
+#           True - only one item of this type and no other is allowed
+#           False - this media types can be combined with other which have unique == False
+
 MEDIA_TYPES = {
     'image': {
         'has_mime_type': _is_image,
         'extractor': ImageListExtractor,
         'mode': 'annotation',
+        'unique': False,
     },
     'video': {
         'has_mime_type': _is_video,
         'extractor': VideoExtractor,
         'mode': 'interpolation',
+        'unique': True,
     },
     'archive': {
         'has_mime_type': _is_archive,
         'extractor': ArchiveExtractor,
         'mode': 'annotation',
+        'unique': True,
     },
     'directory': {
         'has_mime_type': _is_dir,
         'extractor': DirectoryExtractor,
         'mode': 'annotation',
+        'unique': False,
     },
 }
 
diff --git a/cvat/apps/engine/task.py b/cvat/apps/engine/task.py
index 323656a51f72..ab7e689182b2 100644
--- a/cvat/apps/engine/task.py
+++ b/cvat/apps/engine/task.py
@@ -177,18 +177,24 @@ def count_files(file_mapping, counter):
         counter=counter,
     )
 
-    num_videos = len(counter["video"])
-    num_archives = len(counter["archive"])
-    num_images = len(counter["image"]) + len(counter["directory"])
-    if (num_videos > 1 or num_archives > 1 or
-        (num_videos == 1 and num_archives + num_images > 0) or
-        (num_archives == 1 and num_videos + num_images > 0) or
-        (num_images > 0 and num_archives + num_videos > 0)):
-
-        raise ValueError("Only one archive, one video or many images can be \
-            dowloaded simultaneously. {} image(s), {} dir(s), {} video(s), {} \
-            archive(s) found".format(counter['image'], counter['directory'],
-                counter['video'], counter['archive']))
+    unique_entries = 0
+    multiple_entries = 0
+    for media_type, media_config in MEDIA_TYPES.items():
+        if counter[media_type]:
+            if media_config['unique']:
+                unique_entries += len(counter[media_type])
+            else:
+                multiple_entries += len(counter[media_type])
+
+    if unique_entries > 1 or (unique_entries == 1 and multiple_entries > 0):  # a 'unique' media type must be the only input
+        unique_types = ', '.join([k for k, v in MEDIA_TYPES.items() if v['unique']])
+        multiply_types = ', '.join([k for k, v in MEDIA_TYPES.items() if not v['unique']])
+        count = ', '.join(['{} {}(s)'.format(len(v), k) for k, v in counter.items()])  # e.g. "2 image(s), 1 video(s)"
+        raise ValueError('Only one {} or many {} can be used simultaneously, '
+            'but {} found.'.format(unique_types, multiply_types, count))  # adjacent literals, so no source indentation leaks into the message
+
+    if unique_entries == 0 and multiple_entries == 0:
+        raise ValueError('No media data found')
 
     return counter
 
@@ -207,7 +213,12 @@ def _create_thread(tid, data):
         _copy_data_from_share(data['server_files'], upload_dir)
 
     job = rq.get_current_job()
+    job.meta['status'] = 'Media files is being extracted...'
+    job.save_meta()
+
     db_images = []
+    extractors = []
+    length = 0
     for media_type, media_files in media.items():
         if not media_files:
             continue
@@ -217,34 +228,43 @@ def _create_thread(tid, data):
             dest_path=upload_dir,
             image_quality=db_task.image_quality,
         )
-
+        length += len(extractor)
         db_task.mode = MEDIA_TYPES[media_type]['mode']
+        extractors.append(extractor)
+
+    for extractor in extractors:
         for frame, image_orig_path in enumerate(extractor):
-            image_dest_path = db_task.get_frame_path(frame)
-            db_task.size += 1
+            image_dest_path = db_task.get_frame_path(db_task.size)
             dirname = os.path.dirname(image_dest_path)
+
             if not os.path.exists(dirname):
-                    os.makedirs(dirname)
+                os.makedirs(dirname)
+
             if db_task.mode == 'interpolation':
-                job.meta['status'] = 'Video is being extracted..'
-                job.save_meta()
                 extractor.save_image(frame, image_dest_path)
             else:
-                progress = frame * 100 // len(extractor)
-                job.meta['status'] = 'Images are being compressed.. {}%'.format(progress)
-                job.save_meta()
                 width, height = extractor.save_image(frame, image_dest_path)
-                db_images.append(models.Image(task=db_task, path=image_orig_path,
-                    frame=frame, width=width, height=height))
+                db_images.append(models.Image(
+                    task=db_task,
+                    path=os.path.relpath(image_orig_path, upload_dir),
+                    frame=db_task.size,
+                    width=width, height=height))
 
-        if db_task.mode == 'interpolation':
-            image = Image.open(db_task.get_frame_path(0))
-            models.Video.objects.create(task=db_task, path=media[media_type][0],
-                start_frame=0, stop_frame=db_task.size, step=1,
-                width=image.width, height=image.height)
-            image.close()
-
-    if db_task.mode == 'annotation':
+            db_task.size += 1  # running count of frames written so far, across all extractors
+            progress = db_task.size * 100 // length  # `frame` restarts at 0 for every extractor, `length` is the overall total
+            job.meta['status'] = 'Images are being compressed... {}%'.format(progress)
+            job.save_meta()
+
+    if db_task.mode == 'interpolation':
+        image = Image.open(db_task.get_frame_path(0))
+        models.Video.objects.create(
+            task=db_task,
+            path=os.path.relpath(extractors[0].get_source_name(), upload_dir),
+            start_frame=0, stop_frame=db_task.size,
+            step=1,
+            width=image.width, height=image.height)
+        image.close()
+    else:
         models.Image.objects.bulk_create(db_images)
 
     slogger.glob.info("Founded frames {} for task #{}".format(db_task.size, tid))

From 392e2023ab906fc70c380f340f27035cdc28075c Mon Sep 17 00:00:00 2001
From: Andrey Zhavoronkov <andrey.zhavoronkov@intel.com>
Date: Wed, 5 Jun 2019 16:34:45 +0300
Subject: [PATCH 09/12] Adopt changes from develop

---
 cvat/apps/engine/media_extractors.py | 65 +++++++++++++++++++++++-----
 cvat/apps/engine/task.py             | 11 +++--
 2 files changed, 60 insertions(+), 16 deletions(-)

diff --git a/cvat/apps/engine/media_extractors.py b/cvat/apps/engine/media_extractors.py
index 32aa3c78fca1..78807df21685 100644
--- a/cvat/apps/engine/media_extractors.py
+++ b/cvat/apps/engine/media_extractors.py
@@ -8,19 +8,30 @@
 from PIL import Image
 
 class MediaExtractor:
-    def __init__(self, source_path, dest_path, image_quality):
+    def __init__(self, source_path, dest_path, image_quality, step, start, stop):
         self._source_path = source_path
         self._dest_path = dest_path
         self._image_quality = image_quality
+        self._step = step
+        self._start = start
+        self._stop = stop
 
     def get_source_name(self):
         return self._source_path
 
+#Note step, start, stop have no affect
 class ImageListExtractor(MediaExtractor):
-    def __init__(self, source_path, dest_path, image_quality):
+    def __init__(self, source_path, dest_path, image_quality, step=1, start=0, stop=0):
         if not source_path:
             raise Exception('No image found')
-        super().__init__(source_path, dest_path, image_quality)
+        super().__init__(
+            source_path=sorted(source_path),
+            dest_path=dest_path,
+            image_quality=image_quality,
+            step=1,
+            start=0,
+            stop=0,
+        )
 
     def __iter__(self):
         return iter(self._source_path)
@@ -47,8 +58,9 @@ def save_image(self, k, dest_path):
         image.close()
         return width, height
 
+#Note step, start, stop have no affect
 class DirectoryExtractor(ImageListExtractor):
-    def __init__(self, source_path, dest_path, image_quality):
+    def __init__(self, source_path, dest_path, image_quality, step=1, start=0, stop=0):
         from cvat.apps.engine.settings import _get_mime
         image_paths = []
         for source in source_path:
@@ -56,11 +68,18 @@ def __init__(self, source_path, dest_path, image_quality):
                 paths = [os.path.join(root, f) for f in files]
                 paths = filter(lambda x: _get_mime(x) == 'image', paths)
                 image_paths.extend(paths)
-        image_paths.sort()
-        super().__init__(image_paths, dest_path, image_quality)
-
+        super().__init__(
+            source_path=sorted(source_path),
+            dest_path=dest_path,
+            image_quality=image_quality,
+            step=1,
+            start=0,
+            stop=0,
+        )
+
+#Note step, start, stop have no affect
 class ArchiveExtractor(ImageListExtractor):
-    def __init__(self, source_path, dest_path, image_quality):
+    def __init__(self, source_path, dest_path, image_quality, step=1, start=0, stop=0):
         from cvat.apps.engine.settings import _get_mime
         Archive(source_path[0]).extractall(dest_path)
         os.remove(source_path[0])
@@ -69,20 +88,42 @@ def __init__(self, source_path, dest_path, image_quality):
             paths = [os.path.join(root, f) for f in files]
             paths = filter(lambda x: _get_mime(x) == 'image', paths)
             image_paths.extend(paths)
-        image_paths.sort()
-        super().__init__(image_paths, dest_path, image_quality)
+        super().__init__(
+            source_path=sorted(source_path),
+            dest_path=dest_path,
+            image_quality=image_quality,
+            step=1,
+            start=0,
+            stop=0,
+        )
 
 class VideoExtractor(MediaExtractor):
-    def __init__(self, source_path, dest_path, image_quality):
+    def __init__(self, source_path, dest_path, image_quality, step=1, start=0, stop=0):
         from cvat.apps.engine.log import slogger
         _dest_path = tempfile.mkdtemp(prefix='cvat-', suffix='.data')
-        super().__init__(source_path[0], _dest_path, image_quality)
+        super().__init__(
+            source_path=source_path[0],
+            dest_path=_dest_path,
+            image_quality=image_quality,
+            step=step,
+            start=start,
+            stop=stop,
+            )
         # translate inversed range 1:95 to 2:32
         translated_quality = 96 - self._image_quality
         translated_quality = round((((translated_quality - 1) * (31 - 2)) / (95 - 1)) + 2)
         self._tmp_output = tempfile.mkdtemp(prefix='cvat-', suffix='.data')
         target_path = os.path.join(self._tmp_output, '%d.jpg')
         output_opts = '-start_number 0 -b:v 10000k -vsync 0 -an -y -q:v ' + str(translated_quality)
+        filters = ''
+        if self._stop > 0:
+            filters = 'between(n,' + str(self._start) + ',' + str(self._stop) + ')'
+        elif self._start > 0:
+            filters = 'gte(n,' + str(self._start) + ')'
+        if self._step > 1:
+            filters += ('*' if filters else '') + 'not(mod(n-' + str(self._start) + ',' + str(self._step) + '))'
+        if filters:
+            output_opts += " -vf select=\"'" + filters + "'\""
 
         ff = FFmpeg(
             inputs  = {self._source_path: None},
diff --git a/cvat/apps/engine/task.py b/cvat/apps/engine/task.py
index ab7e689182b2..82690daf1a18 100644
--- a/cvat/apps/engine/task.py
+++ b/cvat/apps/engine/task.py
@@ -227,6 +227,9 @@ def _create_thread(tid, data):
             source_path=[os.path.join(upload_dir, f) for f in media_files],
             dest_path=upload_dir,
             image_quality=db_task.image_quality,
+            step=db_task.get_frame_step(),
+            start=db_task.start_frame,
+            stop=db_task.stop_frame,
         )
         length += len(extractor)
         db_task.mode = MEDIA_TYPES[media_type]['mode']
@@ -246,7 +249,7 @@ def _create_thread(tid, data):
                 width, height = extractor.save_image(frame, image_dest_path)
                 db_images.append(models.Image(
                     task=db_task,
-                    path=os.path.relpath(image_orig_path, upload_dir),
+                    path=image_orig_path,
                     frame=db_task.size,
                     width=width, height=height))
 
@@ -259,11 +262,11 @@ def _create_thread(tid, data):
         image = Image.open(db_task.get_frame_path(0))
         models.Video.objects.create(
             task=db_task,
-            path=os.path.relpath(extractors[0].get_source_name(), upload_dir),
-            start_frame=0, stop_frame=db_task.size,
-            step=1,
+            path=extractors[0].get_source_name(),
             width=image.width, height=image.height)
         image.close()
+        if db_task.stop_frame == 0:
+            db_task.stop_frame = db_task.start_frame + (db_task.size - 1) * db_task.get_frame_step()
     else:
         models.Image.objects.bulk_create(db_images)
 

From 97a1c7b54de89d9acb55847e0ab9ad94346c6102 Mon Sep 17 00:00:00 2001
From: Andrey Zhavoronkov <andrey.zhavoronkov@intel.com>
Date: Mon, 8 Jul 2019 09:52:02 +0300
Subject: [PATCH 10/12] moved engine.settings to engine.media_extractors module

---
 cvat/apps/engine/media_extractors.py          | 79 +++++++++++++++++--
 .../0016_attribute_spec_20190217.py           |  6 +-
 cvat/apps/engine/settings.py                  | 73 -----------------
 cvat/apps/engine/task.py                      |  8 +-
 4 files changed, 80 insertions(+), 86 deletions(-)
 delete mode 100644 cvat/apps/engine/settings.py

diff --git a/cvat/apps/engine/media_extractors.py b/cvat/apps/engine/media_extractors.py
index 78807df21685..f8ebea777c90 100644
--- a/cvat/apps/engine/media_extractors.py
+++ b/cvat/apps/engine/media_extractors.py
@@ -7,6 +7,20 @@
 from pyunpack import Archive
 from PIL import Image
 
+import mimetypes
+_SCRIPT_DIR = os.path.realpath(os.path.dirname(__file__))
+MEDIA_MIMETYPES_FILES = [
+    os.path.join(_SCRIPT_DIR, "media.mimetypes"),
+]
+mimetypes.init(files=MEDIA_MIMETYPES_FILES)
+
+def get_mime(name):  # classify a path as one of the MEDIA_TYPES keys, or 'unknown'
+    for type_name, type_def in MEDIA_TYPES.items():  # MEDIA_TYPES is defined further down in this module; looked up at call time
+        if type_def['has_mime_type'](name):
+            return type_name  # the first entry whose check accepts the path wins
+
+    return 'unknown'  # no registered media type accepted the path
+
 class MediaExtractor:
     def __init__(self, source_path, dest_path, image_quality, step, start, stop):
         self._source_path = source_path
@@ -61,15 +75,14 @@ def save_image(self, k, dest_path):
 #Note step, start, stop have no affect
 class DirectoryExtractor(ImageListExtractor):
     def __init__(self, source_path, dest_path, image_quality, step=1, start=0, stop=0):
-        from cvat.apps.engine.settings import _get_mime
         image_paths = []
         for source in source_path:
             for root, _, files in os.walk(source):
                 paths = [os.path.join(root, f) for f in files]
-                paths = filter(lambda x: _get_mime(x) == 'image', paths)
+                paths = filter(lambda x: get_mime(x) == 'image', paths)
                 image_paths.extend(paths)
         super().__init__(
-            source_path=sorted(source_path),
+            source_path=sorted(image_paths),
             dest_path=dest_path,
             image_quality=image_quality,
             step=1,
@@ -80,16 +93,15 @@ def __init__(self, source_path, dest_path, image_quality, step=1, start=0, stop=
 #Note step, start, stop have no affect
 class ArchiveExtractor(ImageListExtractor):
     def __init__(self, source_path, dest_path, image_quality, step=1, start=0, stop=0):
-        from cvat.apps.engine.settings import _get_mime
         Archive(source_path[0]).extractall(dest_path)
         os.remove(source_path[0])
         image_paths = []
         for root, _, files in os.walk(dest_path):
             paths = [os.path.join(root, f) for f in files]
-            paths = filter(lambda x: _get_mime(x) == 'image', paths)
+            paths = filter(lambda x: get_mime(x) == 'image', paths)
             image_paths.extend(paths)
         super().__init__(
-            source_path=sorted(source_path),
+            source_path=sorted(image_paths),
             dest_path=dest_path,
             image_quality=image_quality,
             step=1,
@@ -153,3 +165,58 @@ def __len__(self):
 
     def save_image(self, k, dest_path):
         shutil.copyfile(self[k], dest_path)
+
+def _is_archive(path):  # True when the guessed MIME type or encoding of `path` is in the supported list
+    mime = mimetypes.guess_type(path)  # returns a (type, encoding) tuple
+    mime_type = mime[0]
+    encoding = mime[1]
+    supportedArchives = ['application/zip', 'application/x-rar-compressed',
+        'application/x-tar', 'application/x-7z-compressed', 'application/x-cpio',
+        'gzip', 'bzip2']  # 'gzip' and 'bzip2' are encoding names as reported by guess_type
+    return mime_type in supportedArchives or encoding in supportedArchives
+
+def _is_video(path):  # True when the guessed MIME type of `path` starts with 'video'
+    mime = mimetypes.guess_type(path)
+    return mime[0] is not None and mime[0].startswith('video')  # mime[0] is None when the type cannot be guessed
+
+def _is_image(path):  # True when the guessed MIME type of `path` starts with 'image'
+    mime = mimetypes.guess_type(path)
+    return mime[0] is not None and mime[0].startswith('image')  # mime[0] is None when the type cannot be guessed
+
+def _is_dir(path):  # 'directory' check used by MEDIA_TYPES; tests the filesystem rather than the file name
+    return os.path.isdir(path)
+
+# 'has_mime_type': function that receives one argument - the path to a file -
+#                  and returns True if the file has this media type.
+# 'extractor': class that extracts images from media of this type.
+# 'mode': 'annotation' or 'interpolation' - the mode of the task to create.
+# 'unique': True or False - how this type may be combined with others.
+#           True - only one item of this type is allowed, and nothing else.
+#           False - items of this type can be combined with any other type that has unique == False.
+
+MEDIA_TYPES = {
+    'image': {
+        'has_mime_type': _is_image,
+        'extractor': ImageListExtractor,
+        'mode': 'annotation',
+        'unique': False,
+    },
+    'video': {
+        'has_mime_type': _is_video,
+        'extractor': VideoExtractor,
+        'mode': 'interpolation',
+        'unique': True,
+    },
+    'archive': {
+        'has_mime_type': _is_archive,
+        'extractor': ArchiveExtractor,
+        'mode': 'annotation',
+        'unique': True,
+    },
+    'directory': {
+        'has_mime_type': _is_dir,
+        'extractor': DirectoryExtractor,
+        'mode': 'annotation',
+        'unique': False,
+    },
+}
diff --git a/cvat/apps/engine/migrations/0016_attribute_spec_20190217.py b/cvat/apps/engine/migrations/0016_attribute_spec_20190217.py
index 25dcd61310b0..27d273af2790 100644
--- a/cvat/apps/engine/migrations/0016_attribute_spec_20190217.py
+++ b/cvat/apps/engine/migrations/0016_attribute_spec_20190217.py
@@ -5,7 +5,7 @@
 from PIL import Image
 from django.db import migrations
 from django.conf import settings
-from cvat.apps.engine.settings import _get_mime
+from cvat.apps.engine.media_extractors import get_mime
 
 def parse_attribute(value):
     match = re.match(r'^([~@])(\w+)=(\w+):(.+)?$', value)
@@ -81,7 +81,7 @@ def fill_task_meta_data_forward(apps, schema_editor):
             video = ""
             for root, _, files in os.walk(_get_upload_dirname(db_task)):
                 fullnames = map(lambda f: os.path.join(root, f), files)
-                videos = list(filter(lambda x: _get_mime(x) == 'video', fullnames))
+                videos = list(filter(lambda x: get_mime(x) == 'video', fullnames))
                 if len(videos):
                     video = videos[0]
                     break
@@ -100,7 +100,7 @@ def fill_task_meta_data_forward(apps, schema_editor):
             filenames = []
             for root, _, files in os.walk(_get_upload_dirname(db_task)):
                 fullnames = map(lambda f: os.path.join(root, f), files)
-                images = filter(lambda x: _get_mime(x) == 'image', fullnames)
+                images = filter(lambda x: get_mime(x) == 'image', fullnames)
                 filenames.extend(images)
             filenames.sort()
 
diff --git a/cvat/apps/engine/settings.py b/cvat/apps/engine/settings.py
deleted file mode 100644
index 6d321ba4d0d5..000000000000
--- a/cvat/apps/engine/settings.py
+++ /dev/null
@@ -1,73 +0,0 @@
-import os
-import mimetypes
-from cvat.apps.engine.media_extractors import ImageListExtractor, DirectoryExtractor, \
-    VideoExtractor, ArchiveExtractor
-_SCRIPT_DIR = os.path.realpath(os.path.dirname(__file__))
-
-MEDIA_MIMETYPES_FILES = [
-    os.path.join(_SCRIPT_DIR, "media.mimetypes"),
-]
-
-mimetypes.init(files=MEDIA_MIMETYPES_FILES)
-
-def _is_archive(path):
-    mime = mimetypes.guess_type(path)
-    mime_type = mime[0]
-    encoding = mime[1]
-    supportedArchives = ['application/zip', 'application/x-rar-compressed',
-        'application/x-tar', 'application/x-7z-compressed', 'application/x-cpio',
-        'gzip', 'bzip2']
-    return mime_type in supportedArchives or encoding in supportedArchives
-
-def _is_video(path):
-    mime = mimetypes.guess_type(path)
-    return mime[0] is not None and mime[0].startswith('video')
-
-def _is_image(path):
-    mime = mimetypes.guess_type(path)
-    return mime[0] is not None and mime[0].startswith('image')
-
-def _is_dir(path):
-    return os.path.isdir(path)
-
-# 'has_mime_type': function receives 1 argument - path to file.
-#                  Should return True if file has specified media type.
-# 'extractor': class that extracts images from specified media.
-# 'mode': 'annotation' or 'interpolation' - mode of task that should be created.
-# 'unique': True or False - describes how the type can be combined with other.
-#           True - only one item of this type and no other is allowed
-#           False - this media types can be combined with other which have unique == False
-
-MEDIA_TYPES = {
-    'image': {
-        'has_mime_type': _is_image,
-        'extractor': ImageListExtractor,
-        'mode': 'annotation',
-        'unique': False,
-    },
-    'video': {
-        'has_mime_type': _is_video,
-        'extractor': VideoExtractor,
-        'mode': 'interpolation',
-        'unique': True,
-    },
-    'archive': {
-        'has_mime_type': _is_archive,
-        'extractor': ArchiveExtractor,
-        'mode': 'annotation',
-        'unique': True,
-    },
-    'directory': {
-        'has_mime_type': _is_dir,
-        'extractor': DirectoryExtractor,
-        'mode': 'annotation',
-        'unique': False,
-    },
-}
-
-def _get_mime(name):
-    for type_name, type_def in MEDIA_TYPES.items():
-        if type_def['has_mime_type'](name):
-            return type_name
-
-    return 'unknown'
diff --git a/cvat/apps/engine/task.py b/cvat/apps/engine/task.py
index c3fe3a10cff8..7d1fbd188953 100644
--- a/cvat/apps/engine/task.py
+++ b/cvat/apps/engine/task.py
@@ -14,7 +14,7 @@
 from urllib import parse as urlparse
 from urllib import request as urlrequest
 
-from cvat.apps.engine.settings import _get_mime, MEDIA_TYPES
+from cvat.apps.engine.media_extractors import get_mime, MEDIA_TYPES
 
 import django_rq
 from django.conf import settings
@@ -60,7 +60,7 @@ def make_image_meta_cache(db_task):
             filenames = []
             for root, _, files in os.walk(db_task.get_upload_dirname()):
                 fullnames = map(lambda f: os.path.join(root, f), files)
-                images = filter(lambda x: _get_mime(x) == 'image', fullnames)
+                images = filter(lambda x: get_mime(x) == 'image', fullnames)
                 filenames.extend(images)
             filenames.sort()
 
@@ -152,7 +152,7 @@ def _validate_data(data):
         if '..' in path.split(os.path.sep):
             raise ValueError("Don't use '..' inside file paths")
         full_path = os.path.abspath(os.path.join(share_root, path))
-        if 'directory' == _get_mime(full_path):
+        if 'directory' == get_mime(full_path):
             server_files['dirs'].append(path)
         else:
             server_files['files'].append(path)
@@ -165,7 +165,7 @@ def _validate_data(data):
 
     def count_files(file_mapping, counter):
         for rel_path, full_path in file_mapping.items():
-            mime = _get_mime(full_path)
+            mime = get_mime(full_path)
             counter[mime].append(rel_path)
 
     counter = { media_type: [] for media_type in MEDIA_TYPES.keys() }

From 59e8634fe5b1904df1fe803a26a93d9327e14bd3 Mon Sep 17 00:00:00 2001
From: Andrey Zhavoronkov <andrey.zhavoronkov@intel.com>
Date: Mon, 8 Jul 2019 10:55:18 +0300
Subject: [PATCH 11/12] removed code duplication

---
 cvat/apps/engine/media_extractors.py | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/cvat/apps/engine/media_extractors.py b/cvat/apps/engine/media_extractors.py
index f8ebea777c90..bc4424ba80ac 100644
--- a/cvat/apps/engine/media_extractors.py
+++ b/cvat/apps/engine/media_extractors.py
@@ -91,17 +91,11 @@ def __init__(self, source_path, dest_path, image_quality, step=1, start=0, stop=
         )
 
 #Note step, start, stop have no affect
-class ArchiveExtractor(ImageListExtractor):
+class ArchiveExtractor(DirectoryExtractor):
     def __init__(self, source_path, dest_path, image_quality, step=1, start=0, stop=0):
         Archive(source_path[0]).extractall(dest_path)
-        os.remove(source_path[0])
-        image_paths = []
-        for root, _, files in os.walk(dest_path):
-            paths = [os.path.join(root, f) for f in files]
-            paths = filter(lambda x: get_mime(x) == 'image', paths)
-            image_paths.extend(paths)
         super().__init__(
-            source_path=sorted(image_paths),
+            source_path=[dest_path],
             dest_path=dest_path,
             image_quality=image_quality,
             step=1,

From 18e9ef698dc6214def12cf658452c378b71bdfbb Mon Sep 17 00:00:00 2001
From: Andrey Zhavoronkov <andrey.zhavoronkov@intel.com>
Date: Mon, 8 Jul 2019 11:03:48 +0300
Subject: [PATCH 12/12] added a note to changelog

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index db44d5423858..5d746e0bb129 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Video frame filter
 - Admins are no longer limited to a subset of python commands in the auto annotation application
 - Remote data source (list of URLs to create an annotation task)
+- Ability to create custom extractors for unsupported media types
 
 ### Changed
 - Outside and keyframe buttons in the side panel for all interpolation shapes (they were only for boxes before)