From b48d59b5ec2fc5a87977569c0e8378a53cf1fbae Mon Sep 17 00:00:00 2001 From: Andrey Zhavoronkov <41117609+azhavoro@users.noreply.github.com> Date: Mon, 8 Jul 2019 11:10:50 +0300 Subject: [PATCH] Ability to create custom extractors (#434) * ability to add custom extractors * added configurable mimetypes * added a note to changelog --- CHANGELOG.md | 1 + cvat/apps/engine/media_extractors.py | 216 ++++++++++++++ .../0016_attribute_spec_20190217.py | 6 +- cvat/apps/engine/task.py | 266 ++++++------------ 4 files changed, 304 insertions(+), 185 deletions(-) create mode 100644 cvat/apps/engine/media_extractors.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 1964ddfb613a..ceb6f440e0ca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Remote data source (list of URLs to create an annotation task) - Auto annotation using Faster R-CNN with Inception v2 (utils/open_model_zoo) - Auto annotation using Pixel Link mobilenet v2 - text detection (utils/open_model_zoo) +- Ability to create custom extractors for unsupported media types ### Changed - Outside and keyframe buttons in the side panel for all interpolation shapes (they were only for boxes before) diff --git a/cvat/apps/engine/media_extractors.py b/cvat/apps/engine/media_extractors.py new file mode 100644 index 000000000000..bc4424ba80ac --- /dev/null +++ b/cvat/apps/engine/media_extractors.py @@ -0,0 +1,216 @@ +import os +import tempfile +import shutil +import numpy as np + +from ffmpy import FFmpeg +from pyunpack import Archive +from PIL import Image + +import mimetypes +_SCRIPT_DIR = os.path.realpath(os.path.dirname(__file__)) +MEDIA_MIMETYPES_FILES = [ + os.path.join(_SCRIPT_DIR, "media.mimetypes"), +] +mimetypes.init(files=MEDIA_MIMETYPES_FILES) + +def get_mime(name): + for type_name, type_def in MEDIA_TYPES.items(): + if type_def['has_mime_type'](name): + return type_name + + return 'unknown' + +class MediaExtractor: + 
def __init__(self, source_path, dest_path, image_quality, step, start, stop): + self._source_path = source_path + self._dest_path = dest_path + self._image_quality = image_quality + self._step = step + self._start = start + self._stop = stop + + def get_source_name(self): + return self._source_path + +#Note step, start, stop have no effect +class ImageListExtractor(MediaExtractor): + def __init__(self, source_path, dest_path, image_quality, step=1, start=0, stop=0): + if not source_path: + raise Exception('No image found') + super().__init__( + source_path=sorted(source_path), + dest_path=dest_path, + image_quality=image_quality, + step=1, + start=0, + stop=0, + ) + + def __iter__(self): + return iter(self._source_path) + + def __getitem__(self, k): + return self._source_path[k] + + def __len__(self): + return len(self._source_path) + + def save_image(self, k, dest_path): + image = Image.open(self[k]) + # Ensure image data fits into 8bit per pixel before RGB conversion as PIL clips values on conversion + if image.mode == "I": + # Image mode is 32bit integer pixels. 
+ # Autoscale pixels by factor 2**8 / im_data.max() to fit into 8bit + im_data = np.array(image) + im_data = im_data * (2**8 / im_data.max()) + image = Image.fromarray(im_data.astype(np.int32)) + image = image.convert('RGB') + image.save(dest_path, quality=self._image_quality, optimize=True) + height = image.height + width = image.width + image.close() + return width, height + +#Note step, start, stop have no effect +class DirectoryExtractor(ImageListExtractor): + def __init__(self, source_path, dest_path, image_quality, step=1, start=0, stop=0): + image_paths = [] + for source in source_path: + for root, _, files in os.walk(source): + paths = [os.path.join(root, f) for f in files] + paths = filter(lambda x: get_mime(x) == 'image', paths) + image_paths.extend(paths) + super().__init__( + source_path=sorted(image_paths), + dest_path=dest_path, + image_quality=image_quality, + step=1, + start=0, + stop=0, + ) + +#Note step, start, stop have no effect +class ArchiveExtractor(DirectoryExtractor): + def __init__(self, source_path, dest_path, image_quality, step=1, start=0, stop=0): + Archive(source_path[0]).extractall(dest_path) + super().__init__( + source_path=[dest_path], + dest_path=dest_path, + image_quality=image_quality, + step=1, + start=0, + stop=0, + ) + +class VideoExtractor(MediaExtractor): + def __init__(self, source_path, dest_path, image_quality, step=1, start=0, stop=0): + from cvat.apps.engine.log import slogger + _dest_path = tempfile.mkdtemp(prefix='cvat-', suffix='.data') + super().__init__( + source_path=source_path[0], + dest_path=_dest_path, + image_quality=image_quality, + step=step, + start=start, + stop=stop, + ) + # translate inversed range 1:95 to 2:32 + translated_quality = 96 - self._image_quality + translated_quality = round((((translated_quality - 1) * (31 - 2)) / (95 - 1)) + 2) + self._tmp_output = tempfile.mkdtemp(prefix='cvat-', suffix='.data') + target_path = os.path.join(self._tmp_output, '%d.jpg') + output_opts = '-start_number 0 
-b:v 10000k -vsync 0 -an -y -q:v ' + str(translated_quality) + filters = '' + if self._stop > 0: + filters = 'between(n,' + str(self._start) + ',' + str(self._stop) + ')' + elif self._start > 0: + filters = 'gte(n,' + str(self._start) + ')' + if self._step > 1: + filters += ('*' if filters else '') + 'not(mod(n-' + str(self._start) + ',' + str(self._step) + '))' + if filters: + output_opts += " -vf select=\"'" + filters + "'\"" + + ff = FFmpeg( + inputs = {self._source_path: None}, + outputs = {target_path: output_opts}) + + slogger.glob.info("FFMpeg cmd: {} ".format(ff.cmd)) + ff.run() + + def _getframepath(self, k): + return "{0}/{1}.jpg".format(self._tmp_output, k) + + def __iter__(self): + i = 0 + while os.path.exists(self._getframepath(i)): + yield self._getframepath(i) + i += 1 + + def __del__(self): + if self._tmp_output: + shutil.rmtree(self._tmp_output) + + def __getitem__(self, k): + return self._getframepath(k) + + def __len__(self): + return len(os.listdir(self._tmp_output)) + + def save_image(self, k, dest_path): + shutil.copyfile(self[k], dest_path) + +def _is_archive(path): + mime = mimetypes.guess_type(path) + mime_type = mime[0] + encoding = mime[1] + supportedArchives = ['application/zip', 'application/x-rar-compressed', + 'application/x-tar', 'application/x-7z-compressed', 'application/x-cpio', + 'gzip', 'bzip2'] + return mime_type in supportedArchives or encoding in supportedArchives + +def _is_video(path): + mime = mimetypes.guess_type(path) + return mime[0] is not None and mime[0].startswith('video') + +def _is_image(path): + mime = mimetypes.guess_type(path) + return mime[0] is not None and mime[0].startswith('image') + +def _is_dir(path): + return os.path.isdir(path) + +# 'has_mime_type': function receives 1 argument - path to file. +# Should return True if file has specified media type. +# 'extractor': class that extracts images from specified media. +# 'mode': 'annotation' or 'interpolation' - mode of task that should be created. 
+# 'unique': True or False - describes how the type can be combined with other. +# True - only one item of this type and no other is allowed +# False - this media types can be combined with other which have unique == False + +MEDIA_TYPES = { + 'image': { + 'has_mime_type': _is_image, + 'extractor': ImageListExtractor, + 'mode': 'annotation', + 'unique': False, + }, + 'video': { + 'has_mime_type': _is_video, + 'extractor': VideoExtractor, + 'mode': 'interpolation', + 'unique': True, + }, + 'archive': { + 'has_mime_type': _is_archive, + 'extractor': ArchiveExtractor, + 'mode': 'annotation', + 'unique': True, + }, + 'directory': { + 'has_mime_type': _is_dir, + 'extractor': DirectoryExtractor, + 'mode': 'annotation', + 'unique': False, + }, +} diff --git a/cvat/apps/engine/migrations/0016_attribute_spec_20190217.py b/cvat/apps/engine/migrations/0016_attribute_spec_20190217.py index dfb84fff98fd..27d273af2790 100644 --- a/cvat/apps/engine/migrations/0016_attribute_spec_20190217.py +++ b/cvat/apps/engine/migrations/0016_attribute_spec_20190217.py @@ -5,7 +5,7 @@ from PIL import Image from django.db import migrations from django.conf import settings -from cvat.apps.engine.task import _get_mime +from cvat.apps.engine.media_extractors import get_mime def parse_attribute(value): match = re.match(r'^([~@])(\w+)=(\w+):(.+)?$', value) @@ -81,7 +81,7 @@ def fill_task_meta_data_forward(apps, schema_editor): video = "" for root, _, files in os.walk(_get_upload_dirname(db_task)): fullnames = map(lambda f: os.path.join(root, f), files) - videos = list(filter(lambda x: _get_mime(x) == 'video', fullnames)) + videos = list(filter(lambda x: get_mime(x) == 'video', fullnames)) if len(videos): video = videos[0] break @@ -100,7 +100,7 @@ def fill_task_meta_data_forward(apps, schema_editor): filenames = [] for root, _, files in os.walk(_get_upload_dirname(db_task)): fullnames = map(lambda f: os.path.join(root, f), files) - images = filter(lambda x: _get_mime(x) == 'image', fullnames) + 
images = filter(lambda x: get_mime(x) == 'image', fullnames) filenames.extend(images) filenames.sort() diff --git a/cvat/apps/engine/task.py b/cvat/apps/engine/task.py index b264a3f88db0..7d1fbd188953 100644 --- a/cvat/apps/engine/task.py +++ b/cvat/apps/engine/task.py @@ -7,9 +7,6 @@ import sys import rq import shutil -import subprocess -import tempfile -import numpy as np from PIL import Image from traceback import print_exception from ast import literal_eval @@ -17,16 +14,11 @@ from urllib import parse as urlparse from urllib import request as urlrequest -import mimetypes -_SCRIPT_DIR = os.path.realpath(os.path.dirname(__file__)) -_MEDIA_MIMETYPES_FILE = os.path.join(_SCRIPT_DIR, "media.mimetypes") -mimetypes.init(files=[_MEDIA_MIMETYPES_FILE]) +from cvat.apps.engine.media_extractors import get_mime, MEDIA_TYPES import django_rq from django.conf import settings from django.db import transaction -from ffmpy import FFmpeg -from pyunpack import Archive from distutils.dir_util import copy_tree from . 
import models @@ -51,49 +43,6 @@ def rq_handler(job, exc_type, exc_value, traceback): ############################# Internal implementation for server API -class _FrameExtractor: - def __init__(self, source_path, compress_quality, step=1, start=0, stop=0): - # translate inversed range 1:95 to 2:32 - translated_quality = 96 - compress_quality - translated_quality = round((((translated_quality - 1) * (31 - 2)) / (95 - 1)) + 2) - self.source = source_path - self.output = tempfile.mkdtemp(prefix='cvat-', suffix='.data') - target_path = os.path.join(self.output, '%d.jpg') - output_opts = '-start_number 0 -b:v 10000k -vsync 0 -an -y -q:v ' + str(translated_quality) - filters = '' - if stop > 0: - filters = 'between(n,' + str(start) + ',' + str(stop) + ')' - elif start > 0: - filters = 'gte(n,' + str(start) + ')' - if step > 1: - filters += ('*' if filters else '') + 'not(mod(n-' + str(start) + ',' + str(step) + '))' - if filters: - filters = "select=\"'" + filters + "'\"" - if filters: - output_opts += ' -vf ' + filters - ff = FFmpeg( - inputs = {source_path: None}, - outputs = {target_path: output_opts}) - - slogger.glob.info("FFMpeg cmd: {} ".format(ff.cmd)) - ff.run() - - def getframepath(self, k): - return "{0}/{1}.jpg".format(self.output, k) - - def __del__(self): - if self.output: - shutil.rmtree(self.output) - - def __getitem__(self, k): - return self.getframepath(k) - - def __iter__(self): - i = 0 - while os.path.exists(self.getframepath(i)): - yield self[i] - i += 1 - def make_image_meta_cache(db_task): with open(db_task.get_image_meta_cache_path(), 'w') as meta_file: cache = { @@ -111,7 +60,7 @@ def make_image_meta_cache(db_task): filenames = [] for root, _, files in os.walk(db_task.get_upload_dirname()): fullnames = map(lambda f: os.path.join(root, f), files) - images = filter(lambda x: _get_mime(x) == 'image', fullnames) + images = filter(lambda x: get_mime(x) == 'image', fullnames) filenames.extend(images) filenames.sort() @@ -135,31 +84,6 @@ def 
get_image_meta_cache(db_task): with open(db_task.get_image_meta_cache_path()) as meta_cache_file: return literal_eval(meta_cache_file.read()) - -def _get_mime(name): - mime = mimetypes.guess_type(name) - mime_type = mime[0] - encoding = mime[1] - # zip, rar, tar, tar.gz, tar.bz2, 7z, cpio - supportedArchives = ['application/zip', 'application/x-rar-compressed', - 'application/x-tar', 'application/x-7z-compressed', 'application/x-cpio', - 'gzip', 'bzip2'] - if mime_type is not None: - if mime_type.startswith('video'): - return 'video' - elif mime_type in supportedArchives or encoding in supportedArchives: - return 'archive' - elif mime_type.startswith('image'): - return 'image' - else: - return 'unknown' - else: - if os.path.isdir(name): - return 'directory' - else: - return 'unknown' - - def _copy_data_from_share(server_files, upload_dir): job = rq.get_current_job() job.meta['status'] = 'Data are being copied from share..' @@ -176,74 +100,6 @@ def _copy_data_from_share(server_files, upload_dir): os.makedirs(target_dir) shutil.copyfile(source_path, target_path) -def _unpack_archive(archive, upload_dir): - job = rq.get_current_job() - job.meta['status'] = 'Archive is being unpacked..' - job.save_meta() - - Archive(archive).extractall(upload_dir) - os.remove(archive) - -def _copy_video_to_task(video, db_task, step): - job = rq.get_current_job() - job.meta['status'] = 'Video is being extracted..' 
- job.save_meta() - - extractor = _FrameExtractor(video, db_task.image_quality, - step, db_task.start_frame, db_task.stop_frame) - for frame, image_orig_path in enumerate(extractor): - image_dest_path = db_task.get_frame_path(frame) - db_task.size += 1 - dirname = os.path.dirname(image_dest_path) - if not os.path.exists(dirname): - os.makedirs(dirname) - shutil.copyfile(image_orig_path, image_dest_path) - if db_task.stop_frame == 0: - db_task.stop_frame = db_task.start_frame + (db_task.size - 1) * step - - image = Image.open(db_task.get_frame_path(0)) - models.Video.objects.create(task=db_task, path=video, - width=image.width, height=image.height) - image.close() - -def _copy_images_to_task(upload_dir, db_task): - image_paths = [] - for root, _, files in os.walk(upload_dir): - paths = map(lambda f: os.path.join(root, f), files) - paths = filter(lambda x: _get_mime(x) == 'image', paths) - image_paths.extend(paths) - image_paths.sort() - - db_images = [] - if len(image_paths): - job = rq.get_current_job() - for frame, image_orig_path in enumerate(image_paths): - progress = frame * 100 // len(image_paths) - job.meta['status'] = 'Images are being compressed.. {}%'.format(progress) - job.save_meta() - image_dest_path = db_task.get_frame_path(frame) - db_task.size += 1 - dirname = os.path.dirname(image_dest_path) - if not os.path.exists(dirname): - os.makedirs(dirname) - image = Image.open(image_orig_path) - # Ensure image data fits into 8bit per pixel before RGB conversion as PIL clips values on conversion - if image.mode == "I": - # Image mode is 32bit integer pixels. 
- # Autoscale pixels by factor 2**8 / im_data.max() to fit into 8bit - im_data = np.array(image) - im_data = im_data * (2**8 / im_data.max()) - image = Image.fromarray(im_data.astype(np.int32)) - image = image.convert('RGB') - image.save(image_dest_path, quality=db_task.image_quality, optimize=True) - db_images.append(models.Image(task=db_task, path=image_orig_path, - frame=frame, width=image.width, height=image.height)) - image.close() - - models.Image.objects.bulk_create(db_images) - else: - raise ValueError("Image files were not found") - def _save_task_to_db(db_task): job = rq.get_current_job() job.meta['status'] = 'Task is being saved in database' @@ -296,7 +152,7 @@ def _validate_data(data): if '..' in path.split(os.path.sep): raise ValueError("Don't use '..' inside file paths") full_path = os.path.abspath(os.path.join(share_root, path)) - if 'directory' == _get_mime(full_path): + if 'directory' == get_mime(full_path): server_files['dirs'].append(path) else: server_files['files'].append(path) @@ -308,43 +164,42 @@ def _validate_data(data): if not [ f_name for f_name in server_files['files'] if f_name.startswith(dir_name)]] def count_files(file_mapping, counter): - archive = None - video = None for rel_path, full_path in file_mapping.items(): - mime = _get_mime(full_path) - counter[mime] += 1 - if mime == "archive": - archive = rel_path - elif mime == "video": - video = rel_path - return video, archive + mime = get_mime(full_path) + counter[mime].append(rel_path) - counter = {"image": 0, "video": 0, "archive": 0, "directory": 0} + counter = { media_type: [] for media_type in MEDIA_TYPES.keys() } - client_video, client_archive = count_files( + count_files( file_mapping={ f:f for f in data['remote_files'] or data['client_files']}, counter=counter, ) - server_video, server_archive = count_files( + count_files( file_mapping={ f:os.path.abspath(os.path.join(share_root, f)) for f in data['server_files']}, counter=counter, ) - num_videos = counter["video"] - 
num_archives = counter["archive"] - num_images = counter["image"] + counter["directory"] - if (num_videos > 1 or num_archives > 1 or - (num_videos == 1 and num_archives + num_images > 0) or - (num_archives == 1 and num_videos + num_images > 0) or - (num_images > 0 and num_archives + num_videos > 0)): + unique_entries = 0 + multiple_entries = 0 + for media_type, media_config in MEDIA_TYPES.items(): + if counter[media_type]: + if media_config['unique']: + unique_entries += len(counter[media_type]) + else: + multiple_entries += len(counter[media_type]) + + if unique_entries == 1 and multiple_entries > 0 or unique_entries > 1: + unique_types = ', '.join([k for k, v in MEDIA_TYPES.items() if v['unique']]) + multiply_types = ', '.join([k for k, v in MEDIA_TYPES.items() if not v['unique']]) + count = ', '.join(['{} {}(s)'.format(len(v), k) for k, v in counter.items()]) + raise ValueError('Only one {} or many {} can be used simultaneously, \ + but {} found.'.format(unique_types, multiply_types, count)) - raise ValueError("Only one archive, one video or many images can be \ - dowloaded simultaneously. 
{} image(s), {} dir(s), {} video(s), {} \ - archive(s) found".format(counter['image'], counter['directory'], - counter['video'], counter['archive'])) + if unique_entries == 0 and multiple_entries == 0: + raise ValueError('No media data found') - return client_video or server_video, client_archive or server_archive + return counter def _download_data(urls, upload_dir): job = rq.get_current_job() @@ -382,25 +237,72 @@ def _create_thread(tid, data): raise NotImplementedError("Adding more data is not implemented") upload_dir = db_task.get_upload_dirname() + if data['remote_files']: data['remote_files'] = _download_data(data['remote_files'], upload_dir) - video, archive = _validate_data(data) + + media = _validate_data(data) if data['server_files']: _copy_data_from_share(data['server_files'], upload_dir) - if archive: - archive = os.path.join(upload_dir, archive) - _unpack_archive(archive, upload_dir) + job = rq.get_current_job() + job.meta['status'] = 'Media files is being extracted...' + job.save_meta() - if video: - db_task.mode = "interpolation" - video = os.path.join(upload_dir, video) - _copy_video_to_task(video, db_task, db_task.get_frame_step()) + db_images = [] + extractors = [] + length = 0 + for media_type, media_files in media.items(): + if not media_files: + continue + + extractor = MEDIA_TYPES[media_type]['extractor']( + source_path=[os.path.join(upload_dir, f) for f in media_files], + dest_path=upload_dir, + image_quality=db_task.image_quality, + step=db_task.get_frame_step(), + start=db_task.start_frame, + stop=db_task.stop_frame, + ) + length += len(extractor) + db_task.mode = MEDIA_TYPES[media_type]['mode'] + extractors.append(extractor) + + for extractor in extractors: + for frame, image_orig_path in enumerate(extractor): + image_dest_path = db_task.get_frame_path(db_task.size) + dirname = os.path.dirname(image_dest_path) + + if not os.path.exists(dirname): + os.makedirs(dirname) + + if db_task.mode == 'interpolation': + extractor.save_image(frame, 
image_dest_path) + else: + width, height = extractor.save_image(frame, image_dest_path) + db_images.append(models.Image( + task=db_task, + path=image_orig_path, + frame=db_task.size, + width=width, height=height)) + + db_task.size += 1 + progress = frame * 100 // length + job.meta['status'] = 'Images are being compressed... {}%'.format(progress) + job.save_meta() + + if db_task.mode == 'interpolation': + image = Image.open(db_task.get_frame_path(0)) + models.Video.objects.create( + task=db_task, + path=extractors[0].get_source_name(), + width=image.width, height=image.height) + image.close() + if db_task.stop_frame == 0: + db_task.stop_frame = db_task.start_frame + (db_task.size - 1) * db_task.get_frame_step() else: - db_task.mode = "annotation" - _copy_images_to_task(upload_dir, db_task) + models.Image.objects.bulk_create(db_images) slogger.glob.info("Founded frames {} for task #{}".format(db_task.size, tid)) _save_task_to_db(db_task) -