Merge pull request #322 from gnes-ai/incep_encoder

fix(video-decoder): support none spliter
gnes-ai · Oct 11, 2019 · e64bc7a · e64bc7a
2 parents 72c6d8f + a591b89
commit e64bc7a
Show file tree

Hide file tree

Showing 3 changed files with 86 additions and 0 deletions.
diff --git a/gnes/encoder/__init__.py b/gnes/encoder/__init__.py
@@ -45,6 +45,7 @@
     'VggishEncoder': 'audio.vggish',
     'YouTube8MFeatureExtractor': 'video.yt8m_feature_extractor',
     'YouTube8MEncoder': 'video.yt8m_model',
+    'InceptionVideoEncoder': 'video.inception',
     'QuantizerEncoder': 'numeric.quantizer',
     'CharEmbeddingEncoder': 'text.char'
 }

diff --git a/gnes/encoder/video/inception.py b/gnes/encoder/video/inception.py
@@ -0,0 +1,83 @@
+#  Tencent is pleased to support the open source community by making GNES available.
+#
+#  Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from typing import List
+
+import numpy as np
+from PIL import Image
+
+from ..base import BaseVideoEncoder
+from ...helper import batching, get_first_available_gpu
+
+
+class InceptionVideoEncoder(BaseVideoEncoder):
+    """Encode every frame of a video with an Inception-v4 network.
+
+    Each video is a sequence of frames. Every frame is resized to the
+    network input size, rescaled from 0..255 to -1..1 and fed through the
+    graph restored from ``model_dir``. The activations of ``select_layer``
+    are returned as one array per video, indexed by frame on the first axis.
+    """
+
+    # Number of frames per network call; presumably picked up by the
+    # ``batching`` decorator used in ``encode`` -- confirm in ``helper``.
+    batch_size = 64
+
+    def __init__(self,
+                 model_dir: str,
+                 select_layer: str = 'PreLogitsFlatten',
+                 *args,
+                 **kwargs):
+        """
+        :param model_dir: checkpoint path handed to ``tf.train.Saver.restore``
+        :param select_layer: key into the ``end_points`` dict returned by
+            ``inception_v4``; its activations become the encoding
+        """
+        super().__init__(*args, **kwargs)
+        self.model_dir = model_dir
+        self.select_layer = select_layer
+        # Spatial size every frame is resized to before entering the network.
+        self.inception_size_x = 299
+        self.inception_size_y = 299
+
+    def post_init(self):
+        """Build the Inception-v4 graph and restore its weights.
+
+        TensorFlow and the Inception model code are imported here rather
+        than at module level, so they are only loaded when this runs.
+        """
+        import tensorflow as tf
+        from ..image.inception_cores.inception_v4 import inception_v4
+        from ..image.inception_cores.inception_utils import inception_arg_scope
+        import os
+        # Restrict TensorFlow to a single device before the session is created.
+        os.environ['CUDA_VISIBLE_DEVICES'] = str(get_first_available_gpu())
+
+        g = tf.Graph()
+        with g.as_default():
+            arg_scope = inception_arg_scope()
+            inception_v4.default_image_size = self.inception_size_x
+            # Batch of RGB frames; the batch dimension is left open.
+            self.inputs = tf.placeholder(
+                tf.float32,
+                (None, self.inception_size_x, self.inception_size_y, 3))
+
+            with tf.contrib.slim.arg_scope(arg_scope):
+                # Inference only: no training-mode ops, dropout disabled.
+                self.logits, self.end_points = inception_v4(
+                    self.inputs, is_training=False, dropout_keep_prob=1.0)
+
+            config = tf.ConfigProto(log_device_placement=False)
+            if self.on_gpu:
+                # Allocate GPU memory on demand instead of all up front.
+                config.gpu_options.allow_growth = True
+            self.sess = tf.Session(config=config)
+            self.saver = tf.train.Saver()
+            self.saver.restore(self.sess, self.model_dir)
+
+    def encode(self, data: List['np.ndarray'], *args,
+               **kwargs) -> List['np.ndarray']:
+        """Encode each video into an array of per-frame features.
+
+        :param data: list of videos, each an iterable of frames accepted by
+            ``PIL.Image.fromarray`` (assumed HxWx3 uint8 -- confirm against
+            the preprocessor that produces them)
+        :return: one float32 array per video, in input order, whose first
+            axis has one entry per frame of that video
+        """
+        if len(data) == 0:
+            # No videos: skip the network call entirely.
+            return []
+
+        target_size = (self.inception_size_x, self.inception_size_y)
+
+        def _resize(frame):
+            # Resize to the network input size, then map 0..255 onto -1..1.
+            resized = Image.fromarray(frame).resize(target_size)
+            return np.array(resized, dtype=np.float32) * 2 / 255. - 1.
+
+        # Flatten all videos into one frame list so batches may span videos.
+        images = [_resize(im) for v in data for im in v]
+
+        @batching
+        def _encode(self, data):
+            # Fetch only the requested layer; pulling the logits and every
+            # end point back from the session is wasted work.
+            return self.sess.run(self.end_points[self.select_layer],
+                                 feed_dict={self.inputs: data})
+
+        encodes = _encode(self, images).astype(np.float32)
+
+        # Running totals of the frame counts give each video's slice of
+        # ``encodes`` in a single pass.
+        frame_counts = np.array([len(v) for v in data])
+        pos_end = np.cumsum(frame_counts)
+        pos_start = pos_end - frame_counts
+
+        return [encodes[s:e].copy() for s, e in zip(pos_start, pos_end)]
diff --git a/gnes/preprocessor/video/video_decoder.py b/gnes/preprocessor/video/video_decoder.py
@@ -60,6 +60,8 @@ def apply(self, doc: 'gnes_pb2.Document') -> None:
                     c.blob.CopyFrom(array2blob(frame))
                     c.offset = i
                     c.weight = 1.0
+            elif self.chunk_spliter == 'none':
+                pass
             elif self.chunk_spliter == 'shot':
                 raise NotImplementedError
             else: