[Auto] improvements context when gpu involved, speed up predict for image classification (#1636)

* add ctx cast * fix * debug * debug * fix * fix * batch predict * batch inference for predict * fix * simplify * fix * fix * fix * stack -> concat * fix * fix * fix * fix * fix * thread pool * thread pool * thread pool * use dataloader * use dataloader * flatten * fix * add tests * fix lint
dmlc · Mar 25, 2021 · 07bc427 · 07bc427
1 parent 0a85a2e
commit 07bc427
Show file tree

Hide file tree

Showing 8 changed files with 221 additions and 69 deletions.
diff --git a/gluoncv/auto/estimators/base_estimator.py b/gluoncv/auto/estimators/base_estimator.py
@@ -10,6 +10,7 @@
 import pandas as pd
 from ...utils import random as _random
 from ...utils.filesystem import temporary_filename
+from .utils import _suggest_load_context
 
 logging.basicConfig(level=logging.INFO)
 
@@ -153,8 +154,9 @@ def fit(self, train_data, val_data=None, train_size=0.9, random_state=None,
             assert val_data is not None, \
                 "Please provide `val_data` as we do not know how to split `train_data` of type: \
                 {}".format(type(train_data))
-            return self._fit(train_data, val_data, time_limit=time_limit) if not resume \
+            ret = self._fit(train_data, val_data, time_limit=time_limit) if not resume \
                 else self._resume_fit(train_data, val_data, time_limit=time_limit)
+            return self._reload_best(ret)
 
         os.makedirs(self._logdir, exist_ok=True)
         if val_data is None:
@@ -166,11 +168,13 @@ def fit(self, train_data, val_data=None, train_size=0.9, random_state=None,
             val = train_data[~split_mask]
             self._logger.info('Randomly split train_data into train[%d]/validation[%d] splits.',
                               len(train), len(val))
-            return self._fit(train, val, time_limit=time_limit) if not resume else \
+            ret = self._fit(train, val, time_limit=time_limit) if not resume else \
                 self._resume_fit(train, val, time_limit=time_limit)
+            return self._reload_best(ret)
 
-        return self._fit(train_data, val_data, time_limit=time_limit) if not resume else \
+        ret = self._fit(train_data, val_data, time_limit=time_limit) if not resume else \
             self._resume_fit(train_data, val_data, time_limit=time_limit)
+        return self._reload_best(ret)
 
     def evaluate(self, val_data):
         """Evaluate estimator on validation data.
@@ -205,10 +209,24 @@ def predict_feature(self, x):
         """
         return self._predict_feature(x)
 
-    def _predict(self, x):
+    def _reload_best(self, return_value):
+        """Apply the state of the best checkpoint before returning from `fit`.
+
+        Parameters
+        ----------
+        return_value : dict-like
+            The result produced by `_fit`/`_resume_fit`. When it carries a non-empty
+            'checkpoint' entry, that checkpoint is loaded through `self.load` and the
+            attributes of the loaded object overwrite the ones of this instance.
+
+        Returns
+        -------
+        dict-like
+            `return_value` itself, unchanged.
+        """
+        cp = return_value.get('checkpoint', '')
+        if not cp:
+            return return_value
+        self._logger.info('Applying the state from the best checkpoint...')
+        try:
+            tmp = self.load(cp)
+            self.__dict__.update(tmp.__dict__)
+        except Exception:  # pylint: disable=broad-except
+            # Best effort: keep the latest in-memory state when reloading fails.
+            # `except Exception` (not a bare `except:`) lets KeyboardInterrupt and
+            # SystemExit propagate instead of being reported as a reload failure.
+            self._logger.warning(
+                'Unable to resume the state from the best checkpoint, using the latest state.')
+        return return_value
+
+    def _predict(self, x, **kwargs):
         raise NotImplementedError
 
-    def _predict_feature(self, x):
+    def _predict_feature(self, x, **kwargs):
         raise NotImplementedError
 
     def _fit(self, train_data, val_data, time_limit=math.inf):
@@ -263,7 +281,12 @@ def reset_ctx(self, ctx=None):
             if isinstance(self.net, mx.gluon.Block):
                 for c in ctx_list:
                     assert isinstance(c, mx.Context)
-                self.net.reset_ctx(ctx_list)
+                if hasattr(self.net, 'reset_ctx'):
+                    self.net.reset_ctx(ctx_list)
+                else:
+                    self.net.collect_params().reset_ctx(ctx_list)
+                self.ctx = ctx_list
+                done = True
         except ImportError:
             pass
         if not done:
@@ -282,17 +305,28 @@ def save(self, filename):
         self._logger.info('Pickled to %s', filename)
 
     @classmethod
-    def load(cls, filename):
+    def load(cls, filename, ctx='auto'):
         """Load the state from disk copy.
 
         Parameters
         ----------
         filename : str
             The file name to load from.
+        ctx : str or list/tuple of int, default is 'auto'
+            The context for the reloaded model.
+            'auto': use the previously saved context type if it is still available,
+            falling back to cpu if no gpu is detected.
+            'cpu': use cpu for inference regardless.
+            'gpu': use as many gpus as are available.
+            [0, 2, 4, ...]: if a list or tuple of integers is provided, the context
+            will be [gpu(0), gpu(2), gpu(4)...]
         """
         with open(filename, 'rb') as fid:
+            # NOTE(review): unpickling can execute arbitrary code; only load trusted files.
             obj = pickle.load(fid)
             obj._logger.info('Unpickled from %s', filename)
+            # presumably maps the requested `ctx` onto the devices available now --
+            # confirm against `_suggest_load_context`
+            new_ctx = _suggest_load_context(obj.net, ctx, obj.ctx)
+            obj.reset_ctx(new_ctx)
             return obj
 
     def __getstate__(self):

diff --git a/gluoncv/auto/estimators/center_net/center_net.py b/gluoncv/auto/estimators/center_net/center_net.py
@@ -1,11 +1,12 @@
 """CenterNet Estimator"""
-# pylint: disable=unused-variable,missing-function-docstring,abstract-method,logging-format-interpolation
+# pylint: disable=unused-variable,missing-function-docstring,abstract-method,logging-format-interpolation,arguments-differ
 import os
 import math
 import time
 import warnings
 from collections import OrderedDict
 
+from PIL import Image
 import pandas as pd
 import numpy as np
 import mxnet as mx
@@ -47,10 +48,12 @@ def __init__(self, config, logger=None, reporter=None):
         super(CenterNetEstimator, self).__init__(config, logger, reporter=reporter, name=None)
         self.last_train = None
 
-    def _predict(self, x):
+    def _predict(self, x, ctx_id=0):
         short_size = min(self._cfg.center_net.data_shape)
         if isinstance(x, str):
             x = load_test(x, short=short_size, max_size=1024)[0]
+        elif isinstance(x, Image.Image):
+            return self._predict(np.array(x))
         elif isinstance(x, np.ndarray):
             return self._predict(mx.nd.array(x))
         elif isinstance(x, mx.nd.NDArray):
@@ -59,17 +62,19 @@ def _predict(self, x):
             x = transform_test(x, short=short_size, max_size=1024)[0]
         elif isinstance(x, pd.DataFrame):
             assert 'image' in x.columns, "Expect column `image` for input images"
-            def _predict_merge(x):
-                y = self._predict(x)
+            def _predict_merge(x, ctx_id=0):
+                y = self._predict(x, ctx_id=ctx_id)
                 y['image'] = x
                 return y
-            return pd.concat([_predict_merge(xx) for xx in x['image']]).reset_index(drop=True)
+            return pd.concat([_predict_merge(xx, ctx_id=ii % len(self.ctx)) \
+                for ii, xx in enumerate(x['image'])]).reset_index(drop=True)
         elif isinstance(x, (list, tuple)):
-            return pd.concat([self._predict(xx) for xx in x]).reset_index(drop=True)
+            return pd.concat([self._predict(xx, ctx_id=ii % len(self.ctx)) \
+                for ii, xx in enumerate(x)]).reset_index(drop=True)
         else:
             raise ValueError('Input is not supported: {}'.format(type(x)))
         height, width = x.shape[2:4]
-        x = x.as_in_context(self.ctx[0])
+        x = x.as_in_context(self.ctx[ctx_id])
         ids, scores, bboxes = [xx[0].asnumpy() for xx in self.net(x)]
         bboxes[:, (0, 2)] /= width
         bboxes[:, (1, 3)] /= height

diff --git a/gluoncv/auto/estimators/faster_rcnn/faster_rcnn.py b/gluoncv/auto/estimators/faster_rcnn/faster_rcnn.py
@@ -1,10 +1,11 @@
 """Faster RCNN Estimator."""
-# pylint: disable=logging-not-lazy,abstract-method,unused-variable,logging-format-interpolation
+# pylint: disable=logging-not-lazy,abstract-method,unused-variable,logging-format-interpolation,arguments-differ
 import os
 import math
 import time
 import warnings
 
+from PIL import Image
 import pandas as pd
 import numpy as np
 import mxnet as mx
@@ -296,11 +297,13 @@ def _evaluate(self, val_data):
                 eval_metric.update(det_bbox, det_id, det_score, gt_bbox, gt_id, gt_diff)
         return eval_metric.get()
 
-    def _predict(self, x):
+    def _predict(self, x, ctx_id=0):
         """Predict an individual example."""
         short_size = self.net.short[-1] if isinstance(self.net.short, (tuple, list)) else self.net.short
         if isinstance(x, str):
             x = load_test(x, short=short_size, max_size=1024)[0]
+        elif isinstance(x, Image.Image):
+            return self._predict(np.array(x))
         elif isinstance(x, np.ndarray):
             if len(x.shape) != 3 or x.shape[-1] != 3:
                 raise ValueError('array input with shape (h, w, 3) is required for predict')
@@ -309,17 +312,19 @@ def _predict(self, x):
             x = transform_test(x, short=short_size, max_size=1024)[0]
         elif isinstance(x, pd.DataFrame):
             assert 'image' in x.columns, "Expect column `image` for input images"
-            def _predict_merge(x):
-                y = self._predict(x)
+            def _predict_merge(x, ctx_id=0):
+                y = self._predict(x, ctx_id=ctx_id)
                 y['image'] = x
                 return y
-            return pd.concat([_predict_merge(xx) for xx in x['image']]).reset_index(drop=True)
+            return pd.concat([_predict_merge(xx, ctx_id=ii % len(self.ctx)) \
+                for ii, xx in enumerate(x['image'])]).reset_index(drop=True)
         elif isinstance(x, (list, tuple)):
-            return pd.concat([self._predict(xx) for xx in x]).reset_index(drop=True)
+            return pd.concat([self._predict(xx, ctx_id=ii % len(self.ctx)) \
+                for ii, xx in enumerate(x)]).reset_index(drop=True)
         else:
             raise ValueError('Input is not supported: {}'.format(type(x)))
         height, width = x.shape[2:4]
-        x = x.as_in_context(self.ctx[0])
+        x = x.as_in_context(self.ctx[ctx_id])
         ids, scores, bboxes = [xx[0].asnumpy() for xx in self.net(x)]
         bboxes[:, (0, 2)] /= width
         bboxes[:, (1, 3)] /= height

diff --git a/gluoncv/auto/estimators/image_classification/image_classification.py b/gluoncv/auto/estimators/image_classification/image_classification.py
@@ -1,10 +1,11 @@
 """Classification Estimator"""
-# pylint: disable=unused-variable,bad-whitespace, missing-function-docstring,logging-format-interpolation
+# pylint: disable=unused-variable,bad-whitespace,missing-function-docstring,logging-format-interpolation,arguments-differ
 import time
 import os
 import math
 import copy
 
+from PIL import Image
 import pandas as pd
 import numpy as np
 import mxnet as mx
@@ -415,28 +416,58 @@ def _evaluate(self, val_data):
         _, top5 = acc_top5.get()
         return top1, top5
 
-    def _predict(self, x):
+    def _predict_preprocess(self, x):
+        """Convert a single image input into an NDArray ready for the network.
+
+        Parameters
+        ----------
+        x : str, PIL.Image.Image, numpy.ndarray, mx.nd.NDArray or any other object
+            A str is read with `mx.image.imread` and passed through `transform_eval`;
+            PIL images and numpy arrays are converted to NDArray and processed again.
+            Any other type is returned untouched so the caller can dispatch on it.
+
+        Returns
+        -------
+        mx.nd.NDArray or the input itself
+            (h, w, 3) arrays go through `transform_eval`; (n, 3, h, w) arrays are only
+            checked against `self.input_size`; (n, 1, h, w) arrays get their single
+            channel repeated three times.
+
+        Raises
+        ------
+        ValueError
+            If an NDArray input has none of the supported layouts.
+        """
         resize = int(math.ceil(self.input_size / self._cfg.train.crop_ratio))
         if isinstance(x, str):
-            x = transform_eval(mx.image.imread(x), resize_short=resize, crop_size=self.input_size)
+            x = self._predict_preprocess(transform_eval(
+                mx.image.imread(x), resize_short=resize, crop_size=self.input_size))
+        elif isinstance(x, Image.Image):
+            x = self._predict_preprocess(np.array(x))
         elif isinstance(x, np.ndarray):
-            return self._predict(mx.nd.array(x))
+            x = self._predict_preprocess(mx.nd.array(x))
         elif isinstance(x, mx.nd.NDArray):
-            if len(x.shape) != 3 or x.shape[-1] != 3:
-                raise ValueError('array input with shape (h, w, 3) is required for predict')
-            x = transform_eval(x, resize_short=resize, crop_size=self.input_size)
-        elif isinstance(x, pd.DataFrame):
+            if len(x.shape) == 3 and x.shape[-1] == 3:
+                x = transform_eval(x, resize_short=resize, crop_size=self.input_size)
+            elif len(x.shape) == 4 and x.shape[1] == 3:
+                expected = (self.input_size, self.input_size)
+                assert x.shape[2:] == expected, "Expected: {}, given {}".format(expected, x.shape[2:])
+            elif len(x.shape) == 4 and x.shape[1] == 1:
+                # gray image to rgb; the rank check keeps 1-D/2-D/3-D arrays out of this
+                # branch so they reach the ValueError below instead of an IndexError or a
+                # wrongly tiled array
+                x = mx.nd.concat([x] * 3, dim=1)
+            else:
+                raise ValueError('array input with shape (h, w, 3) or (n, 3, h, w) is required for predict')
+        return x
+
+    def _predict(self, x, ctx_id=0):
+        x = self._predict_preprocess(x)
+        if isinstance(x, pd.DataFrame):
             assert 'image' in x.columns, "Expect column `image` for input images"
-            def _predict_merge(x):
-                y = self._predict(x)
-                y['image'] = x
-                return y
-            return pd.concat([_predict_merge(xx) for xx in x['image']]).reset_index(drop=True)
+            df = self._predict(tuple(x['image']))
+            return df.reset_index(drop=True)
         elif isinstance(x, (list, tuple)):
-            return pd.concat([self._predict(xx) for xx in x]).reset_index(drop=True)
-        else:
+            bs = self._cfg.valid.batch_size
+            self.net.hybridize()
+            results = []
+            topK = min(5, self.num_class)
+            loader = mx.gluon.data.DataLoader(
+                ImageListDataset(x, self._predict_preprocess), batch_size=bs, last_batch='keep')
+            idx = 0
+            for batch in loader:
+                batch = mx.gluon.utils.split_and_load(batch, ctx_list=self.ctx, even_split=False)
+                pred = [self.net(input) for input in batch]
+                for p in pred:
+                    for ii in range(p.shape[0]):
+                        ind = nd.topk(p[ii], k=topK).astype('int').asnumpy().flatten()
+                        probs = mx.nd.softmax(p[ii]).asnumpy().flatten()
+                        for k in range(topK):
+                            results.append({'class': self.classes[ind[k]],
+                                            'score': probs[ind[k]], 'id': ind[k], 'image': x[idx]})
+                        idx += 1
+            return pd.DataFrame(results)
+        elif not isinstance(x, mx.nd.NDArray):
             raise ValueError('Input is not supported: {}'.format(type(x)))
-        x = x.as_in_context(self.ctx[0])
+        assert len(x.shape) == 4 and x.shape[1] == 3, "Expect input to be (n, 3, h, w), given {}".format(x.shape)
+        x = x.as_in_context(self.ctx[ctx_id])
         pred = self.net(x)
         topK = min(5, self.num_class)
         ind = nd.topk(pred, k=topK)[0].astype('int').asnumpy().flatten()
@@ -463,25 +494,52 @@ def _get_feature_net(self):
             raise RuntimeError('Unable to modify the last fc layer in network, (output, fc) expected...')
         return self._feature_net
 
-    def _predict_feature(self, x):
-        resize = int(math.ceil(self.input_size / self._cfg.train.crop_ratio))
-        if isinstance(x, str):
-            x = transform_eval(mx.image.imread(x), resize_short=resize, crop_size=self.input_size)
-        elif isinstance(x, mx.nd.NDArray):
-            x = transform_eval(x, resize_short=resize, crop_size=self.input_size)
-        elif isinstance(x, pd.DataFrame):
+    def _predict_feature(self, x, ctx_id=0):
+        """Extract image features with the network returned by `_get_feature_net`.
+
+        Parameters
+        ----------
+        x : str, PIL.Image.Image, numpy.ndarray, mx.nd.NDArray, list/tuple of str or pd.DataFrame
+            Input image(s). A DataFrame must provide an `image` column; a list/tuple
+            must hold image paths and is processed in batches split over `self.ctx`.
+        ctx_id : int, default is 0
+            Index into `self.ctx` used when a single array is given.
+
+        Returns
+        -------
+        pd.DataFrame
+            One row per image with an `image_feature` column holding the flattened
+            feature vector; list/tuple and DataFrame inputs also get an `image` column.
+
+        Raises
+        ------
+        ValueError
+            If the type of `x` is not supported.
+        """
+        x = self._predict_preprocess(x)
+        if isinstance(x, pd.DataFrame):
             assert 'image' in x.columns, "Expect column `image` for input images"
-            def _predict_merge(x):
-                y = self._predict_feature(x)
-                y['image'] = x
-                return y
-            return pd.concat([_predict_merge(xx) for xx in x['image']]).reset_index(drop=True)
+            df = self._predict_feature(tuple(x['image']))
+            # assign positionally: a Series would be aligned on the index and produce
+            # NaN/misordered rows when `x` does not use a default RangeIndex
+            df['image'] = x['image'].values
+            return df.reset_index(drop=True)
         elif isinstance(x, (list, tuple)):
-            return pd.concat([self._predict_feature(xx) for xx in x]).reset_index(drop=True)
-        else:
+            assert isinstance(x[0], str), "expect image paths in list/tuple input"
+            bs = self._cfg.valid.batch_size
+            feat_net = self._get_feature_net()
+            feat_net.hybridize()
+            results = []
+            loader = mx.gluon.data.DataLoader(
+                ImageListDataset(x, self._predict_preprocess), batch_size=bs, last_batch='keep')
+            for batch in loader:
+                batch = mx.gluon.utils.split_and_load(batch, ctx_list=self.ctx, even_split=False)
+                feats = [feat_net(input) for input in batch]
+                for p in feats:
+                    for ii in range(p.shape[0]):
+                        feat = p[ii].asnumpy().flatten()
+                        results.append({'image_feature': feat})
+            df = pd.DataFrame(results)
+            df['image'] = x
+            return df
+        elif not isinstance(x, mx.nd.NDArray):
             raise ValueError('Input is not supported: {}'.format(type(x)))
-        x = x.as_in_context(self.ctx[0])
+        assert len(x.shape) == 4 and x.shape[1] == 3, "Expect input to be (n, 3, h, w), given {}".format(x.shape)
+        x = x.as_in_context(self.ctx[ctx_id])
         feat_net = self._get_feature_net()
-        feat = feat_net(x)[0].asnumpy().flatten()
-        df = pd.DataFrame({'image_feature': [feat]})
+        # run the forward pass once for the whole batch rather than once per sample
+        feats = feat_net(x)
+        results = []
+        for ii in range(x.shape[0]):
+            feat = feats[ii].asnumpy().flatten()
+            results.append({'image_feature': feat})
+        df = pd.DataFrame(results)
         return df
+
+class ImageListDataset(mx.gluon.data.Dataset):
+    """Internal dataset exposing a list of images for batched prediction.
+
+    Parameters
+    ----------
+    imlist : sequence
+        The items to serve, looked up by position.
+    fn : callable
+        Applied to an item on access; element `[0]` of its result is returned.
+    """
+    def __init__(self, imlist, fn):
+        self._fn = fn
+        self._imlist = imlist
+
+    def __len__(self):
+        return len(self._imlist)
+
+    def __getitem__(self, idx):
+        # presumably `[0]` strips a leading batch axis added by `fn` -- confirm with callers
+        return self._fn(self._imlist[idx])[0]