From bd86cd6ef5745be8c71358934bbea7002e7e2206 Mon Sep 17 00:00:00 2001
From: Clive Cox <cc@seldon.io>
Date: Tue, 6 Aug 2019 13:51:30 +0100
Subject: [PATCH] Allow predict for sklearn standalone server as well as
 predict_proba

---
 doc/source/servers/sklearn.md                 |  24 ++
 .../sklearnserver/samples/iris_predict.yaml   |  18 ++
 .../sklearnserver/SKLearnServer.py            |  30 ++-
 servers/sklearnserver/test/sklearn_iris.ipynb | 226 ++++++++++++++----
 4 files changed, 244 insertions(+), 54 deletions(-)
 create mode 100644 servers/sklearnserver/samples/iris_predict.yaml

diff --git a/doc/source/servers/sklearn.md b/doc/source/servers/sklearn.md
index 874798867e..9712c9eacc 100644
--- a/doc/source/servers/sklearn.md
+++ b/doc/source/servers/sklearn.md
@@ -27,5 +27,29 @@ spec:
 
 ```
 
+## Sklearn Method
+
+By default the server will call `predict_proba` on your loaded model/pipeline. If you wish for it to call `predict` instead, you can pass a parameter `method` and set it to `predict`. For example:
+
+```
+apiVersion: machinelearning.seldon.io/v1alpha2
+kind: SeldonDeployment
+metadata:
+  name: sklearn
+spec:
+  name: iris-predict
+  predictors:
+  - graph:
+      children: []
+      implementation: SKLEARN_SERVER
+      modelUri: gs://seldon-models/sklearn/iris
+      name: classifier
+      parameters:
+        - name: method
+          type: STRING
+          value: predict
+    name: default
+    replicas: 1
+```
 
 Try out a [worked notebook](../examples/server_examples.html)
\ No newline at end of file
diff --git a/servers/sklearnserver/samples/iris_predict.yaml b/servers/sklearnserver/samples/iris_predict.yaml
new file mode 100644
index 0000000000..5a53365c46
--- /dev/null
+++ b/servers/sklearnserver/samples/iris_predict.yaml
@@ -0,0 +1,18 @@
+apiVersion: machinelearning.seldon.io/v1alpha2
+kind: SeldonDeployment
+metadata:
+  name: sklearn
+spec:
+  name: iris-predict
+  predictors:
+  - graph:
+      children: []
+      implementation: SKLEARN_SERVER
+      modelUri: gs://seldon-models/sklearn/iris
+      name: classifier
+      parameters:
+        - name: method
+          type: STRING
+          value: predict
+    name: default
+    replicas: 1
diff --git a/servers/sklearnserver/sklearnserver/SKLearnServer.py b/servers/sklearnserver/sklearnserver/SKLearnServer.py
index d37742afea..628c677c1b 100644
--- a/servers/sklearnserver/sklearnserver/SKLearnServer.py
+++ b/servers/sklearnserver/sklearnserver/SKLearnServer.py
@@ -4,25 +4,41 @@
 from seldon_core.user_model import SeldonComponent
 from typing import Dict, List, Union, Iterable
 import os
+import logging
+
+logger = logging.getLogger(__name__)
+
 
 JOBLIB_FILE = "model.joblib"
 
+
 class SKLearnServer(SeldonComponent):
-    def __init__(self, model_uri: str):
+    def __init__(self, model_uri: str = None,  method: str = "predict_proba"):
         super().__init__()
         self.model_uri = model_uri
+        self.method = method
         self.ready = False
+        print("Model uri:",self.model_uri)
+        print("method:",self.method)
+        self.load()
 
     def load(self):
         print("load")
         model_file = os.path.join(seldon_core.Storage.download(self.model_uri), JOBLIB_FILE)
-        print("model file",model_file)
+        print("model file", model_file)
         self._joblib = joblib.load(model_file)
         self.ready = True
 
     def predict(self, X: np.ndarray, names: Iterable[str], meta: Dict = None) -> Union[np.ndarray, List, str, bytes]:
-        print("predict")
-        if not self.ready:
-            self.load()
-        result = self._joblib.predict(X)
-        return result
+        try:
+            if not self.ready:
+                self.load()
+            if self.method == "predict_proba":
+                logger.info("Calling predict_proba")
+                result = self._joblib.predict_proba(X)
+            else:
+                logger.info("Calling predict")
+                result = self._joblib.predict(X)
+            return result
+        except Exception as ex:
+            logging.exception("Exception during predict")
diff --git a/servers/sklearnserver/test/sklearn_iris.ipynb b/servers/sklearnserver/test/sklearn_iris.ipynb
index 0f056adb46..a1ffc1bb09 100644
--- a/servers/sklearnserver/test/sklearn_iris.ipynb
+++ b/servers/sklearnserver/test/sklearn_iris.ipynb
@@ -28,9 +28,9 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "/home/clive/anaconda3/envs/seldon-core/lib/python3.6/site-packages/sklearn/linear_model/logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
+      "/home/clive/anaconda3/envs/seldon-core/lib/python3.6/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
       "  FutureWarning)\n",
-      "/home/clive/anaconda3/envs/seldon-core/lib/python3.6/site-packages/sklearn/linear_model/logistic.py:460: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n",
+      "/home/clive/anaconda3/envs/seldon-core/lib/python3.6/site-packages/sklearn/linear_model/logistic.py:469: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n",
       "  \"this warning.\", FutureWarning)\n"
      ]
     }
@@ -79,7 +79,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [
     {
@@ -102,6 +102,9 @@
       "Downloading https://files.pythonhosted.org/packages/5d/bd/c0feba81fb60e231cf40fc8a322ed5873c90ef7711795508692b1481a4ae/scipy-1.3.0-cp37-cp37m-manylinux1_x86_64.whl (25.2MB)\n",
       "Installing collected packages: scipy, scikit-learn, joblib\n",
       "Successfully installed joblib-0.13.2 scikit-learn-0.20.3 scipy-1.3.0\n",
+      "WARNING: Url '/whl' is ignored. It is either a non-existing path or lacks a specific scheme.\n",
+      "WARNING: You are using pip version 19.1.1, however version 19.2.1 is available.\n",
+      "You should consider upgrading via the 'pip install --upgrade pip' command.\n",
       "Build completed successfully\n"
      ]
     }
@@ -112,14 +115,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "8974f8bddcf919052aaefc8e563afb8b37bef38595874355b6868e0fa827a047\r\n"
+      "85ebfc6c41ef145b578077809af81a23ecb6c7ffe261645b098466d6fcda6ecb\r\n"
      ]
     }
    ],
@@ -136,29 +139,42 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 13,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "----------------------------------------\n",
-      "SENDING NEW REQUEST:\n",
-      "\n",
-      "[[6.297 2.485 5.03  0.09 ]]\n",
-      "RECEIVED RESPONSE:\n",
-      "meta {\n",
-      "}\n",
-      "data {\n",
-      "  ndarray {\n",
-      "    values {\n",
-      "      number_value: 1.0\n",
-      "    }\n",
-      "  }\n",
-      "}\n",
-      "\n",
-      "\n"
+      "----------------------------------------\r\n",
+      "SENDING NEW REQUEST:\r\n",
+      "\r\n",
+      "[[6.834 4.605 7.238 2.832]]\r\n",
+      "RECEIVED RESPONSE:\r\n",
+      "meta {\r\n",
+      "}\r\n",
+      "data {\r\n",
+      "  names: \"t:0\"\r\n",
+      "  names: \"t:1\"\r\n",
+      "  names: \"t:2\"\r\n",
+      "  ndarray {\r\n",
+      "    values {\r\n",
+      "      list_value {\r\n",
+      "        values {\r\n",
+      "          number_value: 7.698570018103115e-05\r\n",
+      "        }\r\n",
+      "        values {\r\n",
+      "          number_value: 0.037101590872860316\r\n",
+      "        }\r\n",
+      "        values {\r\n",
+      "          number_value: 0.9628214234269586\r\n",
+      "        }\r\n",
+      "      }\r\n",
+      "    }\r\n",
+      "  }\r\n",
+      "}\r\n",
+      "\r\n",
+      "\r\n"
      ]
     }
    ],
@@ -168,7 +184,73 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "sklearnserver\r\n"
+     ]
+    }
+   ],
+   "source": [
+    "!docker rm sklearnserver --force"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "d7298dbeaee7508c995d817901b84cf983397003cd1eb74dabc46fd14dad49b0\r\n"
+     ]
+    }
+   ],
+   "source": [
+    "!docker run --rm -d --name \"sklearnserver\"  -p 5000:5000 -e PREDICTIVE_UNIT_PARAMETERS='[{\"type\":\"STRING\",\"name\":\"method\",\"value\":\"predict\"},{\"type\":\"STRING\",\"name\":\"model_uri\",\"value\":\"file:///model\"}]' -v ${PWD}:/model seldonio/sklearnserver_rest:0.1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "----------------------------------------\r\n",
+      "SENDING NEW REQUEST:\r\n",
+      "\r\n",
+      "[[7.22  3.214 1.305 2.948]]\r\n",
+      "RECEIVED RESPONSE:\r\n",
+      "meta {\r\n",
+      "}\r\n",
+      "data {\r\n",
+      "  ndarray {\r\n",
+      "    values {\r\n",
+      "      number_value: 0.0\r\n",
+      "    }\r\n",
+      "  }\r\n",
+      "}\r\n",
+      "\r\n",
+      "\r\n"
+     ]
+    }
+   ],
+   "source": [
+    "!seldon-core-tester contract.json 0.0.0.0 5000 -p"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
    "metadata": {},
    "outputs": [
     {
@@ -192,7 +274,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [
     {
@@ -215,6 +297,9 @@
       "Downloading https://files.pythonhosted.org/packages/5d/bd/c0feba81fb60e231cf40fc8a322ed5873c90ef7711795508692b1481a4ae/scipy-1.3.0-cp37-cp37m-manylinux1_x86_64.whl (25.2MB)\n",
       "Installing collected packages: scipy, scikit-learn, joblib\n",
       "Successfully installed joblib-0.13.2 scikit-learn-0.20.3 scipy-1.3.0\n",
+      "WARNING: Url '/whl' is ignored. It is either a non-existing path or lacks a specific scheme.\n",
+      "WARNING: You are using pip version 19.1.1, however version 19.2.1 is available.\n",
+      "You should consider upgrading via the 'pip install --upgrade pip' command.\n",
       "Build completed successfully\n"
      ]
     }
@@ -225,14 +310,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "e7eda83721402970145f00541e07a8cc67c9b21bd2d898ec49687bb897441c7e\r\n"
+      "9d0218b348e186596717736035bf67fc75f91ec0bdf8152b9d1ad9734d842d54\r\n"
      ]
     }
    ],
@@ -249,29 +334,42 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "----------------------------------------\n",
-      "SENDING NEW REQUEST:\n",
-      "\n",
-      "[[4.637 4.314 2.837 2.695]]\n",
-      "RECEIVED RESPONSE:\n",
-      "meta {\n",
-      "}\n",
-      "data {\n",
-      "  ndarray {\n",
-      "    values {\n",
-      "      number_value: 0.0\n",
-      "    }\n",
-      "  }\n",
-      "}\n",
-      "\n",
-      "\n"
+      "----------------------------------------\r\n",
+      "SENDING NEW REQUEST:\r\n",
+      "\r\n",
+      "[[6.538 4.217 6.519 0.217]]\r\n",
+      "RECEIVED RESPONSE:\r\n",
+      "meta {\r\n",
+      "}\r\n",
+      "data {\r\n",
+      "  names: \"t:0\"\r\n",
+      "  names: \"t:1\"\r\n",
+      "  names: \"t:2\"\r\n",
+      "  ndarray {\r\n",
+      "    values {\r\n",
+      "      list_value {\r\n",
+      "        values {\r\n",
+      "          number_value: 0.003966041860793068\r\n",
+      "        }\r\n",
+      "        values {\r\n",
+      "          number_value: 0.8586797745038719\r\n",
+      "        }\r\n",
+      "        values {\r\n",
+      "          number_value: 0.13735418363533516\r\n",
+      "        }\r\n",
+      "      }\r\n",
+      "    }\r\n",
+      "  }\r\n",
+      "}\r\n",
+      "\r\n",
+      "\r\n"
      ]
     }
    ],
@@ -288,7 +386,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [
     {
@@ -298,14 +396,20 @@
       "----------------------------------------\r\n",
       "SENDING NEW REQUEST:\r\n",
       "\r\n",
-      "[[7.991 3.926 8.303 1.303]]\r\n",
+      "[[4.404 4.341 5.101 0.219]]\r\n",
       "RECEIVED RESPONSE:\r\n",
       "meta {\r\n",
       "}\r\n",
       "data {\r\n",
+      "  names: \"t:0\"\r\n",
+      "  names: \"t:1\"\r\n",
+      "  names: \"t:2\"\r\n",
       "  tensor {\r\n",
       "    shape: 1\r\n",
-      "    values: 2.0\r\n",
+      "    shape: 3\r\n",
+      "    values: 0.10494571335925532\r\n",
+      "    values: 0.6017695103262425\r\n",
+      "    values: 0.29328477631450234\r\n",
       "  }\r\n",
       "}\r\n",
       "\r\n",
@@ -319,7 +423,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [
     {
@@ -334,6 +438,34 @@
     "!docker rm sklearnserver --force"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def x(a=None,b=2):\n",
+    "    print(a,b)\n",
+    "    "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1 3\n"
+     ]
+    }
+   ],
+   "source": [
+    "x(b=3,a=1)"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,