From bd86cd6ef5745be8c71358934bbea7002e7e2206 Mon Sep 17 00:00:00 2001 From: Clive Cox Date: Tue, 6 Aug 2019 13:51:30 +0100 Subject: [PATCH] Allow predict for sklearn standalone server as well as predict_proba --- doc/source/servers/sklearn.md | 24 ++ .../sklearnserver/samples/iris_predict.yaml | 18 ++ .../sklearnserver/SKLearnServer.py | 30 ++- servers/sklearnserver/test/sklearn_iris.ipynb | 226 ++++++++++++++---- 4 files changed, 244 insertions(+), 54 deletions(-) create mode 100644 servers/sklearnserver/samples/iris_predict.yaml diff --git a/doc/source/servers/sklearn.md b/doc/source/servers/sklearn.md index 874798867e..9712c9eacc 100644 --- a/doc/source/servers/sklearn.md +++ b/doc/source/servers/sklearn.md @@ -27,5 +27,29 @@ spec: ``` +## Sklearn Method + +By default the server will call `predict_proba` on your loaded model/pipeline. If you wish for it to call `predict` instead you can pass a parameter `method` and set it to `predict`. For example: + +``` +apiVersion: machinelearning.seldon.io/v1alpha2 +kind: SeldonDeployment +metadata: + name: sklearn +spec: + name: iris-predict + predictors: + - graph: + children: [] + implementation: SKLEARN_SERVER + modelUri: gs://seldon-models/sklearn/iris + name: classifier + parameters: + - name: method + type: STRING + value: predict + name: default + replicas: 1 +``` Try out a [worked notebook](../examples/server_examples.html) \ No newline at end of file diff --git a/servers/sklearnserver/samples/iris_predict.yaml b/servers/sklearnserver/samples/iris_predict.yaml new file mode 100644 index 0000000000..5a53365c46 --- /dev/null +++ b/servers/sklearnserver/samples/iris_predict.yaml @@ -0,0 +1,18 @@ +apiVersion: machinelearning.seldon.io/v1alpha2 +kind: SeldonDeployment +metadata: + name: sklearn +spec: + name: iris-predict + predictors: + - graph: + children: [] + implementation: SKLEARN_SERVER + modelUri: gs://seldon-models/sklearn/iris + name: classifier + parameters: + - name: method + type: STRING + value: predict + 
name: default + replicas: 1 diff --git a/servers/sklearnserver/sklearnserver/SKLearnServer.py b/servers/sklearnserver/sklearnserver/SKLearnServer.py index d37742afea..628c677c1b 100644 --- a/servers/sklearnserver/sklearnserver/SKLearnServer.py +++ b/servers/sklearnserver/sklearnserver/SKLearnServer.py @@ -4,25 +4,41 @@ from seldon_core.user_model import SeldonComponent from typing import Dict, List, Union, Iterable import os +import logging + +logger = logging.getLogger(__name__) + JOBLIB_FILE = "model.joblib" + class SKLearnServer(SeldonComponent): - def __init__(self, model_uri: str): + def __init__(self, model_uri: str = None, method: str = "predict_proba"): super().__init__() self.model_uri = model_uri + self.method = method self.ready = False + print("Model uri:",self.model_uri) + print("method:",self.method) + self.load() def load(self): print("load") model_file = os.path.join(seldon_core.Storage.download(self.model_uri), JOBLIB_FILE) - print("model file",model_file) + print("model file", model_file) self._joblib = joblib.load(model_file) self.ready = True def predict(self, X: np.ndarray, names: Iterable[str], meta: Dict = None) -> Union[np.ndarray, List, str, bytes]: - print("predict") - if not self.ready: - self.load() - result = self._joblib.predict(X) - return result + try: + if not self.ready: + self.load() + if self.method == "predict_proba": + logger.info("Calling predict_proba") + result = self._joblib.predict_proba(X) + else: + logger.info("Calling predict") + result = self._joblib.predict(X) + return result + except Exception as ex: + logging.exception("Exception during predict") diff --git a/servers/sklearnserver/test/sklearn_iris.ipynb b/servers/sklearnserver/test/sklearn_iris.ipynb index 0f056adb46..a1ffc1bb09 100644 --- a/servers/sklearnserver/test/sklearn_iris.ipynb +++ b/servers/sklearnserver/test/sklearn_iris.ipynb @@ -28,9 +28,9 @@ "name": "stderr", "output_type": "stream", "text": [ - 
"/home/clive/anaconda3/envs/seldon-core/lib/python3.6/site-packages/sklearn/linear_model/logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n", + "/home/clive/anaconda3/envs/seldon-core/lib/python3.6/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n", " FutureWarning)\n", - "/home/clive/anaconda3/envs/seldon-core/lib/python3.6/site-packages/sklearn/linear_model/logistic.py:460: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n", + "/home/clive/anaconda3/envs/seldon-core/lib/python3.6/site-packages/sklearn/linear_model/logistic.py:469: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n", " \"this warning.\", FutureWarning)\n" ] } @@ -79,7 +79,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -102,6 +102,9 @@ "Downloading https://files.pythonhosted.org/packages/5d/bd/c0feba81fb60e231cf40fc8a322ed5873c90ef7711795508692b1481a4ae/scipy-1.3.0-cp37-cp37m-manylinux1_x86_64.whl (25.2MB)\n", "Installing collected packages: scipy, scikit-learn, joblib\n", "Successfully installed joblib-0.13.2 scikit-learn-0.20.3 scipy-1.3.0\n", + "WARNING: Url '/whl' is ignored. 
It is either a non-existing path or lacks a specific scheme.\n", + "WARNING: You are using pip version 19.1.1, however version 19.2.1 is available.\n", + "You should consider upgrading via the 'pip install --upgrade pip' command.\n", "Build completed successfully\n" ] } @@ -112,14 +115,14 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "8974f8bddcf919052aaefc8e563afb8b37bef38595874355b6868e0fa827a047\r\n" + "85ebfc6c41ef145b578077809af81a23ecb6c7ffe261645b098466d6fcda6ecb\r\n" ] } ], @@ -136,29 +139,42 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "----------------------------------------\n", - "SENDING NEW REQUEST:\n", - "\n", - "[[6.297 2.485 5.03 0.09 ]]\n", - "RECEIVED RESPONSE:\n", - "meta {\n", - "}\n", - "data {\n", - " ndarray {\n", - " values {\n", - " number_value: 1.0\n", - " }\n", - " }\n", - "}\n", - "\n", - "\n" + "----------------------------------------\r\n", + "SENDING NEW REQUEST:\r\n", + "\r\n", + "[[6.834 4.605 7.238 2.832]]\r\n", + "RECEIVED RESPONSE:\r\n", + "meta {\r\n", + "}\r\n", + "data {\r\n", + " names: \"t:0\"\r\n", + " names: \"t:1\"\r\n", + " names: \"t:2\"\r\n", + " ndarray {\r\n", + " values {\r\n", + " list_value {\r\n", + " values {\r\n", + " number_value: 7.698570018103115e-05\r\n", + " }\r\n", + " values {\r\n", + " number_value: 0.037101590872860316\r\n", + " }\r\n", + " values {\r\n", + " number_value: 0.9628214234269586\r\n", + " }\r\n", + " }\r\n", + " }\r\n", + " }\r\n", + "}\r\n", + "\r\n", + "\r\n" ] } ], @@ -168,7 +184,73 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "sklearnserver\r\n" + ] + } + ], + "source": [ + "!docker rm sklearnserver --force" + ] + }, + { + 
"cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "d7298dbeaee7508c995d817901b84cf983397003cd1eb74dabc46fd14dad49b0\r\n" + ] + } + ], + "source": [ + "!docker run --rm -d --name \"sklearnserver\" -p 5000:5000 -e PREDICTIVE_UNIT_PARAMETERS='[{\"type\":\"STRING\",\"name\":\"method\",\"value\":\"predict\"},{\"type\":\"STRING\",\"name\":\"model_uri\",\"value\":\"file:///model\"}]' -v ${PWD}:/model seldonio/sklearnserver_rest:0.1" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "----------------------------------------\r\n", + "SENDING NEW REQUEST:\r\n", + "\r\n", + "[[7.22 3.214 1.305 2.948]]\r\n", + "RECEIVED RESPONSE:\r\n", + "meta {\r\n", + "}\r\n", + "data {\r\n", + " ndarray {\r\n", + " values {\r\n", + " number_value: 0.0\r\n", + " }\r\n", + " }\r\n", + "}\r\n", + "\r\n", + "\r\n" + ] + } + ], + "source": [ + "!seldon-core-tester contract.json 0.0.0.0 5000 -p" + ] + }, + { + "cell_type": "code", + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -192,7 +274,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -215,6 +297,9 @@ "Downloading https://files.pythonhosted.org/packages/5d/bd/c0feba81fb60e231cf40fc8a322ed5873c90ef7711795508692b1481a4ae/scipy-1.3.0-cp37-cp37m-manylinux1_x86_64.whl (25.2MB)\n", "Installing collected packages: scipy, scikit-learn, joblib\n", "Successfully installed joblib-0.13.2 scikit-learn-0.20.3 scipy-1.3.0\n", + "WARNING: Url '/whl' is ignored. 
It is either a non-existing path or lacks a specific scheme.\n", + "WARNING: You are using pip version 19.1.1, however version 19.2.1 is available.\n", + "You should consider upgrading via the 'pip install --upgrade pip' command.\n", "Build completed successfully\n" ] } @@ -225,14 +310,14 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "e7eda83721402970145f00541e07a8cc67c9b21bd2d898ec49687bb897441c7e\r\n" + "9d0218b348e186596717736035bf67fc75f91ec0bdf8152b9d1ad9734d842d54\r\n" ] } ], @@ -249,29 +334,42 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "----------------------------------------\n", - "SENDING NEW REQUEST:\n", - "\n", - "[[4.637 4.314 2.837 2.695]]\n", - "RECEIVED RESPONSE:\n", - "meta {\n", - "}\n", - "data {\n", - " ndarray {\n", - " values {\n", - " number_value: 0.0\n", - " }\n", - " }\n", - "}\n", - "\n", - "\n" + "----------------------------------------\r\n", + "SENDING NEW REQUEST:\r\n", + "\r\n", + "[[6.538 4.217 6.519 0.217]]\r\n", + "RECEIVED RESPONSE:\r\n", + "meta {\r\n", + "}\r\n", + "data {\r\n", + " names: \"t:0\"\r\n", + " names: \"t:1\"\r\n", + " names: \"t:2\"\r\n", + " ndarray {\r\n", + " values {\r\n", + " list_value {\r\n", + " values {\r\n", + " number_value: 0.003966041860793068\r\n", + " }\r\n", + " values {\r\n", + " number_value: 0.8586797745038719\r\n", + " }\r\n", + " values {\r\n", + " number_value: 0.13735418363533516\r\n", + " }\r\n", + " }\r\n", + " }\r\n", + " }\r\n", + "}\r\n", + "\r\n", + "\r\n" ] } ], @@ -288,7 +386,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -298,14 +396,20 @@ "----------------------------------------\r\n", "SENDING NEW REQUEST:\r\n", "\r\n", - "[[7.991 3.926 8.303 1.303]]\r\n", + "[[4.404 4.341 5.101 
0.219]]\r\n", "RECEIVED RESPONSE:\r\n", "meta {\r\n", "}\r\n", "data {\r\n", + " names: \"t:0\"\r\n", + " names: \"t:1\"\r\n", + " names: \"t:2\"\r\n", " tensor {\r\n", " shape: 1\r\n", - " values: 2.0\r\n", + " shape: 3\r\n", + " values: 0.10494571335925532\r\n", + " values: 0.6017695103262425\r\n", + " values: 0.29328477631450234\r\n", " }\r\n", "}\r\n", "\r\n", @@ -319,7 +423,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -334,6 +438,34 @@ "!docker rm sklearnserver --force" ] }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "def x(a=None,b=2):\n", + " print(a,b)\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1 3\n" + ] + } + ], + "source": [ + "x(b=3,a=1)" + ] + }, { "cell_type": "code", "execution_count": null,