diff --git a/doc/source/analytics/explainers.md b/doc/source/analytics/explainers.md index 7a796f33a2..ef61916e87 100644 --- a/doc/source/analytics/explainers.md +++ b/doc/source/analytics/explainers.md @@ -39,6 +39,13 @@ For Alibi explainers that need to be trained you should The runtime environment in our [Alibi Explain Server](https://github.com/SeldonIO/seldon-core/tree/master/components/alibi-explain-server) is locked using [Poetry](https://python-poetry.org/). See our e2e example [here](../examples/iris_explainer_poetry.html) on how to use that definition to train your explainers. +### V2 protocol for explainer using [MLServer](https://github.com/SeldonIO/MLServer) (incubating) + +The support for v2 protocol is now handled with MLServer moving forward. This is experimental +and only works for black-box explainers. + +For an e2e example, please check AnchorTabular notebook [here](../examples/iris_anchor_tabular_explainer_v2.html). + ## Explain API For the Seldon Protocol an endpoint path will be exposed for: @@ -84,7 +91,7 @@ The explain method is also supported for tensorflow and v2 kfserving protocols. | ------ | ----- | | seldon | `http:////api/v1.0/explain` | | tensorflow | `http:////v1/models/:explain` | -| kfserving | `http:////v2/models//explain` | +| kfserving | `http:////v2/models//infer` | Note: for `tensorflow` protocol we support similar non-standard extension as for the [prediction API](../graph/protocols.md#rest-and-grpc-tensorflow-protocol), `http:////v1/models/:explain`. 
diff --git a/doc/source/examples/iris_anchor_tabular_explainer_v2.nblink b/doc/source/examples/iris_anchor_tabular_explainer_v2.nblink new file mode 100644 index 0000000000..a66cecc766 --- /dev/null +++ b/doc/source/examples/iris_anchor_tabular_explainer_v2.nblink @@ -0,0 +1,3 @@ +{ + "path": "../../../notebooks/explainer_examples_v2.ipynb" +} diff --git a/helm-charts/seldon-core-operator/templates/customresourcedefinition_seldondeployments.machinelearning.seldon.io.yaml b/helm-charts/seldon-core-operator/templates/customresourcedefinition_seldondeployments.machinelearning.seldon.io.yaml index be053812b6..f5348013e1 100644 --- a/helm-charts/seldon-core-operator/templates/customresourcedefinition_seldondeployments.machinelearning.seldon.io.yaml +++ b/helm-charts/seldon-core-operator/templates/customresourcedefinition_seldondeployments.machinelearning.seldon.io.yaml @@ -4272,6 +4272,8 @@ spec: type: object envSecretRefName: type: string + initParameters: + type: string modelUri: type: string replicas: diff --git a/helm-charts/seldon-core-operator/templates/customresourcedefinition_v1_seldondeployments.machinelearning.seldon.io.yaml b/helm-charts/seldon-core-operator/templates/customresourcedefinition_v1_seldondeployments.machinelearning.seldon.io.yaml index f27ad36d35..c976779e77 100644 --- a/helm-charts/seldon-core-operator/templates/customresourcedefinition_v1_seldondeployments.machinelearning.seldon.io.yaml +++ b/helm-charts/seldon-core-operator/templates/customresourcedefinition_v1_seldondeployments.machinelearning.seldon.io.yaml @@ -5234,6 +5234,8 @@ spec: type: object envSecretRefName: type: string + initParameters: + type: string modelUri: type: string replicas: @@ -11421,6 +11423,8 @@ spec: type: object envSecretRefName: type: string + initParameters: + type: string modelUri: type: string replicas: @@ -17608,6 +17612,8 @@ spec: type: object envSecretRefName: type: string + initParameters: + type: string modelUri: type: string replicas: diff --git 
a/helm-charts/seldon-core-operator/values.yaml b/helm-charts/seldon-core-operator/values.yaml index 24147a4623..2cbd07c99e 100644 --- a/helm-charts/seldon-core-operator/values.yaml +++ b/helm-charts/seldon-core-operator/values.yaml @@ -27,7 +27,7 @@ certManager: # ## Install with limited namespace visibility # If you want to ensure seldon-core-controller can only have visibility -# to specifci namespaces you can set the controllerId +# to specific namespaces you can set the controllerId controllerId: "" # Whether operator should create the webhooks and configmap on startup (false means created from chart) @@ -43,7 +43,7 @@ defaultUserID: "8888" managerUserID: 8888 # ## Service Orchestrator (Executor) -# The executor is the default service orchestrator which has superceeded the "Java Engine" +# The executor is the default service orchestrator which has superseded the "Java Engine" executor: port: 8000 metricsPortName: metrics @@ -119,7 +119,7 @@ predictor_servers: defaultImageVersion: "1.12.0-dev" image: seldonio/mlflowserver kfserving: - defaultImageVersion: "0.5.0" + defaultImageVersion: "0.6.0.dev0" image: seldonio/mlserver SKLEARN_SERVER: protocols: @@ -127,7 +127,7 @@ predictor_servers: defaultImageVersion: "1.12.0-dev" image: seldonio/sklearnserver kfserving: - defaultImageVersion: "0.5.0" + defaultImageVersion: "0.6.0.dev0" image: seldonio/mlserver TENSORFLOW_SERVER: protocols: @@ -143,7 +143,7 @@ predictor_servers: defaultImageVersion: "1.12.0-dev" image: seldonio/xgboostserver kfserving: - defaultImageVersion: "0.5.0" + defaultImageVersion: "0.6.0.dev0" image: seldonio/mlserver TRITON_SERVER: protocols: @@ -153,7 +153,7 @@ predictor_servers: TEMPO_SERVER: protocols: kfserving: - defaultImageVersion: "0.5.0" + defaultImageVersion: "0.6.0.dev0" image: seldonio/mlserver # ## Other @@ -206,3 +206,4 @@ engine: # Explainer image explainer: image: seldonio/alibiexplainer:1.12.0-dev + image_v2: seldonio/mlserver:0.6.0.dev2 diff --git 
a/notebooks/explainer_examples_v2.ipynb b/notebooks/explainer_examples_v2.ipynb new file mode 100644 index 0000000000..f07e94462f --- /dev/null +++ b/notebooks/explainer_examples_v2.ipynb @@ -0,0 +1,445 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "d985c392", + "metadata": {}, + "source": [ + "# Example model explanations with Seldon and v2 Protocol - Incubating\n", + "\n", + "In this notebook we will show examples that illustrate how to explain models using [MLServer](https://github.com/SeldonIO/MLServer).\n", + "\n", + "MLServer is a Python server for your machine learning models through a REST and gRPC interface, fully compliant with KFServing's v2 Dataplane spec. " + ] + }, + { + "cell_type": "markdown", + "id": "b9da1204", + "metadata": {}, + "source": [ + "## Running this Notebook\n", + "\n", + " This should install the required package dependencies, if not please also install:\n", + " \n", + "- install and configure `mc`, follow the relevant section in this [link](https://docs.seldon.io/projects/seldon-core/en/latest/examples/minio_setup.html)\n", + "\n", + "- run this jupyter notebook in conda environment\n", + "```bash\n", + "$ conda create --name python3.8-example python=3.8 -y\n", + "$ conda activate python3.8-example\n", + "$ pip install jupyter\n", + "$ jupyter notebook\n", + "```\n", + "\n", + "- install requirements\n", + " - [alibi package](https://pypi.org/project/alibi/)\n", + " - `sklearn`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cf7c5645", + "metadata": {}, + "outputs": [], + "source": [ + "!pip install sklearn alibi" + ] + }, + { + "cell_type": "markdown", + "id": "591c69b6", + "metadata": {}, + "source": [ + "## Setup Seldon Core\n", + "\n", + "Follow the instructions to [Setup Cluster](https://docs.seldon.io/projects/seldon-core/en/latest/examples/seldon_core_setup.html#Setup-Cluster) with [Ambassador 
Ingress](https://docs.seldon.io/projects/seldon-core/en/latest/examples/seldon_core_setup.html#Ambassador) and [Install Seldon Core](https://docs.seldon.io/projects/seldon-core/en/latest/examples/seldon_core_setup.html#Install-Seldon-Core).\n", + "\n", + " Then port-forward to that ingress on localhost:8003 in a separate terminal either with:\n", + "\n", + " * Ambassador: `kubectl port-forward $(kubectl get pods -n seldon -l app.kubernetes.io/name=ambassador -o jsonpath='{.items[0].metadata.name}') -n seldon 8003:8080`\n", + " * Istio: `kubectl port-forward $(kubectl get pods -l istio=ingressgateway -n istio-system -o jsonpath='{.items[0].metadata.name}') -n istio-system 8003:8080`\n", + "\n", + "### Setup MinIO\n", + "\n", + "Use the provided [notebook](https://docs.seldon.io/projects/seldon-core/en/latest/examples/minio_setup.html) to install Minio in your cluster and configure `mc` CLI tool. \n", + "Instructions [also online](https://docs.seldon.io/projects/seldon-core/en/latest/examples/minio_setup.html)." 
+ ] + }, + { + "cell_type": "markdown", + "id": "7f4a7085", + "metadata": {}, + "source": [ + "## Train `iris` model using `sklearn`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "41bf4ce1", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import shutil\n", + "\n", + "from joblib import dump\n", + "from sklearn.datasets import load_iris\n", + "from sklearn.linear_model import LogisticRegression" + ] + }, + { + "cell_type": "markdown", + "id": "d658769e", + "metadata": {}, + "source": [ + "### Train model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "029ba0eb", + "metadata": {}, + "outputs": [], + "source": [ + "iris_data = load_iris()\n", + "\n", + "clf = LogisticRegression(solver=\"liblinear\", multi_class=\"ovr\")\n", + "clf.fit(iris_data.data, iris_data.target)" + ] + }, + { + "cell_type": "markdown", + "id": "719842c1", + "metadata": {}, + "source": [ + "### Save model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8757d345", + "metadata": {}, + "outputs": [], + "source": [ + "modelpath = \"/tmp/sklearn_iris\"\n", + "if os.path.exists(modelpath):\n", + " shutil.rmtree(modelpath)\n", + "os.makedirs(modelpath)\n", + "modelfile = os.path.join(modelpath, \"model.joblib\")\n", + "\n", + "dump(clf, modelfile)" + ] + }, + { + "cell_type": "markdown", + "id": "d67818f2", + "metadata": {}, + "source": [ + "## Create `AnchorTabular` explainer " + ] + }, + { + "cell_type": "markdown", + "id": "c369a479", + "metadata": {}, + "source": [ + "### Create explainer artifact" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2dcaf4c0", + "metadata": {}, + "outputs": [], + "source": [ + "from alibi.explainers import AnchorTabular\n", + "\n", + "explainer = AnchorTabular(clf.predict, feature_names=iris_data.feature_names)\n", + "explainer.fit(iris_data.data, disc_perc=(25, 50, 75))" + ] + }, + { + "cell_type": "markdown", + "id": "bcb9338e", + "metadata": {}, + 
"source": [ + "### Save explainer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c854f554", + "metadata": {}, + "outputs": [], + "source": [ + "explainerpath = \"/tmp/iris_anchor_tabular_explainer_v2\"\n", + "if os.path.exists(explainerpath):\n", + " shutil.rmtree(explainerpath)\n", + "explainer.save(explainerpath)" + ] + }, + { + "cell_type": "markdown", + "id": "20b490f9", + "metadata": {}, + "source": [ + "## Install dependencies to pack the environment for deployment" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e2bd4524", + "metadata": {}, + "outputs": [], + "source": [ + "!pip install conda-pack mlserver==0.6.0.dev2 mlserver-alibi-explain==0.6.0.dev2" + ] + }, + { + "cell_type": "markdown", + "id": "2fb3f64d", + "metadata": {}, + "source": [ + "## Pack environment" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "482071c2", + "metadata": {}, + "outputs": [], + "source": [ + "import conda_pack,\n", + "\n", + "env_file_path = os.path.join(explainerpath, \"environment.tar.gz\")\n", + "conda_pack.pack(\n", + " output=str(env_file_path),\n", + " force=True,\n", + " verbose=True,\n", + " ignore_editable_packages=False,\n", + " ignore_missing_files=True,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "b33bfef3", + "metadata": {}, + "source": [ + "## Copy artifacts to object store (`minio`)\n", + "\n", + "### Configure `mc` to access the minio service in the local kind cluster\n", + "note: make sure that minio ip is reflected properly below, run:\n", + "- `kubectl get service -n minio-system`\n", + "- `mc config host add minio-seldon [ip] minioadmin minioadmin`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2b4b277b", + "metadata": {}, + "outputs": [], + "source": [ + "target_bucket = \"minio-seldon/models\"\n", + "os.system(f\"mc rb --force {target_bucket}\")\n", + "os.system(f\"mc mb {target_bucket}\")\n", + "os.system(f\"mc cp --recursive {modelpath} 
{target_bucket}\")\n", + "os.system(f\"mc cp --recursive {explainerpath} {target_bucket}\")" + ] + }, + { + "cell_type": "markdown", + "id": "48d5eef5", + "metadata": {}, + "source": [ + "## Deploy to local `kind` cluster" + ] + }, + { + "cell_type": "markdown", + "id": "ebd83919", + "metadata": {}, + "source": [ + "### Create deployment CRD" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2d7548d2", + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile iris-with-explainer-v2.yaml\n", + "apiVersion: machinelearning.seldon.io/v1\n", + "kind: SeldonDeployment\n", + "metadata:\n", + " name: iris\n", + "spec:\n", + " protocol: kfserving # Activate v2 protocol / mlserver usage\n", + " name: iris\n", + " annotations:\n", + " seldon.io/rest-timeout: \"100000\"\n", + " predictors:\n", + " - graph:\n", + " children: []\n", + " implementation: SKLEARN_SERVER\n", + " modelUri: s3://models/sklearn_iris\n", + " envSecretRefName: seldon-rclone-secret\n", + " name: classifier\n", + " explainer:\n", + " type: AnchorTabular\n", + " modelUri: s3://models/iris_anchor_tabular_explainer_v2\n", + " envSecretRefName: seldon-rclone-secret\n", + " name: default\n", + " replicas: 1" + ] + }, + { + "cell_type": "markdown", + "id": "62f79d0f", + "metadata": {}, + "source": [ + "### Deploy" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "120475cc", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "seldondeployment.machinelearning.seldon.io/iris created\r\n" + ] + } + ], + "source": [ + "!kubectl apply -f iris-with-explainer-v2.yaml" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1e3038ca", + "metadata": {}, + "outputs": [], + "source": [ + "!kubectl rollout status deploy/$(kubectl get deploy -l seldon-deployment-id=iris -o jsonpath='{.items[0].metadata.name}')" + ] + }, + { + "cell_type": "markdown", + "id": "268d97b3", + "metadata": {}, + "source": [ + "### Test 
explainer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d27de524", + "metadata": {}, + "outputs": [], + "source": [ + "!pip install numpy requests" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fbba7464", + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "\n", + "import numpy as np\n", + "import requests" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6db9e7dc", + "metadata": {}, + "outputs": [], + "source": [ + "endpoint = \"http://localhost:8003/seldon/seldon/iris-explainer/default/v2/models/iris-default-explainer/infer\"\n", + "\n", + "test_data = np.array([[5.964, 4.006, 2.081, 1.031]])\n", + "\n", + "inference_request = {\n", + " \"parameters\": {\"content_type\": \"np\"},\n", + " \"inputs\": [\n", + " {\n", + " \"name\": \"explain\",\n", + " \"shape\": test_data.shape,\n", + " \"datatype\": \"FP32\",\n", + " \"data\": test_data.tolist(),\n", + " \"parameters\": {\"content_type\": \"np\"},\n", + " },\n", + " ],\n", + "}\n", + "response = requests.post(endpoint, json=inference_request)\n", + "\n", + "explanation = json.loads(response.json()[\"outputs\"][0][\"data\"])\n", + "print(\"Anchor: %s\" % (\" AND \".join(explanation[\"data\"][\"anchor\"])))\n", + "print(\"Precision: %.2f\" % explanation[\"data\"][\"precision\"])\n", + "print(\"Coverage: %.2f\" % explanation[\"data\"][\"coverage\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8edd6c8c", + "metadata": {}, + "outputs": [], + "source": [ + "!kubectl delete -f iris-with-explainer-v2.yaml" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "174996d5", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": 
"text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/operator/apis/machinelearning.seldon.io/v1/seldondeployment_types.go b/operator/apis/machinelearning.seldon.io/v1/seldondeployment_types.go index 5aaf9bde4d..eb34ac7602 100644 --- a/operator/apis/machinelearning.seldon.io/v1/seldondeployment_types.go +++ b/operator/apis/machinelearning.seldon.io/v1/seldondeployment_types.go @@ -290,6 +290,7 @@ type Explainer struct { EnvSecretRefName string `json:"envSecretRefName,omitempty" protobuf:"bytes,7,opt,name=envSecretRefName"` StorageInitializerImage string `json:"storageInitializerImage,omitempty" protobuf:"bytes,8,opt,name=storageInitializerImage"` Replicas *int32 `json:"replicas,omitempty" protobuf:"string,9,opt,name=replicas"` + InitParameters string `json:"initParameters,omitempty" protobuf:"string,10,opt,name=initParameters"` } // ObjectMeta is a copy of the "k8s.io/apimachinery/pkg/apis/meta/v1" ObjectMeta. 
diff --git a/operator/apis/machinelearning.seldon.io/v1/zz_generated.deepcopy.go b/operator/apis/machinelearning.seldon.io/v1/zz_generated.deepcopy.go index 79c0260b9c..eb7040fb7c 100644 --- a/operator/apis/machinelearning.seldon.io/v1/zz_generated.deepcopy.go +++ b/operator/apis/machinelearning.seldon.io/v1/zz_generated.deepcopy.go @@ -1,4 +1,3 @@ -//go:build !ignore_autogenerated // +build !ignore_autogenerated /* diff --git a/operator/apis/machinelearning.seldon.io/v1alpha2/zz_generated.deepcopy.go b/operator/apis/machinelearning.seldon.io/v1alpha2/zz_generated.deepcopy.go index dbb0cc8f25..0f7a9994a1 100644 --- a/operator/apis/machinelearning.seldon.io/v1alpha2/zz_generated.deepcopy.go +++ b/operator/apis/machinelearning.seldon.io/v1alpha2/zz_generated.deepcopy.go @@ -1,4 +1,3 @@ -//go:build !ignore_autogenerated // +build !ignore_autogenerated /* diff --git a/operator/apis/machinelearning.seldon.io/v1alpha3/zz_generated.deepcopy.go b/operator/apis/machinelearning.seldon.io/v1alpha3/zz_generated.deepcopy.go index dc93538fb4..dc29ba5127 100644 --- a/operator/apis/machinelearning.seldon.io/v1alpha3/zz_generated.deepcopy.go +++ b/operator/apis/machinelearning.seldon.io/v1alpha3/zz_generated.deepcopy.go @@ -1,4 +1,3 @@ -//go:build !ignore_autogenerated // +build !ignore_autogenerated /* diff --git a/operator/config/crd/bases/machinelearning.seldon.io_seldondeployments.yaml b/operator/config/crd/bases/machinelearning.seldon.io_seldondeployments.yaml index 2a4e88fb30..1f72442b08 100644 --- a/operator/config/crd/bases/machinelearning.seldon.io_seldondeployments.yaml +++ b/operator/config/crd/bases/machinelearning.seldon.io_seldondeployments.yaml @@ -9322,6 +9322,8 @@ spec: type: object envSecretRefName: type: string + initParameters: + type: string modelUri: type: string replicas: diff --git a/operator/config/crd_v1/bases/machinelearning.seldon.io_seldondeployments.yaml b/operator/config/crd_v1/bases/machinelearning.seldon.io_seldondeployments.yaml index 
8300097a7b..bb0c627646 100644 --- a/operator/config/crd_v1/bases/machinelearning.seldon.io_seldondeployments.yaml +++ b/operator/config/crd_v1/bases/machinelearning.seldon.io_seldondeployments.yaml @@ -9598,6 +9598,8 @@ spec: type: object envSecretRefName: type: string + initParameters: + type: string modelUri: type: string replicas: @@ -19546,6 +19548,8 @@ spec: type: object envSecretRefName: type: string + initParameters: + type: string modelUri: type: string replicas: @@ -29494,6 +29498,8 @@ spec: type: object envSecretRefName: type: string + initParameters: + type: string modelUri: type: string replicas: diff --git a/operator/config/manager/configmap.yaml b/operator/config/manager/configmap.yaml index 6409a9d1ea..41841ef6e2 100644 --- a/operator/config/manager/configmap.yaml +++ b/operator/config/manager/configmap.yaml @@ -93,4 +93,5 @@ data: explainer: |- { - "image" : "seldonio/alibiexplainer:1.1.1-SNAPSHOT" + "image" : "seldonio/alibiexplainer:1.1.1-SNAPSHOT", + "image_v2" : "seldonio/mlserver:0.6.0" } diff --git a/operator/controllers/mlserver.go b/operator/controllers/mlserver.go index 4500578897..545d8bbd3a 100644 --- a/operator/controllers/mlserver.go +++ b/operator/controllers/mlserver.go @@ -3,9 +3,10 @@ package controllers import ( "errors" "fmt" - "github.com/seldonio/seldon-core/operator/utils" "strconv" + + "github.com/seldonio/seldon-core/operator/utils" + machinelearningv1 "github.com/seldonio/seldon-core/operator/apis/machinelearning.seldon.io/v1" "github.com/seldonio/seldon-core/operator/constants" v1 "k8s.io/api/core/v1" @@ -13,10 +14,11 @@ import ( ) const ( - MLServerSKLearnImplementation = "mlserver_sklearn.SKLearnModel" - MLServerXGBoostImplementation = "mlserver_xgboost.XGBoostModel" - MLServerTempoImplementation = "tempo.mlserver.InferenceRuntime" - MLServerMLFlowImplementation = "mlserver_mlflow.MLflowRuntime" + MLServerSKLearnImplementation = "mlserver_sklearn.SKLearnModel" + MLServerXGBoostImplementation = "mlserver_xgboost.XGBoostModel" + MLServerTempoImplementation = 
"tempo.mlserver.InferenceRuntime" + MLServerMLFlowImplementation = "mlserver_mlflow.MLflowRuntime" + MLServerAlibiExplainImplementation = "mlserver_alibi_explain.AlibiExplainRuntime" MLServerHTTPPortEnv = "MLSERVER_HTTP_PORT" MLServerGRPCPortEnv = "MLSERVER_GRPC_PORT" @@ -24,6 +26,21 @@ const ( MLServerModelImplementationEnv = "MLSERVER_MODEL_IMPLEMENTATION" MLServerModelURIEnv = "MLSERVER_MODEL_URI" MLServerTempoRuntimeEnv = "TEMPO_RUNTIME_OPTIONS" + MLServerModelExtraEnv = "MLSERVER_MODEL_EXTRA" +) + +var ( + ExplainerTypeToMLServerExplainerType = map[machinelearningv1.AlibiExplainerType]string{ + machinelearningv1.AlibiAnchorsTabularExplainer: "anchor_tabular", + machinelearningv1.AlibiAnchorsImageExplainer: "anchor_image", + machinelearningv1.AlibiAnchorsTextExplainer: "anchor_text", + machinelearningv1.AlibiCounterfactualsExplainer: "counterfactuals", + machinelearningv1.AlibiContrastiveExplainer: "contrastive", + machinelearningv1.AlibiKernelShapExplainer: "kernel_shap", + machinelearningv1.AlibiIntegratedGradientsExplainer: "integrated_gradients", + machinelearningv1.AlibiALEExplainer: "ALE", + machinelearningv1.AlibiTreeShap: "tree_shap", + } ) func mergeMLServerContainer(existing *v1.Container, mlServer *v1.Container) *v1.Container { @@ -222,3 +239,71 @@ func getMLServerModelImplementation(pu *machinelearningv1.PredictiveUnit) (strin return "", nil } } + +func getAlibiExplainExplainerTypeTag(explainerType machinelearningv1.AlibiExplainerType) (string, error) { + tag, ok := ExplainerTypeToMLServerExplainerType[explainerType] + if ok { + return tag, nil + } else { + return "", errors.New(string(explainerType) + " not supported") + } +} + +func wrapDoubleQuotes(str string) string { + const escQuotes string = "\"" + return escQuotes + str + escQuotes +} +func getAlibiExplainExtraEnvVars(explainerType machinelearningv1.AlibiExplainerType, pSvcEndpoint string, graphName string, initParameters string) (string, error) { + // we need to pack one big envVar for 
MLSERVER_MODEL_EXTRA that can contain nested json / dict + explainerTypeTag, err := getAlibiExplainExplainerTypeTag(explainerType) + if err != nil { + return "", err + } + + v2URI := "http://" + pSvcEndpoint + "/v2/models/" + graphName + "/infer" + explainExtraEnv := "{" + wrapDoubleQuotes("explainer_type") + ":" + wrapDoubleQuotes(explainerTypeTag) + explainExtraEnv = explainExtraEnv + "," + wrapDoubleQuotes("infer_uri") + ":" + wrapDoubleQuotes(v2URI) + + if initParameters != "" { + //init parameters is passed as json string so we need to reconstruct the dictionary + explainExtraEnv = explainExtraEnv + "," + wrapDoubleQuotes("init_parameters") + ":" + initParameters + } + + // end + explainExtraEnv = explainExtraEnv + "}" + + return explainExtraEnv, nil +} + +func getAlibiExplainEnvVars(httpPortNum int, explainerModelName string, explainerType machinelearningv1.AlibiExplainerType, pSvcEndpoint string, graphName string, initParameters string) ([]v1.EnvVar, error) { + explain_extra_env, err := getAlibiExplainExtraEnvVars(explainerType, pSvcEndpoint, graphName, initParameters) + if err != nil { + return nil, err + } + alibiEnvs := []v1.EnvVar{ + { + Name: MLServerHTTPPortEnv, + Value: strconv.Itoa(httpPortNum), + }, + // note: we skip grpc port settings, relying on mlserver default + // TODO: add gprc port + { + Name: MLServerModelImplementationEnv, + Value: MLServerAlibiExplainImplementation, + }, + { + Name: MLServerModelNameEnv, + Value: explainerModelName, + }, + { + Name: MLServerModelURIEnv, + Value: DefaultModelLocalMountPath, + }, + { + Name: MLServerModelExtraEnv, + Value: explain_extra_env, + }, + } + return alibiEnvs, nil + +} diff --git a/operator/controllers/mlserver_test.go b/operator/controllers/mlserver_test.go index 8f263573f2..cd138c4f6e 100644 --- a/operator/controllers/mlserver_test.go +++ b/operator/controllers/mlserver_test.go @@ -2,6 +2,7 @@ package controllers import ( "fmt" + machinelearningv1 
"github.com/seldonio/seldon-core/operator/apis/machinelearning.seldon.io/v1" v1 "k8s.io/api/core/v1" @@ -162,3 +163,46 @@ var _ = Describe("MLServer helpers", func() { ) }) }) + +var _ = Describe("MLServer explain helpers", func() { + Describe("getAlibiExplainExtraEnvVars", func() { + DescribeTable( + "returns the right extra envs", + func(explainerType machinelearningv1.AlibiExplainerType, pSvcEndpoint string, graphName string, initParameters string, expected string) { + + extraEnvs, _ := getAlibiExplainExtraEnvVars(explainerType, pSvcEndpoint, graphName, initParameters) + Expect(extraEnvs).To(Equal(expected)) + }, + Entry("anchor text", machinelearningv1.AlibiAnchorsTabularExplainer, "url", "p", "", "{\"explainer_type\":\"anchor_tabular\",\"infer_uri\":\"http://url/v2/models/p/infer\"}"), + Entry("anchor image", machinelearningv1.AlibiAnchorsImageExplainer, "url", "p", "", "{\"explainer_type\":\"anchor_image\",\"infer_uri\":\"http://url/v2/models/p/infer\"}"), + Entry("anchor text with empty init", machinelearningv1.AlibiAnchorsTabularExplainer, "url", "p", "{}", "{\"explainer_type\":\"anchor_tabular\",\"infer_uri\":\"http://url/v2/models/p/infer\",\"init_parameters\":{}}"), + Entry("anchor text with init", machinelearningv1.AlibiAnchorsTabularExplainer, "url", "p", "{\"v\":2}", "{\"explainer_type\":\"anchor_tabular\",\"infer_uri\":\"http://url/v2/models/p/infer\",\"init_parameters\":{\"v\":2}}"), + ) + }) + + Describe("getAlibiExplainExplainerTypeTag", func() { + DescribeTable( + "returns the right explainer tag", + func(explainerType machinelearningv1.AlibiExplainerType, expected string) { + + tag, err := getAlibiExplainExplainerTypeTag(explainerType) + if err == nil { + Expect(tag).To(Equal(expected)) + } else { + // if there is an error, the tag should also be "" + Expect(tag).To(Equal("")) + } + }, + Entry("anchor text", machinelearningv1.AlibiAnchorsTabularExplainer, "anchor_tabular"), + Entry("anchor image", machinelearningv1.AlibiAnchorsImageExplainer, 
"anchor_image"), + Entry("anchor image", machinelearningv1.AlibiAnchorsTextExplainer, "anchor_text"), + Entry("anchor image", machinelearningv1.AlibiCounterfactualsExplainer, "counterfactuals"), + Entry("anchor image", machinelearningv1.AlibiContrastiveExplainer, "contrastive"), + Entry("anchor image", machinelearningv1.AlibiKernelShapExplainer, "kernel_shap"), + Entry("anchor image", machinelearningv1.AlibiIntegratedGradientsExplainer, "integrated_gradients"), + Entry("anchor image", machinelearningv1.AlibiALEExplainer, "ALE"), + Entry("anchor image", machinelearningv1.AlibiTreeShap, "tree_shap"), + Entry("unknown", machinelearningv1.AlibiExplainerType("unknown"), ""), + ) + }) +}) diff --git a/operator/controllers/seldondeployment_controller.go b/operator/controllers/seldondeployment_controller.go index 6b1a42b307..023ce651e2 100644 --- a/operator/controllers/seldondeployment_controller.go +++ b/operator/controllers/seldondeployment_controller.go @@ -1082,12 +1082,6 @@ func (r *SeldonDeploymentReconciler) createIstioServices(components *components, func (r *SeldonDeploymentReconciler) createServices(components *components, instance *machinelearningv1.SeldonDeployment, all bool, log logr.Logger) (bool, error) { ready := true for _, svc := range components.services { - if !all { - if _, ok := svc.Annotations[AMBASSADOR_ANNOTATION]; ok { - log.Info("Skipping Ambassador Svc", "all", all, "namespace", svc.Namespace, "name", svc.Name) - continue - } - } if err := ctrl.SetControllerReference(instance, svc, r.Scheme); err != nil { return ready, err } diff --git a/operator/controllers/seldondeployment_explainers.go b/operator/controllers/seldondeployment_explainers.go index e43c58a5ef..806983b018 100644 --- a/operator/controllers/seldondeployment_explainers.go +++ b/operator/controllers/seldondeployment_explainers.go @@ -58,7 +58,8 @@ func NewExplainerInitializer(ctx context.Context, clientset kubernetes.Interface } type ExplainerConfig struct { - Image string 
`json:"image"` + Image string `json:"image"` + Image_v2 string `json:"image_v2"` } func (ei *ExplainerInitialiser) getExplainerConfigs() (*ExplainerConfig, error) { @@ -103,6 +104,14 @@ func (ei *ExplainerInitialiser) createExplainer(mlDep *machinelearningv1.SeldonD p.Graph.Endpoint = &machinelearningv1.Endpoint{Type: machinelearningv1.REST} } + explainerProtocol := string(machinelearningv1.ProtocolSeldon) + if mlDep.Spec.Protocol == machinelearningv1.ProtocolTensorflow { + explainerProtocol = string(machinelearningv1.ProtocolTensorflow) + } + if mlDep.Spec.Protocol == machinelearningv1.ProtocolKfserving { + explainerProtocol = string(machinelearningv1.ProtocolKfserving) + } + // Image from configMap or Relalated Image if its not set if explainerContainer.Image == "" { if envExplainerImage != "" { @@ -112,7 +121,11 @@ func (ei *ExplainerInitialiser) createExplainer(mlDep *machinelearningv1.SeldonD if err != nil { return err } - explainerContainer.Image = config.Image + if explainerProtocol == string(machinelearningv1.ProtocolKfserving) { + explainerContainer.Image = config.Image_v2 + } else { + explainerContainer.Image = config.Image + } } } @@ -138,14 +151,6 @@ func (ei *ExplainerInitialiser) createExplainer(mlDep *machinelearningv1.SeldonD pSvcEndpoint = c.serviceDetails[pSvcName].HttpEndpoint } - explainerProtocol := string(machinelearningv1.ProtocolSeldon) - if mlDep.Spec.Protocol == machinelearningv1.ProtocolTensorflow { - explainerProtocol = string(machinelearningv1.ProtocolTensorflow) - } - if mlDep.Spec.Protocol == machinelearningv1.ProtocolKfserving { - explainerProtocol = string(machinelearningv1.ProtocolKfserving) - } - if customPort == nil { explainerContainer.Ports = append(explainerContainer.Ports, corev1.ContainerPort{Name: portType, ContainerPort: portNum, Protocol: corev1.ProtocolTCP}) } else { @@ -165,37 +170,48 @@ func (ei *ExplainerInitialiser) createExplainer(mlDep *machinelearningv1.SeldonD explainerContainer.Lifecycle = 
&corev1.Lifecycle{PreStop: &corev1.Handler{Exec: &corev1.ExecAction{Command: []string{"/bin/sh", "-c", "/bin/sleep 10"}}}} } - explainerContainer.Args = []string{ - "--model_name=" + mlDep.Name, - "--predictor_host=" + pSvcEndpoint, - "--protocol=" + explainerProtocol + "." + explainerTransport, - "--http_port=" + strconv.Itoa(int(portNum)), - } + if explainerProtocol == string(machinelearningv1.ProtocolKfserving) { + // add mlserver alibi runtime env vars + // alibi-specific json + explainEnvs, err := getAlibiExplainEnvVars(int(portNum), explainerContainer.Name, p.Explainer.Type, pSvcEndpoint, p.Graph.Name, p.Explainer.InitParameters) + if err != nil { + return err + } - if p.Explainer.ModelUri != "" { - explainerContainer.Args = append(explainerContainer.Args, "--storage_uri="+DefaultModelLocalMountPath) - } + explainerContainer.Env = explainEnvs + } else { + explainerContainer.Args = []string{ + "--model_name=" + mlDep.Name, + "--predictor_host=" + pSvcEndpoint, + "--protocol=" + explainerProtocol + "." 
+ explainerTransport, + "--http_port=" + strconv.Itoa(int(portNum)), + } - explainerContainer.Args = append(explainerContainer.Args, string(p.Explainer.Type)) + if p.Explainer.ModelUri != "" { + explainerContainer.Args = append(explainerContainer.Args, "--storage_uri="+DefaultModelLocalMountPath) + } - if p.Explainer.Type == machinelearningv1.AlibiAnchorsImageExplainer { - explainerContainer.Args = append(explainerContainer.Args, "--tf_data_type=float32") - } + explainerContainer.Args = append(explainerContainer.Args, string(p.Explainer.Type)) - // Order explainer config map keys - var keys []string - for k, _ := range p.Explainer.Config { - keys = append(keys, k) - } - sort.Strings(keys) - for _, k := range keys { - v := p.Explainer.Config[k] - //remote files in model location should get downloaded by initializer - if p.Explainer.ModelUri != "" { - v = strings.Replace(v, p.Explainer.ModelUri, "/mnt/models", 1) + if p.Explainer.Type == machinelearningv1.AlibiAnchorsImageExplainer { + explainerContainer.Args = append(explainerContainer.Args, "--tf_data_type=float32") + } + + // Order explainer config map keys + var keys []string + for k, _ := range p.Explainer.Config { + keys = append(keys, k) + } + sort.Strings(keys) + for _, k := range keys { + v := p.Explainer.Config[k] + //remote files in model location should get downloaded by initializer + if p.Explainer.ModelUri != "" { + v = strings.Replace(v, p.Explainer.ModelUri, "/mnt/models", 1) + } + arg := "--" + k + "=" + v + explainerContainer.Args = append(explainerContainer.Args, arg) } - arg := "--" + k + "=" + v - explainerContainer.Args = append(explainerContainer.Args, arg) } seldonPodSpec := machinelearningv1.SeldonPodSpec{Spec: corev1.PodSpec{ diff --git a/operator/controllers/seldondeployment_explainers_test.go b/operator/controllers/seldondeployment_explainers_test.go index 3d40f1c803..27e5cfba63 100644 --- a/operator/controllers/seldondeployment_explainers_test.go +++ 
b/operator/controllers/seldondeployment_explainers_test.go @@ -152,6 +152,109 @@ var _ = Describe("createExplainer", func() { ) }) +var _ = Describe("Create a V2 Seldon Deployment with explainer", func() { + const timeout = time.Second * 30 + const interval = time.Second * 1 + namespaceName := rand.String(10) + v2protocol := machinelearningv1.ProtocolKfserving + explainerInitParameters := "{'a': 1, 'b': 's', 'c': {'c1': [1, 1]}}" + By("Creating a resource") + It("should create a resource with defaults", func() { + Expect(k8sClient).NotTo(BeNil()) + + modelType := machinelearningv1.MODEL + modelImplementation := machinelearningv1.PredictiveUnitImplementation( + machinelearningv1.PrepackSklearnName, + ) + key := types.NamespacedName{ + Name: "dep", + Namespace: namespaceName, + } + instance := &machinelearningv1.SeldonDeployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: key.Name, + Namespace: key.Namespace, + }, + Spec: machinelearningv1.SeldonDeploymentSpec{ + Protocol: v2protocol, + Name: "mydep", + Predictors: []machinelearningv1.PredictorSpec{ + { + Name: "p1", + ComponentSpecs: []*machinelearningv1.SeldonPodSpec{ + { + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "classifier", + }, + }, + }, + }, + }, + Graph: machinelearningv1.PredictiveUnit{ + Name: "classifier", + Type: &modelType, + Implementation: &modelImplementation, + }, + Explainer: &machinelearningv1.Explainer{ + Type: machinelearningv1.AlibiAnchorsImageExplainer, + InitParameters: explainerInitParameters, + }, + }, + }, + }, + } + + //Create namespace + namespace := &v1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: namespaceName, + }, + } + Expect(k8sClient.Create(context.Background(), namespace)).Should(Succeed()) + + // Run Defaulter + instance.Default() + envUseExecutor = "true" + envDefaultUser = "2" + Expect(k8sClient.Create(context.Background(), instance)).Should(Succeed()) + + depKey := types.NamespacedName{ + Name: machinelearningv1.GetDeploymentName(instance, 
instance.Spec.Predictors[0], instance.Spec.Predictors[0].ComponentSpecs[0], 0), + Namespace: namespaceName, + } + depFetched := &appsv1.Deployment{} + Eventually(func() error { + err := k8sClient.Get(context.Background(), depKey, depFetched) + return err + }, timeout, interval).Should(BeNil()) + + //Check explainer deployment + depKey = types.NamespacedName{ + Name: machinelearningv1.GetExplainerDeploymentName(instance.Name, &instance.Spec.Predictors[0]), + Namespace: namespaceName, + } + depFetched = &appsv1.Deployment{} + Eventually(func() error { + err := k8sClient.Get(context.Background(), depKey, depFetched) + return err + }, timeout, interval).Should(BeNil()) + explainerEnvs := depFetched.Spec.Template.Spec.Containers[0].Env + explainerExpectedExtraEnvs, _ := getAlibiExplainExtraEnvVars(machinelearningv1.AlibiAnchorsImageExplainer, "dep-p1."+namespaceName+":8000", "classifier", explainerInitParameters) + explainerExpectedEnvs := []v1.EnvVar{ + {Name: MLServerHTTPPortEnv, Value: "9000"}, + {Name: MLServerModelImplementationEnv, Value: MLServerAlibiExplainImplementation}, + {Name: MLServerModelNameEnv, Value: "dep-p1-explainer"}, + {Name: MLServerModelURIEnv, Value: DefaultModelLocalMountPath}, + {Name: MLServerModelExtraEnv, Value: explainerExpectedExtraEnvs}, + } + Expect(explainerEnvs).Should(Equal(explainerExpectedEnvs)) + Expect(depFetched.Spec.Template.Spec.Containers[0].Image).Should(Equal("seldonio/mlserver:0.6.0")) + }) + +}) + var _ = Describe("Create a Seldon Deployment with explainer", func() { const timeout = time.Second * 30 const interval = time.Second * 1 diff --git a/operator/controllers/suite_test.go b/operator/controllers/suite_test.go index a435ce2f39..b9c4d9caa3 100644 --- a/operator/controllers/suite_test.go +++ b/operator/controllers/suite_test.go @@ -132,7 +132,8 @@ var configs = map[string]string{ }`, "explainer": ` { - "image" : "seldonio/alibiexplainer:1.2.0" + "image" : "seldonio/alibiexplainer:1.2.0", + "image_v2" : 
"seldonio/mlserver:0.6.0" }`, } diff --git a/operator/testing/machinelearning.seldon.io_seldondeployments.yaml b/operator/testing/machinelearning.seldon.io_seldondeployments.yaml index b99a840551..538c65c068 100644 --- a/operator/testing/machinelearning.seldon.io_seldondeployments.yaml +++ b/operator/testing/machinelearning.seldon.io_seldondeployments.yaml @@ -7600,6 +7600,8 @@ spec: type: object envSecretRefName: type: string + initParameters: + type: string modelUri: type: string replicas: diff --git a/testing/resources/iris_anchor_tabular_explainer_v2.yaml b/testing/resources/iris_anchor_tabular_explainer_v2.yaml new file mode 100644 index 0000000000..8918355420 --- /dev/null +++ b/testing/resources/iris_anchor_tabular_explainer_v2.yaml @@ -0,0 +1,20 @@ +apiVersion: machinelearning.seldon.io/v1 +kind: SeldonDeployment +metadata: + name: iris +spec: + protocol: kfserving # Activate v2 protocol / mlserver usage + name: iris + annotations: + seldon.io/rest-timeout: "100000" + predictors: + - graph: + children: [] + implementation: SKLEARN_SERVER + modelUri: gs://seldon-models/v1.11.0-dev/sklearn/iris + name: classifier + explainer: + type: AnchorTabular + modelUri: gs://seldon-models/alibi/iris_anchor_tabular_explainer_v2 + name: default + replicas: 1 \ No newline at end of file diff --git a/testing/scripts/test_alibi_explain_v2.py b/testing/scripts/test_alibi_explain_v2.py new file mode 100644 index 0000000000..a402203f86 --- /dev/null +++ b/testing/scripts/test_alibi_explain_v2.py @@ -0,0 +1,69 @@ +import json +import time +from subprocess import run + +import numpy as np +import pytest +import requests +from tenacity import Retrying, stop_after_attempt, wait_fixed + +from seldon_e2e_utils import retry_run, wait_for_deployment + +# NOTE: +# to recreate the artifacts for these test: +# 1. use notebooks/explainer_examples_v2.ipynb to create them +# 2. 
# upload to gs

# End-to-end timings: the explainer container may need extra time to pull
# its model artifact and come up, so we sleep once after the Deployments
# report ready and then retry the HTTP call on a fixed cadence.
AFTER_WAIT_SLEEP = 20
TENACITY_WAIT = 10
TENACITY_STOP_AFTER_ATTEMPT = 5


class TestExplainV2Server:
    @pytest.mark.sequential
    def test_alibi_explain_anchor_tabular(self, namespace):
        """End-to-end check of an AnchorTabular explainer over the v2 protocol.

        Applies a SeldonDeployment (kfserving/v2 protocol, MLServer-based
        explainer), waits for the rollout, POSTs a v2 inference request to
        the explainer's ``/infer`` endpoint, and asserts on the structure of
        the returned Alibi explanation.

        NOTE(review): assumes the cluster ingress is reachable on
        ``localhost:8004`` -- presumably provided by the e2e harness; confirm
        against the test environment setup.
        """
        spec = "../resources/iris_anchor_tabular_explainer_v2.yaml"
        name = "iris-default-explainer"
        # Virtual-service path for the explainer under the v2 protocol; note
        # this is the standard /infer route, not a dedicated /explain route.
        vs_prefix = (
            f"seldon/{namespace}/iris-explainer/default/v2/models/"
            f"iris-default-explainer/infer"
        )

        test_data = np.array([[5.964, 4.006, 2.081, 1.031]])
        # v2 (Open Inference Protocol) request body; content_type "np" asks
        # the server to decode the payload back into a numpy array.
        inference_request = {
            "parameters": {"content_type": "np"},
            "inputs": [
                {
                    "name": "explain",
                    "shape": test_data.shape,
                    "datatype": "FP32",
                    "data": test_data.tolist(),
                    "parameters": {"content_type": "np"},
                },
            ],
        }

        retry_run(f"kubectl apply -f {spec} -n {namespace}")

        wait_for_deployment(name, namespace)

        # Deployment readiness does not guarantee the explainer has finished
        # loading its artifact; give it additional settle time.
        time.sleep(AFTER_WAIT_SLEEP)

        # Retry the whole request+assert cycle: tenacity's default retries on
        # any exception, so connection errors and assertion failures alike
        # trigger another attempt until the attempt limit is reached.
        for attempt in Retrying(
            wait=wait_fixed(TENACITY_WAIT),
            stop=stop_after_attempt(TENACITY_STOP_AFTER_ATTEMPT),
        ):
            with attempt:
                r = requests.post(
                    f"http://localhost:8004/{vs_prefix}",
                    json=inference_request,
                )
                # note: explanation will come back in v2 as a nested json dictionary
                explanation = json.loads(r.json()["outputs"][0]["data"])

                assert explanation["meta"]["name"] == "AnchorTabular"
                assert "anchor" in explanation["data"]
                assert "precision" in explanation["data"]
                assert "coverage" in explanation["data"]

        # Best-effort cleanup: plain `run` (not retry_run), so a failed
        # delete does not fail the test.
        run(f"kubectl delete -f {spec} -n {namespace}", shell=True)