From 02479ff7c9858feb0589094a514694313a2ff8d8 Mon Sep 17 00:00:00 2001 From: Clive Cox Date: Fri, 28 May 2021 19:39:21 +0100 Subject: [PATCH 1/2] Add explainer replicas and handle scale to zero --- ...deployments.machinelearning.seldon.io.yaml | 3 + ...deployments.machinelearning.seldon.io.yaml | 9 ++ notebooks/explainer_examples.ipynb | 137 +++++++++++++++--- .../v1/seldondeployment_types.go | 1 + .../v1/zz_generated.deepcopy.go | 5 + ...elearning.seldon.io_seldondeployments.yaml | 3 + ...elearning.seldon.io_seldondeployments.yaml | 9 ++ operator/controllers/mlserver.go | 2 +- .../seldondeployment_explainers.go | 9 ++ .../seldondeployment_explainers_test.go | 6 +- ...elearning.seldon.io_seldondeployments.yaml | 3 + 11 files changed, 160 insertions(+), 27 deletions(-) diff --git a/helm-charts/seldon-core-operator/templates/customresourcedefinition_seldondeployments.machinelearning.seldon.io.yaml b/helm-charts/seldon-core-operator/templates/customresourcedefinition_seldondeployments.machinelearning.seldon.io.yaml index 15550cc48a..4e80ca312c 100644 --- a/helm-charts/seldon-core-operator/templates/customresourcedefinition_seldondeployments.machinelearning.seldon.io.yaml +++ b/helm-charts/seldon-core-operator/templates/customresourcedefinition_seldondeployments.machinelearning.seldon.io.yaml @@ -4016,6 +4016,9 @@ spec: type: string modelUri: type: string + replicas: + format: int32 + type: integer serviceAccountName: type: string storageInitializerImage: diff --git a/helm-charts/seldon-core-operator/templates/customresourcedefinition_v1_seldondeployments.machinelearning.seldon.io.yaml b/helm-charts/seldon-core-operator/templates/customresourcedefinition_v1_seldondeployments.machinelearning.seldon.io.yaml index 2b1059431e..1185cfd0e3 100644 --- a/helm-charts/seldon-core-operator/templates/customresourcedefinition_v1_seldondeployments.machinelearning.seldon.io.yaml +++ b/helm-charts/seldon-core-operator/templates/customresourcedefinition_v1_seldondeployments.machinelearning.seldon.io.yaml @@ -4879,6 +4879,9 @@ spec: type: string modelUri: type: string + replicas: + format: int32 + type: integer serviceAccountName: type: string storageInitializerImage: @@ -10671,6 +10674,9 @@ spec: type: string modelUri: type: string + replicas: + format: int32 + type: integer serviceAccountName: type: string storageInitializerImage: @@ -16463,6 +16469,9 @@ spec: type: string modelUri: type: string + replicas: + format: int32 + type: integer serviceAccountName: type: string storageInitializerImage: diff --git a/notebooks/explainer_examples.ipynb b/notebooks/explainer_examples.ipynb index 420cf6e9ca..e426c1c914 100644 --- a/notebooks/explainer_examples.ipynb +++ b/notebooks/explainer_examples.ipynb @@ -74,9 +74,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Error from server (AlreadyExists): namespaces \"seldon\" already exists\r\n" + ] + } + ], "source": [ "!kubectl create namespace seldon" ] @@ -90,14 +98,14 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Context \"gke_seldon-demos_europe-west1-d_gtc\" modified.\n" + "Context \"gke_seldon-demos_europe-west2-a_deploy-test\" modified.\r\n" ] } ], @@ -107,7 +115,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -121,14 +129,24 @@ "## Income Prediction Model with Anchors Explainer\n", "The model and explainer used here can be trained yourself following the full example in the [Alibi Anchor Explanations for Income Notebook](https://docs.seldon.io/projects/alibi/en/latest/examples/anchor_tabular_adult.html) in the Alibi project documentation.\n", "\n", + "This example also shows you can specify the replicas for your explainer.\n", + "\n", "Note we used a python3.6 and version 0.5.2 of Alibi." ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Overwriting resources/income_explainer.yaml\n" + ] + } + ], "source": [ "%%writefile resources/income_explainer.yaml\n", "apiVersion: machinelearning.seldon.io/v1\n", @@ -148,44 +166,69 @@ " explainer:\n", " type: AnchorTabular\n", " modelUri: gs://seldon-models/sklearn/income/explainer-py36-0.5.2\n", + " replicas: 2\n", " name: default\n", " replicas: 1" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "metadata": { "scrolled": true }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "seldondeployment.machinelearning.seldon.io/income configured\r\n" + ] + } + ], "source": [ "!kubectl apply -f resources/income_explainer.yaml" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "deployment \"income-default-0-classifier\" successfully rolled out\r\n" + ] + } + ], "source": [ "!kubectl rollout status deploy/$(kubectl get deploy -l seldon-deployment-id=income -o jsonpath='{.items[0].metadata.name}')" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, "metadata": { "scrolled": true }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "deployment \"income-default-explainer\" successfully rolled out\r\n" + ] + } + ], "source": [ "!kubectl rollout status deploy/income-default-explainer" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "metadata": { "scrolled": true }, @@ -205,11 +248,19 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 24, "metadata": { "scrolled": true }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'data': {'names': ['t:0', 't:1'], 'tensor': {'shape': [1, 2], 'values': [0.8585304277244477, 0.14146957227555243]}}, 'meta': {'requestPath': {'classifier': 'seldonio/sklearnserver:1.9.0-dev'}}}\n" + ] + } + ], "source": [ "data = np.array([[39, 7, 1, 1, 1, 1, 4, 1, 2174, 0, 40, 9]])\n", "r = sc.predict(data=data)\n", @@ -225,9 +276,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 25, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\"data\":{\"names\":[\"t:0\",\"t:1\"],\"ndarray\":[[0.8585304277244477,0.14146957227555243]]},\"meta\":{\"requestPath\":{\"classifier\":\"seldonio/sklearnserver:1.9.0-dev\"}}}\r\n" + ] + } + ], "source": [ "!curl -d '{\"data\": {\"ndarray\":[[39, 7, 1, 1, 1, 1, 4, 1, 2174, 0, 40, 9]]}}' \\\n", " -X POST http://localhost:8003/seldon/seldon/income/api/v1.0/predictions \\\n", @@ -243,11 +302,19 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 31, "metadata": { "scrolled": true }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['Marital Status = Never-Married', 'Hours per week <= 40.00']\n" + ] + } + ], "source": [ "data = np.array([[39, 7, 1, 1, 1, 1, 4, 1, 2174, 0, 40, 9]])\n", "explanation = sc.explain(deployment_name=\"income\", predictor=\"default\", data=data)\n", @@ -263,9 +330,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 29, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " % Total % Received % Xferd Average Speed Time Time Time Current\n", + " Dload Upload Total Spent Left Speed\n", + "100 2319 100 2228 100 91 15690 640 --:--:-- --:--:-- --:--:-- 15801\n", + "\u001b[1;39m[\n", + " \u001b[0;32m\"Marital Status = Separated\"\u001b[0m\u001b[1;39m,\n", + " \u001b[0;32m\"Capital Loss <= 0.00\"\u001b[0m\u001b[1;39m\n", + "\u001b[1;39m]\u001b[0m\n" + ] + } + ], "source": [ "!curl -X POST -H 'Content-Type: application/json' \\\n", " -d '{\"data\": {\"names\": [\"text\"], \"ndarray\": [[52, 4, 0, 2, 8, 4, 2, 0, 0, 0, 60, 9]]}}' \\\n", @@ -274,11 +355,19 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": { "scrolled": true }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "seldondeployment.machinelearning.seldon.io \"income\" deleted\r\n" + ] + } + ], "source": [ "!kubectl delete -f resources/income_explainer.yaml" ] diff --git a/operator/apis/machinelearning.seldon.io/v1/seldondeployment_types.go b/operator/apis/machinelearning.seldon.io/v1/seldondeployment_types.go index 382e3fa406..364d8513c7 100644 --- a/operator/apis/machinelearning.seldon.io/v1/seldondeployment_types.go +++ b/operator/apis/machinelearning.seldon.io/v1/seldondeployment_types.go @@ -289,6 +289,7 @@ type Explainer struct { Endpoint *Endpoint `json:"endpoint,omitempty" protobuf:"bytes,6,opt,name=endpoint"` EnvSecretRefName string `json:"envSecretRefName,omitempty" protobuf:"bytes,7,opt,name=envSecretRefName"` StorageInitializerImage string `json:"storageInitializerImage,omitempty" protobuf:"bytes,8,opt,name=storageInitializerImage"` + Replicas *int32 `json:"replicas,omitempty" protobuf:"string,4,opt,name=replicas"` } // ObjectMeta is a copy of the "k8s.io/apimachinery/pkg/apis/meta/v1" ObjectMeta. diff --git a/operator/apis/machinelearning.seldon.io/v1/zz_generated.deepcopy.go b/operator/apis/machinelearning.seldon.io/v1/zz_generated.deepcopy.go index 51f02f39e1..b9a0fc8305 100644 --- a/operator/apis/machinelearning.seldon.io/v1/zz_generated.deepcopy.go +++ b/operator/apis/machinelearning.seldon.io/v1/zz_generated.deepcopy.go @@ -75,6 +75,11 @@ func (in *Explainer) DeepCopyInto(out *Explainer) { *out = new(Endpoint) **out = **in } + if in.Replicas != nil { + in, out := &in.Replicas, &out.Replicas + *out = new(int32) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Explainer. diff --git a/operator/config/crd/bases/machinelearning.seldon.io_seldondeployments.yaml b/operator/config/crd/bases/machinelearning.seldon.io_seldondeployments.yaml index 3870a336d5..a5c681c349 100644 --- a/operator/config/crd/bases/machinelearning.seldon.io_seldondeployments.yaml +++ b/operator/config/crd/bases/machinelearning.seldon.io_seldondeployments.yaml @@ -8337,6 +8337,9 @@ spec: type: string modelUri: type: string + replicas: + format: int32 + type: integer serviceAccountName: type: string storageInitializerImage: diff --git a/operator/config/crd_v1/bases/machinelearning.seldon.io_seldondeployments.yaml b/operator/config/crd_v1/bases/machinelearning.seldon.io_seldondeployments.yaml index 3a265eeca7..27f92b2a86 100644 --- a/operator/config/crd_v1/bases/machinelearning.seldon.io_seldondeployments.yaml +++ b/operator/config/crd_v1/bases/machinelearning.seldon.io_seldondeployments.yaml @@ -8578,6 +8578,9 @@ spec: type: string modelUri: type: string + replicas: + format: int32 + type: integer serviceAccountName: type: string storageInitializerImage: @@ -17452,6 +17455,9 @@ spec: type: string modelUri: type: string + replicas: + format: int32 + type: integer serviceAccountName: type: string storageInitializerImage: @@ -26326,6 +26332,9 @@ spec: type: string modelUri: type: string + replicas: + format: int32 + type: integer serviceAccountName: type: string storageInitializerImage: diff --git a/operator/controllers/mlserver.go b/operator/controllers/mlserver.go index 58f2777dd3..5a21a8097d 100644 --- a/operator/controllers/mlserver.go +++ b/operator/controllers/mlserver.go @@ -36,7 +36,7 @@ func mergeMLServerContainer(existing *v1.Container, mlServer *v1.Container) *v1. if existing.Image == "" { existing.Image = mlServer.Image } - + if existing.Env == nil { existing.Env = []v1.EnvVar{} } diff --git a/operator/controllers/seldondeployment_explainers.go b/operator/controllers/seldondeployment_explainers.go index f962395316..7468cf9ba6 100644 --- a/operator/controllers/seldondeployment_explainers.go +++ b/operator/controllers/seldondeployment_explainers.go @@ -201,6 +201,15 @@ func (ei *ExplainerInitialiser) createExplainer(mlDep *machinelearningv1.SeldonD deploy := createDeploymentWithoutEngine(depName, seldonId, &seldonPodSpec, p, mlDep, podSecurityContect) + // Set replicas to zero if main predictor or graph has zero replicas otherwise set to explainer replicas + if p.Replicas != nil && *p.Replicas == 0 { + deploy.Spec.Replicas = p.Replicas + } else if p.Replicas == nil && mlDep.Spec.Replicas != nil && *mlDep.Spec.Replicas == 0 { + deploy.Spec.Replicas = mlDep.Spec.Replicas + } else { + deploy.Spec.Replicas = p.Explainer.Replicas + } + if p.Explainer.ModelUri != "" { var err error diff --git a/operator/controllers/seldondeployment_explainers_test.go b/operator/controllers/seldondeployment_explainers_test.go index 911d7c546d..6b24774406 100644 --- a/operator/controllers/seldondeployment_explainers_test.go +++ b/operator/controllers/seldondeployment_explainers_test.go @@ -156,6 +156,7 @@ var _ = Describe("Create a Seldon Deployment with explainer", func() { const timeout = time.Second * 30 const interval = time.Second * 1 namespaceName := rand.String(10) + replicas := int32(2) By("Creating a resource") It("should create a resource with defaults", func() { Expect(k8sClient).NotTo(BeNil()) @@ -191,7 +192,8 @@ var _ = Describe("Create a Seldon Deployment with explainer", func() { Type: &modelType, }, Explainer: &machinelearningv1.Explainer{ - Type: machinelearningv1.AlibiAnchorsTabularExplainer, + Type: machinelearningv1.AlibiAnchorsTabularExplainer, + Replicas: &replicas, }, }, }, @@ -245,7 +247,7 @@ var _ = Describe("Create a Seldon Deployment with explainer", func() { return err }, timeout, interval).Should(BeNil()) Expect(len(depFetched.Spec.Template.Spec.Containers)).Should(Equal(1)) - Expect(*depFetched.Spec.Replicas).To(Equal(int32(1))) + Expect(*depFetched.Spec.Replicas).To(Equal(int32(2))) Expect(*depFetched.Spec.Template.Spec.SecurityContext.RunAsUser).To(Equal(int64(2))) Expect(depFetched.Spec.Template.Spec.Containers[0].Image).To(Equal("seldonio/alibiexplainer:1.2.0")) diff --git a/operator/testing/machinelearning.seldon.io_seldondeployments.yaml b/operator/testing/machinelearning.seldon.io_seldondeployments.yaml index 54e3b7955d..9319d265ca 100644 --- a/operator/testing/machinelearning.seldon.io_seldondeployments.yaml +++ b/operator/testing/machinelearning.seldon.io_seldondeployments.yaml @@ -6886,6 +6886,9 @@ spec: type: string modelUri: type: string + replicas: + format: int32 + type: integer serviceAccountName: type: string storageInitializerImage: From 1da64ac77e97a18cfd28a6f2ee017c614ce3184f Mon Sep 17 00:00:00 2001 From: Clive Cox Date: Tue, 1 Jun 2021 11:51:13 +0100 Subject: [PATCH 2/2] Add test for zero replicas --- .../seldondeployment_explainers_test.go | 141 +++++++++++++++++- 1 file changed, 136 insertions(+), 5 deletions(-) diff --git a/operator/controllers/seldondeployment_explainers_test.go b/operator/controllers/seldondeployment_explainers_test.go index 6b24774406..3d40f1c803 100644 --- a/operator/controllers/seldondeployment_explainers_test.go +++ b/operator/controllers/seldondeployment_explainers_test.go @@ -156,7 +156,8 @@ var _ = Describe("Create a Seldon Deployment with explainer", func() { const timeout = time.Second * 30 const interval = time.Second * 1 namespaceName := rand.String(10) - replicas := int32(2) + replicas := int32(3) + replicasExplainer := int32(2) By("Creating a resource") It("should create a resource with defaults", func() { Expect(k8sClient).NotTo(BeNil()) @@ -171,7 +172,8 @@ var _ = Describe("Create a Seldon Deployment with explainer", func() { Namespace: key.Namespace, }, Spec: machinelearningv1.SeldonDeploymentSpec{ - Name: "mydep", + Replicas: &replicas, + Name: "mydep", Predictors: []machinelearningv1.PredictorSpec{ { Name: "p1", @@ -193,7 +195,7 @@ var _ = Describe("Create a Seldon Deployment with explainer", func() { }, Explainer: &machinelearningv1.Explainer{ Type: machinelearningv1.AlibiAnchorsTabularExplainer, - Replicas: &replicas, + Replicas: &replicasExplainer, }, }, }, @@ -233,7 +235,7 @@ var _ = Describe("Create a Seldon Deployment with explainer", func() { return err }, timeout, interval).Should(BeNil()) Expect(len(depFetched.Spec.Template.Spec.Containers)).Should(Equal(2)) - Expect(*depFetched.Spec.Replicas).To(Equal(int32(1))) + Expect(*depFetched.Spec.Replicas).To(Equal(replicas)) Expect(*depFetched.Spec.Template.Spec.SecurityContext.RunAsUser).To(Equal(int64(2))) //Check explainer deployment @@ -247,7 +249,7 @@ var _ = Describe("Create a Seldon Deployment with explainer", func() { return err }, timeout, interval).Should(BeNil()) Expect(len(depFetched.Spec.Template.Spec.Containers)).Should(Equal(1)) - Expect(*depFetched.Spec.Replicas).To(Equal(int32(2))) + Expect(*depFetched.Spec.Replicas).To(Equal(replicasExplainer)) Expect(*depFetched.Spec.Template.Spec.SecurityContext.RunAsUser).To(Equal(int64(2))) Expect(depFetched.Spec.Template.Spec.Containers[0].Image).To(Equal("seldonio/alibiexplainer:1.2.0")) @@ -283,3 +285,132 @@ var _ = Describe("Create a Seldon Deployment with explainer", func() { }) }) + +var _ = Describe("Create a Seldon Deployment with zero replicas with explainer", func() { + const timeout = time.Second * 30 + const interval = time.Second * 1 + namespaceName := rand.String(10) + replicas := int32(0) + replicasExplainer := int32(3) + By("Creating a resource") + It("should create a resource with defaults", func() { + Expect(k8sClient).NotTo(BeNil()) + var modelType = machinelearningv1.MODEL + key := types.NamespacedName{ + Name: "dep", + Namespace: namespaceName, + } + instance := &machinelearningv1.SeldonDeployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: key.Name, + Namespace: key.Namespace, + }, + Spec: machinelearningv1.SeldonDeploymentSpec{ + Name: "mydep", + Replicas: &replicas, + Predictors: []machinelearningv1.PredictorSpec{ + { + Name: "p1", + ComponentSpecs: []*machinelearningv1.SeldonPodSpec{ + { + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Image: "seldonio/mock_classifier:1.0", + Name: "classifier", + }, + }, + }, + }, + }, + Graph: machinelearningv1.PredictiveUnit{ + Name: "classifier", + Type: &modelType, + }, + Explainer: &machinelearningv1.Explainer{ + Type: machinelearningv1.AlibiAnchorsTabularExplainer, + Replicas: &replicasExplainer, + }, + }, + }, + }, + } + + //Create namespace + namespace := &v1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: namespaceName, + }, + } + Expect(k8sClient.Create(context.Background(), namespace)).Should(Succeed()) + + // Run Defaulter + instance.Default() + envUseExecutor = "true" + envDefaultUser = "2" + Expect(k8sClient.Create(context.Background(), instance)).Should(Succeed()) + //time.Sleep(time.Second * 5) + + fetched := &machinelearningv1.SeldonDeployment{} + Eventually(func() error { + err := k8sClient.Get(context.Background(), key, fetched) + return err + }, timeout, interval).Should(BeNil()) + Expect(fetched.Name).Should(Equal("dep")) + + // Check deployment created + depKey := types.NamespacedName{ + Name: machinelearningv1.GetDeploymentName(instance, instance.Spec.Predictors[0], instance.Spec.Predictors[0].ComponentSpecs[0], 0), + Namespace: namespaceName, + } + depFetched := &appsv1.Deployment{} + Eventually(func() error { + err := k8sClient.Get(context.Background(), depKey, depFetched) + return err + }, timeout, interval).Should(BeNil()) + Expect(*depFetched.Spec.Replicas).To(Equal(replicas)) + + //Check explainer deployment + depKey = types.NamespacedName{ + Name: machinelearningv1.GetExplainerDeploymentName(instance.Name, &instance.Spec.Predictors[0]), + Namespace: namespaceName, + } + depFetched = &appsv1.Deployment{} + Eventually(func() error { + err := k8sClient.Get(context.Background(), depKey, depFetched) + return err + }, timeout, interval).Should(BeNil()) + Expect(*depFetched.Spec.Replicas).To(Equal(replicas)) + + //Check svc created + svcKey := types.NamespacedName{ + Name: machinelearningv1.GetContainerServiceName("dep", instance.Spec.Predictors[0], &instance.Spec.Predictors[0].ComponentSpecs[0].Spec.Containers[0]), + Namespace: namespaceName, + } + svcFetched := &v1.Service{} + Eventually(func() error { + err := k8sClient.Get(context.Background(), svcKey, svcFetched) + return err + }, timeout, interval).Should(BeNil()) + + // Check events created + serviceCreatedEvents := 0 + deploymentsCreatedEvents := 0 + evts, err := clientset.CoreV1().Events(namespaceName).Search(scheme, fetched) + Expect(err).To(BeNil()) + for _, evt := range evts.Items { + if evt.Reason == constants.EventsCreateService { + serviceCreatedEvents = serviceCreatedEvents + 1 + } else if evt.Reason == constants.EventsCreateDeployment { + deploymentsCreatedEvents = deploymentsCreatedEvents + 1 + } + } + + Expect(serviceCreatedEvents).To(Equal(3)) + Expect(deploymentsCreatedEvents).To(Equal(2)) + + Expect(k8sClient.Delete(context.Background(), instance)).Should(Succeed()) + + }) + +})