From 553ea88b24e390df5ede1325dda45c7cd91526e6 Mon Sep 17 00:00:00 2001 From: Clive Cox Date: Wed, 13 Jun 2018 08:47:48 +0100 Subject: [PATCH] updates to allow engine to use svc names for all calls --- cluster-manager/README.txt | 4 + .../k8s/SeldonDeploymentOperatorImpl.java | 198 ++++++++++------ .../k8s/SeldonDeploymentDefaultingTest.java | 4 +- notebooks/kubectl_demo_minikube.ipynb | 224 +++++++++++++++--- notebooks/resources/random_ab_test.json | 11 +- 5 files changed, 332 insertions(+), 109 deletions(-) create mode 100644 cluster-manager/README.txt diff --git a/cluster-manager/README.txt b/cluster-manager/README.txt new file mode 100644 index 0000000000..bea5f9a39e --- /dev/null +++ b/cluster-manager/README.txt @@ -0,0 +1,4 @@ +Local testing: + +export SELDON_CLUSTER_MANAGER_POD_NAMESPACE=seldon +export ENGINE_CONTAINER_IMAGE_AND_VERSION=seldonio/engine:0.1.8-SNAPSHOT diff --git a/cluster-manager/src/main/java/io/seldon/clustermanager/k8s/SeldonDeploymentOperatorImpl.java b/cluster-manager/src/main/java/io/seldon/clustermanager/k8s/SeldonDeploymentOperatorImpl.java index c5eeeed5ab..dfd5c51d47 100644 --- a/cluster-manager/src/main/java/io/seldon/clustermanager/k8s/SeldonDeploymentOperatorImpl.java +++ b/cluster-manager/src/main/java/io/seldon/clustermanager/k8s/SeldonDeploymentOperatorImpl.java @@ -17,8 +17,11 @@ import java.util.ArrayList; import java.util.Base64; +import java.util.HashMap; import java.util.HashSet; import java.util.List; +import java.util.Map; +import java.util.Map.Entry; import java.util.Set; import java.util.StringJoiner; @@ -187,7 +190,7 @@ private PredictiveUnit findPredictiveUnitForContainer(PredictiveUnit unit,String } } - private V1.Container updateContainer(V1.Container c,PredictiveUnit pu,int idx,String deploymentName,String predictorName) + private V1.Container updateContainer(V1.Container c,PredictiveUnit pu,int portNum,String deploymentName,String predictorName) { V1.Container.Builder c2Builder = V1.Container.newBuilder(c); @@ -199,8 +202,8 @@ private V1.Container updateContainer(V1.Container c,PredictiveUnit pu,int idx,St { if (pu.getEndpoint().getType() == Endpoint.EndpointType.REST) { - c2Builder.addPorts(ContainerPort.newBuilder().setName("http").setContainerPort(clusterManagerProperites.getPuContainerPortBase() + idx)); - containerPort = clusterManagerProperites.getPuContainerPortBase() + idx; + c2Builder.addPorts(ContainerPort.newBuilder().setName("http").setContainerPort(portNum)); + containerPort = portNum; if (!c.hasLivenessProbe()) { @@ -222,8 +225,8 @@ private V1.Container updateContainer(V1.Container c,PredictiveUnit pu,int idx,St } else { - c2Builder.addPorts(ContainerPort.newBuilder().setName("grpc").setContainerPort(clusterManagerProperites.getPuContainerPortBase() + idx)); - containerPort = clusterManagerProperites.getPuContainerPortBase() + idx; + c2Builder.addPorts(ContainerPort.newBuilder().setName("grpc").setContainerPort(portNum)); + containerPort = portNum; if (!c.hasLivenessProbe()) { @@ -247,7 +250,7 @@ private V1.Container updateContainer(V1.Container c,PredictiveUnit pu,int idx,St } } else - containerPort = c.getPorts(0).getContainerPort(); + throw new UnsupportedOperationException(String.format("Found container port already set with http or grpc label. This is not presently allowed. Found port {}",containerPort)); // Add environment variable for the port used in case the model needs to access it final String ENV_PREDICTIVE_UNIT_SERVICE_PORT ="PREDICTIVE_UNIT_SERVICE_PORT"; @@ -310,9 +313,14 @@ private void updatePredictiveUnitBuilderByName(PredictiveUnit.Builder puBuilder, } } - private String getPredictorServiceName(SeldonDeployment mlDep,int predictorIdx,int podTemplateIdx) + private String getPredictorServiceNameValue(SeldonDeployment mlDep,String predictorName,String containerName) { - return mlDep.getSpec().getName()+"-"+predictorIdx+"-"+podTemplateIdx; + return mlDep.getSpec().getName()+"-"+predictorName+"-"+containerName; + } + + private String getPredictorServiceNameKey(String containerName) + { + return LABEL_SELDON_APP+"-"+containerName; } @Override @@ -329,35 +337,37 @@ public SeldonDeployment defaulting(SeldonDeployment mlDep) { for(int pbIdx=0;pbIdx servicePortMap = new HashMap<>(); + int currentServicePortNum = clusterManagerProperites.getPuContainerPortBase(); for(int ptsIdx=0;ptsIdx services = new ArrayList<>(); // for each predictor Create/replace deployment String serviceLabel = mlDep.getSpec().getName(); + Set createdServices = new HashSet<>(); for(int pbIdx=0;pbIdx podLabel : spec.getMetadata().getLabelsMap().entrySet()) + //{ + // depMetaBuilder.put + //} + + for(V1.Container c : spec.getSpec().getContainersList()) { - depServiceLabelValue = getPredictorServiceName(mlDep, pbIdx, ptsIdx); + String containerServiceKey = getPredictorServiceNameKey(c.getName()); + String containerServiceValue = getPredictorServiceNameValue(mlDep, p.getName(), c.getName()); + podSpecBuilder.getSpecBuilder() .setTerminationGracePeriodSeconds(20); - //Add service - Service.Builder s = Service.newBuilder() - .setMetadata(ObjectMeta.newBuilder() - .setName(depServiceLabelValue) - .putLabels(SeldonDeploymentOperatorImpl.LABEL_SELDON_APP, depServiceLabelValue) - .putLabels("seldon-deployment-id", mlDep.getSpec().getName()) - .addOwnerReferences(ownerRef) - .putAnnotations("getambassador.io/config",getAmbassadorAnnotation(mlDep,serviceLabel)) - ); - ServiceSpec.Builder svcSpecBuilder = ServiceSpec.newBuilder(); - addServicePorts(p.getGraph(), depServiceLabelValue, svcSpecBuilder); - svcSpecBuilder.setType("ClusterIP") - .putSelector(SeldonDeploymentOperatorImpl.LABEL_SELDON_APP,depServiceLabelValue); - - s.setSpec(svcSpecBuilder); - services.add(s.build()); + if (!createdServices.contains(containerServiceValue)) + { + //Add service + Service.Builder s = Service.newBuilder() + .setMetadata(ObjectMeta.newBuilder() + .setName(containerServiceValue) + .putLabels(containerServiceKey, containerServiceValue) + .putLabels("seldon-deployment-id", mlDep.getSpec().getName()) + .addOwnerReferences(ownerRef) + ); + ServiceSpec.Builder svcSpecBuilder = ServiceSpec.newBuilder(); + addServicePort(p.getGraph(), containerServiceValue, svcSpecBuilder); + svcSpecBuilder.setType("ClusterIP") + .putSelector(containerServiceKey,containerServiceValue); + + depMetaBuilder.putLabels(containerServiceKey, containerServiceValue); + s.setSpec(svcSpecBuilder); + services.add(s.build()); + } } - + + + + Deployment deployment = V1beta1Extensions.Deployment.newBuilder() - .setMetadata(ObjectMeta.newBuilder() - .setName(depName) - .putLabels(SeldonDeploymentOperatorImpl.LABEL_SELDON_APP, depServiceLabelValue) - .putLabels(Constants.LABEL_SELDON_ID, mlDep.getSpec().getName()) - .putLabels("app", depName) - .putLabels("version", "v1") //FIXME - .putLabels(SeldonDeploymentOperatorImpl.LABEL_SELDON_TYPE_KEY, SeldonDeploymentOperatorImpl.LABEL_SELDON_TYPE_VAL) - .addOwnerReferences(ownerRef) - ) + .setMetadata(depMetaBuilder) .setSpec(DeploymentSpec.newBuilder() .setTemplate(podSpecBuilder.build()) .setStrategy(DeploymentStrategy.newBuilder().setRollingUpdate(RollingUpdateDeployment.newBuilder().setMaxUnavailable(IntOrString.newBuilder().setType(1).setStrVal("10%")))) diff --git a/cluster-manager/src/test/java/io/seldon/clustermanager/k8s/SeldonDeploymentDefaultingTest.java b/cluster-manager/src/test/java/io/seldon/clustermanager/k8s/SeldonDeploymentDefaultingTest.java index 03aa98def6..a6e402de68 100644 --- a/cluster-manager/src/test/java/io/seldon/clustermanager/k8s/SeldonDeploymentDefaultingTest.java +++ b/cluster-manager/src/test/java/io/seldon/clustermanager/k8s/SeldonDeploymentDefaultingTest.java @@ -42,7 +42,7 @@ public void testDefaulting() throws IOException Assert.assertEquals(1,mlDep2.getSpec().getPredictors(0).getComponentSpecs(0).getSpec().getContainers(0).getPortsCount()); Assert.assertEquals("http",mlDep2.getSpec().getPredictors(0).getComponentSpecs(0).getSpec().getContainers(0).getPorts(0).getName()); Assert.assertEquals(Endpoint.EndpointType.REST_VALUE,mlDep2.getSpec().getPredictors(0).getGraph().getEndpoint().getType().getNumber()); - Assert.assertEquals("0.0.0.0",mlDep2.getSpec().getPredictors(0).getGraph().getEndpoint().getServiceHost()); + Assert.assertEquals("test-deployment-fx-market-predictor-mean-classifier",mlDep2.getSpec().getPredictors(0).getGraph().getEndpoint().getServiceHost()); } @Test @@ -60,6 +60,6 @@ public void testDefaultingGrpc() throws IOException Assert.assertEquals(1,mlDep2.getSpec().getPredictors(0).getComponentSpecs(0).getSpec().getContainers(0).getPortsCount()); Assert.assertEquals("grpc",mlDep2.getSpec().getPredictors(0).getComponentSpecs(0).getSpec().getContainers(0).getPorts(0).getName()); Assert.assertEquals(Endpoint.EndpointType.GRPC_VALUE,mlDep2.getSpec().getPredictors(0).getGraph().getEndpoint().getType().getNumber()); - Assert.assertEquals("0.0.0.0",mlDep2.getSpec().getPredictors(0).getGraph().getEndpoint().getServiceHost()); + Assert.assertEquals("test-deployment-fx-market-predictor-mean-classifier",mlDep2.getSpec().getPredictors(0).getGraph().getEndpoint().getServiceHost()); } } diff --git a/notebooks/kubectl_demo_minikube.ipynb b/notebooks/kubectl_demo_minikube.ipynb index 899c1690bf..53abffcfe9 100644 --- a/notebooks/kubectl_demo_minikube.ipynb +++ b/notebooks/kubectl_demo_minikube.ipynb @@ -30,9 +30,26 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Starting local Kubernetes v1.9.4 cluster...\n", + "Starting VM...\n", + "Getting VM IP address...\n", + "Moving files into cluster...\n", + "Setting up certs...\n", + "Connecting to cluster...\n", + "Setting up kubeconfig...\n", + "Starting cluster components...\n", + "Kubectl is now configured to use the cluster.\n", + "Loading cached images from config file.\n" + ] + } + ], "source": [ "!minikube start --memory=5000 --feature-gates=CustomResourceValidation=true --extra-config=apiserver.Authorization.Mode=AlwaysAllow" ] @@ -46,9 +63,20 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "$HELM_HOME has been configured at /home/clive/.helm.\n", + "\n", + "Tiller (the Helm server-side component) has been installed into your Kubernetes Cluster.\n", + "Happy Helming!\n" + ] + } + ], "source": [ "!helm init" ] @@ -62,9 +90,17 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "node \"minikube\" labeled\r\n" + ] + } + ], "source": [ "!kubectl label nodes `kubectl get nodes -o jsonpath='{.items[0].metadata.name}'` role=locust --overwrite" ] @@ -85,9 +121,43 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "NAME: seldon-core-crd\n", + "LAST DEPLOYED: Mon Jun 11 19:56:24 2018\n", + "NAMESPACE: default\n", + "STATUS: DEPLOYED\n", + "\n", + "RESOURCES:\n", + "==> v1/ConfigMap\n", + "NAME DATA AGE\n", + "seldon-spartakus-config 3 0s\n", + "\n", + "==> v1beta1/CustomResourceDefinition\n", + "NAME AGE\n", + "seldondeployments.machinelearning.seldon.io 0s\n", + "\n", + "==> v1beta1/Deployment\n", + "NAME DESIRED CURRENT UP-TO-DATE AVAILABLE AGE\n", + "seldon-spartakus-volunteer 1 1 1 0 0s\n", + "\n", + "==> v1/Pod(related)\n", + "NAME READY STATUS RESTARTS AGE\n", + "seldon-spartakus-volunteer-5977c9fdb8-kr74m 0/1 ContainerCreating 0 0s\n", + "\n", + "\n", + "NOTES:\n", + "NOTES: TODO\n", + "\n", + "\n" + ] + } + ], "source": [ "!helm install ../helm-charts/seldon-core-crd --name seldon-core-crd \\\n", " --set usage_metrics.enabled=true \\\n", @@ -96,18 +166,61 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "namespace \"seldon\" created\r\n" + ] + } + ], "source": [ "!kubectl create namespace seldon" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "NAME: seldon-core\r\n", + "LAST DEPLOYED: Mon Jun 11 19:58:11 2018\r\n", + "NAMESPACE: seldon\r\n", + "STATUS: DEPLOYED\r\n", + "\r\n", + "RESOURCES:\r\n", + "==> v1beta1/Deployment\r\n", + "NAME DESIRED CURRENT UP-TO-DATE AVAILABLE AGE\r\n", + "seldon-apiserver 1 1 1 0 0s\r\n", + "seldon-cluster-manager 1 1 1 0 0s\r\n", + "redis 1 1 1 0 0s\r\n", + "\r\n", + "==> v1/Service\r\n", + "NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE\r\n", + "seldon-apiserver NodePort 10.109.80.190 8080:31062/TCP,5000:31612/TCP 0s\r\n", + "redis ClusterIP 10.110.233.69 6379/TCP 0s\r\n", + "\r\n", + "==> v1/Pod(related)\r\n", + "NAME READY STATUS RESTARTS AGE\r\n", + "seldon-apiserver-6fc74fc494-5mknt 0/1 ContainerCreating 0 0s\r\n", + "seldon-cluster-manager-86c6cb7b95-4bmzn 0/1 ContainerCreating 0 0s\r\n", + "redis-df886d999-htsnf 0/1 ContainerCreating 0 0s\r\n", + "\r\n", + "\r\n", + "NOTES:\r\n", + "NOTES: TODO\r\n", + "\r\n", + "\r\n" + ] + } + ], "source": [ "!helm install ../helm-charts/seldon-core --name seldon-core --namespace seldon \\\n", " --set rbac.enabled=false" @@ -182,7 +295,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -336,9 +449,17 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "map[predictorStatus:[map[name:test-deployment-fx-market-predictor-0 replicas:1 replicasAvailable:1]]]" + ] + } + ], "source": [ "!kubectl get seldondeployments seldon-deployment-example -o jsonpath='{.status}' -n seldon" ] @@ -359,9 +480,34 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\"access_token\":\"5d6cc055-2dce-4aff-9356-bb662f5151b0\",\"token_type\":\"bearer\",\"expires_in\":42707,\"scope\":\"read write\"}\n", + "{\n", + " \"meta\": {\n", + " \"puid\": \"f43rqjbdmhorbjfdsevuuocknc\",\n", + " \"tags\": {\n", + " },\n", + " \"routing\": {\n", + " \"random-ab-test\": 0\n", + " }\n", + " },\n", + " \"data\": {\n", + " \"names\": [\"proba\"],\n", + " \"tensor\": {\n", + " \"shape\": [2, 1],\n", + " \"values\": [0.05133579311531625, 0.12823373759251927]\n", + " }\n", + " }\n", + "}\n" + ] + } + ], "source": [ "rest_request()" ] @@ -375,9 +521,31 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\"access_token\":\"c533fa2b-5da2-4745-ba55-f9e4a353810e\",\"token_type\":\"bearer\",\"expires_in\":43181,\"scope\":\"read write\"}\n", + "meta {\n", + " puid: \"cgagftb7u1so8bu0eaetm77763\"\n", + "}\n", + "data {\n", + " names: \"proba\"\n", + " tensor {\n", + " shape: 3\n", + " shape: 1\n", + " values: 0.12823373759251927\n", + " values: 0.39731466202150834\n", + " values: 0.8296760813561542\n", + " }\n", + "}\n", + "\n" + ] + } + ], "source": [ "grpc_request()" ] diff --git a/notebooks/resources/random_ab_test.json b/notebooks/resources/random_ab_test.json index 0a12a9dd3b..0b06c104e3 100644 --- a/notebooks/resources/random_ab_test.json +++ b/notebooks/resources/random_ab_test.json @@ -32,9 +32,14 @@ }], "terminationGracePeriodSeconds": 20 }}, - { - "spec":{ - "containers":[ + { + "metadata":{ + "labels":{ + "version":"v2" + } + }, + "spec":{ + "containers":[ { "image": "seldonio/mock_classifier:1.0", "imagePullPolicy": "IfNotPresent",