From 2d1a27d36feb0296501c79731021a21552952866 Mon Sep 17 00:00:00 2001 From: cliveseldon Date: Thu, 9 Jun 2022 14:49:05 +0100 Subject: [PATCH] Allow model scaling, k6 constant throughput tests and Prometheus/Grafana in Docker Compose install (#262) * small update to docs * Add prometheus to docker compose * add constant rate k7 test and scale replicas for model setting * test on k8s * allow grpc in constant rate tests * review comments --- docs/source/contents/metrics/index.md | 6 +- .../templates/seldon-v2-crds.yaml | 8 + k8s/yaml/seldon-v2-crds.yaml | 8 + operator/apis/mlops/v1alpha1/model_types.go | 4 +- .../crd/bases/mlops.seldon.io_models.yaml | 8 + operator/scheduler/model.go | 2 + samples/k8s-examples.ipynb | 19 +- scheduler/Dockerfile.grafana | 15 + scheduler/Makefile | 41 +- scheduler/all-base.yaml | 9 + scheduler/all-host-network.yaml | 9 + scheduler/all-internal.yaml | 9 +- .../dashboards/seldon_overview/seldon.json | 1012 +++++++++++++++++ scheduler/config/grafana/grafana.ini | 12 + .../provisioning/dashboards/dashboard.yml | 16 + .../provisioning/datasources/prometheus.yml | 14 + scheduler/config/prometheus-host.yml | 6 + scheduler/config/prometheus-internal.yml | 6 + scheduler/env.all | 1 + tests/k6/README.md | 16 +- tests/k6/components/settings.js | 32 + tests/k6/scenarios/model_constant_rate.js | 48 + 22 files changed, 1285 insertions(+), 16 deletions(-) create mode 100644 scheduler/Dockerfile.grafana create mode 100644 scheduler/config/grafana/dashboards/seldon_overview/seldon.json create mode 100644 scheduler/config/grafana/grafana.ini create mode 100644 scheduler/config/grafana/provisioning/dashboards/dashboard.yml create mode 100644 scheduler/config/grafana/provisioning/datasources/prometheus.yml create mode 100644 scheduler/config/prometheus-host.yml create mode 100644 scheduler/config/prometheus-internal.yml create mode 100644 tests/k6/scenarios/model_constant_rate.js diff --git a/docs/source/contents/metrics/index.md 
b/docs/source/contents/metrics/index.md index 8c9fa81962..cce3ad55ed 100644 --- a/docs/source/contents/metrics/index.md +++ b/docs/source/contents/metrics/index.md @@ -27,7 +27,11 @@ We have a prebuilt grafana dashboard that makes use of many of the metrics that ![kafka](dashboard.png) -### Installation +### Local Use + +Grafana and Prometheus are available when you run Seldon locally. You will be able to connect to the Grafana dashboard at `http://localhost:3000`. Prometheus will be available at `http://localhost:9090`. + +### Kubernetes Installation Download the dashboard from [SCv2 dashboard](https://github.com/SeldonIO/seldon-core-v2/blob/master/prometheus/dashboards/Seldon%20Core%20Model%20Mesh%20Monitoring.json) and import it in grafana, making sure that the data source is pointing to the correct prometheus store. Find more information on how to import the dashboard [here](https://grafana.com/docs/grafana/latest/dashboards/export-import/) diff --git a/k8s/helm-charts/seldon-core-v2-crds/templates/seldon-v2-crds.yaml b/k8s/helm-charts/seldon-core-v2-crds/templates/seldon-v2-crds.yaml index d190506d1d..9162c31480 100644 --- a/k8s/helm-charts/seldon-core-v2-crds/templates/seldon-v2-crds.yaml +++ b/k8s/helm-charts/seldon-core-v2-crds/templates/seldon-v2-crds.yaml @@ -430,11 +430,19 @@ spec: that was last processed by the controller. format: int64 type: integer + replicas: + description: 'Important: Run "make" to regenerate code after modifying + this file' + format: int32 + type: integer type: object type: object served: true storage: true subresources: + scale: + specReplicasPath: .spec.replicas + statusReplicasPath: .status.replicas status: {} status: acceptedNames: diff --git a/k8s/yaml/seldon-v2-crds.yaml b/k8s/yaml/seldon-v2-crds.yaml index d190506d1d..9162c31480 100644 --- a/k8s/yaml/seldon-v2-crds.yaml +++ b/k8s/yaml/seldon-v2-crds.yaml @@ -430,11 +430,19 @@ spec: that was last processed by the controller. 
format: int64 type: integer + replicas: + description: 'Important: Run "make" to regenerate code after modifying + this file' + format: int32 + type: integer type: object type: object served: true storage: true subresources: + scale: + specReplicasPath: .spec.replicas + statusReplicasPath: .status.replicas status: {} status: acceptedNames: diff --git a/operator/apis/mlops/v1alpha1/model_types.go b/operator/apis/mlops/v1alpha1/model_types.go index aec0575661..07b3e4d37e 100644 --- a/operator/apis/mlops/v1alpha1/model_types.go +++ b/operator/apis/mlops/v1alpha1/model_types.go @@ -89,13 +89,15 @@ type InferenceArtifactSpec struct { // ModelStatus defines the observed state of Model type ModelStatus struct { - // Important: Run "make" to regenerate code after modifying this file + // Total number of replicas targeted by this model + Replicas int32 `json:"replicas,omitempty"` duckv1.Status `json:",inline"` } //+kubebuilder:object:root=true //+kubebuilder:subresource:status //+kubebuilder:resource:shortName=mlm +// +kubebuilder:subresource:scale:specpath=.spec.replicas,statuspath=.status.replicas // Model is the Schema for the models API type Model struct { diff --git a/operator/config/crd/bases/mlops.seldon.io_models.yaml b/operator/config/crd/bases/mlops.seldon.io_models.yaml index 3f0bcb157c..a6714da54e 100644 --- a/operator/config/crd/bases/mlops.seldon.io_models.yaml +++ b/operator/config/crd/bases/mlops.seldon.io_models.yaml @@ -151,11 +151,19 @@ spec: that was last processed by the controller. 
format: int64 type: integer + replicas: + description: 'Important: Run "make" to regenerate code after modifying + this file' + format: int32 + type: integer type: object type: object served: true storage: true subresources: + scale: + specReplicasPath: .spec.replicas + statusReplicasPath: .status.replicas status: {} status: acceptedNames: diff --git a/operator/scheduler/model.go b/operator/scheduler/model.go index 7b0dde9a17..d4bcd24d43 100644 --- a/operator/scheduler/model.go +++ b/operator/scheduler/model.go @@ -135,6 +135,8 @@ func (s *SchedulerClient) SubscribeModelEvents(ctx context.Context) error { logger.Info("Setting model to not ready", "name", event.ModelName, "state", latestVersionStatus.State.State.String()) latestModel.Status.CreateAndSetCondition(v1alpha1.ModelReady, false, latestVersionStatus.State.Reason) } + // Set the total number of replicas targeted by this model + latestModel.Status.Replicas = int32(latestVersionStatus.State.GetAvailableReplicas() + latestVersionStatus.State.GetUnavailableReplicas()) return s.updateModelStatus(latestModel) }) if retryErr != nil { diff --git a/samples/k8s-examples.ipynb b/samples/k8s-examples.ipynb index 1b420bedd5..c2ed36b749 100644 --- a/samples/k8s-examples.ipynb +++ b/samples/k8s-examples.ipynb @@ -17,7 +17,7 @@ { "data": { "text/plain": [ - "'172.22.255.9'" + "'172.31.255.9'" ] }, "execution_count": 1, @@ -89,7 +89,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "id": "beneficial-logan", "metadata": {}, "outputs": [ @@ -102,12 +102,12 @@ } ], "source": [ - "build!kubectl wait --for condition=ready --timeout=300s model --all -n seldon-mesh" + "!kubectl wait --for condition=ready --timeout=300s model --all -n seldon-mesh" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "id": "prepared-duration", "metadata": {}, "outputs": [ @@ -118,16 +118,17 @@ "{\r\n", " \"conditions\": [\r\n", " {\r\n", - " \"lastTransitionTime\": \"2022-05-26T10:09:32Z\",\r\n", + 
" \"lastTransitionTime\": \"2022-06-03T14:35:59Z\",\r\n", " \"status\": \"True\",\r\n", " \"type\": \"ModelReady\"\r\n", " },\r\n", " {\r\n", - " \"lastTransitionTime\": \"2022-05-26T10:09:32Z\",\r\n", + " \"lastTransitionTime\": \"2022-06-03T14:35:59Z\",\r\n", " \"status\": \"True\",\r\n", " \"type\": \"Ready\"\r\n", " }\r\n", - " ]\r\n", + " ],\r\n", + " \"replicas\": 1\r\n", "}\r\n" ] } @@ -138,7 +139,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "id": "67900afd", "metadata": {}, "outputs": [ @@ -149,7 +150,7 @@ "{\r\n", "\t\"model_name\": \"iris_1\",\r\n", "\t\"model_version\": \"1\",\r\n", - "\t\"id\": \"5890ac9d-c1ed-4343-b9e6-a460bead5fe8\",\r\n", + "\t\"id\": \"3be6542c-5ad2-4ebc-a0d4-842377653b5d\",\r\n", "\t\"parameters\": null,\r\n", "\t\"outputs\": [\r\n", "\t\t{\r\n", diff --git a/scheduler/Dockerfile.grafana b/scheduler/Dockerfile.grafana new file mode 100644 index 0000000000..5f5aad2493 --- /dev/null +++ b/scheduler/Dockerfile.grafana @@ -0,0 +1,15 @@ +FROM grafana/grafana:8.5.4 + +# Disable Login form or not +ENV GF_AUTH_DISABLE_LOGIN_FORM "true" +# Allow anonymous authentication or not +ENV GF_AUTH_ANONYMOUS_ENABLED "true" +# Role of anonymous user +ENV GF_AUTH_ANONYMOUS_ORG_ROLE "Admin" + +# Add provisioning +ADD ./config/grafana/provisioning /etc/grafana/provisioning +# Add configuration file +ADD ./config/grafana/grafana.ini /etc/grafana/grafana.ini +# Add dashboard json files +ADD ./config/grafana/dashboards /etc/grafana/dashboards \ No newline at end of file diff --git a/scheduler/Makefile b/scheduler/Makefile index 7e1a88df09..43992e012b 100644 --- a/scheduler/Makefile +++ b/scheduler/Makefile @@ -7,6 +7,8 @@ MODELGATEWAY_IMG ?= ${DOCKERHUB_USERNAME}/seldon-modelgateway:${CUSTOM_IMAGE_TAG PIPELINEGATEWAY_IMG ?= ${DOCKERHUB_USERNAME}/seldon-pipelinegateway:${CUSTOM_IMAGE_TAG} DATAFLOW_IMG ?= ${DOCKERHUB_USERNAME}/seldon-dataflow-engine:${CUSTOM_IMAGE_TAG} ENVOY_IMG ?= 
${DOCKERHUB_USERNAME}/seldon-envoy:${CUSTOM_IMAGE_TAG} +# Grafana image only used for Docker compose not k8s +GRAFANA_IMG ?= ${DOCKERHUB_USERNAME}/seldon-grafana:${CUSTOM_IMAGE_TAG} MLSERVER_IMG ?= seldonio/mlserver:1.1.0.dev3 TRITON_IMG ?= nvcr.io/nvidia/tritonserver:21.12-py3 KIND_NAME=ansible @@ -143,11 +145,19 @@ docker-build-dataflow: copy-apis data-flow/opentelemetry-javaagent.jar docker-push-dataflow: docker push ${DATAFLOW_IMG} +.PHONY: docker-build-grafana +docker-build-grafana: + docker build -t ${GRAFANA_IMG} -f Dockerfile.grafana . + +.PHONY: docker-push-grafana +docker-push-grafana: + docker push ${GRAFANA_IMG} + .PHONY: docker-build-all -docker-build-all: docker-build-dataflow docker-build-agent docker-build-envoy docker-build-rclone docker-build-scheduler docker-build-modelgateway docker-build-pipelinegateway +docker-build-all: docker-build-dataflow docker-build-agent docker-build-envoy docker-build-rclone docker-build-scheduler docker-build-modelgateway docker-build-pipelinegateway docker-build-grafana .PHONY: docker-push-all -docker-push-all: docker-push-agent docker-push-envoy docker-push-rclone docker-push-scheduler docker-push-modelgateway docker-push-pipelinegateway docker-push-dataflow +docker-push-all: docker-push-agent docker-push-envoy docker-push-rclone docker-push-scheduler docker-push-modelgateway docker-push-pipelinegateway docker-push-dataflow docker-push-grafana ##################################### @@ -203,7 +213,8 @@ DOCKER_COMPOSE_COMMON_IMAGES = \ RCLONE_IMAGE_AND_TAG=${RCLONE_IMG} \ SERVER_MLSERVER_IMAGE_AND_TAG=${MLSERVER_IMG} \ TRITON_LOG_LEVEL=${DOCKER_COMPOSE_TRITON_LOG_LEVEL} \ - SERVER_TRITON_IMAGE_AND_TAG=${TRITON_IMG} + SERVER_TRITON_IMAGE_AND_TAG=${TRITON_IMG} \ + GRAFANA_IMAGE_AND_TAG=${GRAFANA_IMG} DOCKER_COMPOSE_TRITON_LOG_LEVEL ?= 0 @@ -440,6 +451,30 @@ stop-kafka: start-kafka-host: ${DOCKER_COMPOSE_SERVICE_HOST_COMMAND} up -d kafka +.PHONY: start-prometheus +start-prometheus: + ${DOCKER_COMPOSE_SERVICE_COMMAND} up 
-d prometheus + +.PHONY: stop-prometheus +stop-prometheus: + ${DOCKER_COMPOSE_SERVICE_COMMAND} rm --stop --force ${DOCKER_COMPOSE_REMOVE_VOLUMES} prometheus + +.PHONY: start-prometheus-host +start-prometheus-host: + ${DOCKER_COMPOSE_SERVICE_HOST_COMMAND} up -d prometheus + +.PHONY: start-grafana +start-grafana: + ${DOCKER_COMPOSE_SERVICE_COMMAND} up -d grafana + +.PHONY: stop-grafana +stop-grafana: + ${DOCKER_COMPOSE_SERVICE_COMMAND} rm --stop --force ${DOCKER_COMPOSE_REMOVE_VOLUMES} grafana + +.PHONY: start-grafana-host +start-grafana-host: + ${DOCKER_COMPOSE_SERVICE_HOST_COMMAND} up -d grafana + .PHONY: stop-kafka-host stop-kafka-host: ${DOCKER_COMPOSE_SERVICE_HOST_COMMAND} rm --stop --force ${DOCKER_COMPOSE_REMOVE_VOLUMES} kafka diff --git a/scheduler/all-base.yaml b/scheduler/all-base.yaml index 7c223cf036..50613d1b8b 100644 --- a/scheduler/all-base.yaml +++ b/scheduler/all-base.yaml @@ -165,3 +165,12 @@ services: environment: - ALLOW_ANONYMOUS_LOGIN=yes + prometheus: + image: prom/prometheus:latest + ports: + - "9090:9090" + + grafana: + image: "${GRAFANA_IMAGE_AND_TAG}" + ports: + - 3000:3000 diff --git a/scheduler/all-host-network.yaml b/scheduler/all-host-network.yaml index d949876869..36ecb882e7 100644 --- a/scheduler/all-host-network.yaml +++ b/scheduler/all-host-network.yaml @@ -164,3 +164,12 @@ services: zookeeper: ports: - "2181:2181" + + prometheus: + command: + - --config.file=/etc/prometheus/prometheus-host.yml + volumes: + - type: bind + source: ./config + target: /etc/prometheus + diff --git a/scheduler/all-internal.yaml b/scheduler/all-internal.yaml index 1166105bc8..e45df07682 100644 --- a/scheduler/all-internal.yaml +++ b/scheduler/all-internal.yaml @@ -185,7 +185,6 @@ services: dataflow: - environment: environment: - SELDON_UPSTREAM_PORT=${SCHEDULER_DATAFLOW_PORT} - SELDON_KAFKA_BOOTSTRAP_SERVERS=kafka:${KAFKA_BROKER_INTERNAL_PORT} @@ -218,3 +217,11 @@ services: zookeeper: ports: - "2181:2181" + + prometheus: + command: + - 
--config.file=/etc/prometheus/prometheus-internal.yml + volumes: + - type: bind + source: ./config + target: /etc/prometheus diff --git a/scheduler/config/grafana/dashboards/seldon_overview/seldon.json b/scheduler/config/grafana/dashboards/seldon_overview/seldon.json new file mode 100644 index 0000000000..4ce33689a6 --- /dev/null +++ b/scheduler/config/grafana/dashboards/seldon_overview/seldon.json @@ -0,0 +1,1012 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 26, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": "prometheus", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 0, + "y": 0 + }, + "id": 10, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "limit": 2, + "values": false + }, + "text": {}, + "textMode": "value_and_name" + }, + "pluginVersion": "8.4.6", + "targets": [ + { + "datasource": "prometheus", + "exemplar": true, + "expr": "count (seldon_loaded_model_memory_bytes_gauge >0 )", + "hide": false, + "interval": "", + "legendFormat": "In-memory", + "refId": "B" + }, + { + "datasource": "prometheus", + "exemplar": true, + "expr": "sum (seldon_loaded_model_gauge)", + "hide": false, + "interval": "", + "legendFormat": "Registered", + "refId": "A" + } + ], + "title": "Models", + "transformations": [], + "type": 
"stat" + }, + { + "datasource": "prometheus", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 3, + "y": 0 + }, + "id": 3, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": true + }, + "textMode": "auto" + }, + "pluginVersion": "8.4.6", + "targets": [ + { + "datasource": "prometheus", + "exemplar": true, + "expr": "sum by(server) (seldon_loaded_model_gauge)", + "format": "table", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Registered Model Replicas", + "transformations": [ + { + "id": "groupBy", + "options": { + "fields": { + "Value": { + "aggregations": [ + "lastNotNull" + ], + "operation": "aggregate" + }, + "server": { + "aggregations": [], + "operation": "groupby" + }, + "server_replica": { + "aggregations": [], + "operation": "groupby" + } + } + } + } + ], + "type": "stat" + }, + { + "datasource": "prometheus", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 7, + "y": 0 + }, + "id": 5, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": true + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.4.6", + "targets": [ + { + "datasource": "prometheus", + "exemplar": true, + "expr": "count by(server) 
(seldon_loaded_model_memory_bytes_gauge > 0)", + "format": "table", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "In-Memory Model Replicas", + "transformations": [ + { + "id": "groupBy", + "options": { + "fields": { + "Value": { + "aggregations": [ + "lastNotNull" + ], + "operation": "aggregate" + }, + "server": { + "aggregations": [], + "operation": "groupby" + }, + "server_replica": { + "aggregations": [], + "operation": "groupby" + } + } + } + } + ], + "type": "stat" + }, + { + "datasource": "prometheus", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "#EAB839", + "value": 0.8 + }, + { + "color": "dark-red", + "value": 0.9 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 5, + "x": 11, + "y": 0 + }, + "id": 9, + "options": { + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "limit": 4, + "values": true + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "text": {} + }, + "pluginVersion": "8.4.6", + "targets": [ + { + "datasource": "prometheus", + "exemplar": true, + "expr": "sum by(server) (seldon_loaded_model_memory_bytes_gauge) / sum by(server) (seldon_server_replica_memory_capacity_overcommit_bytes_gauge)", + "format": "table", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "In-Memory Model Replicas (Memory Slots)", + "transformations": [ + { + "id": "groupBy", + "options": { + "fields": { + "Time": { + "aggregations": [], + "operation": "groupby" + }, + "Value": { + "aggregations": [ + "lastNotNull" + ], + "operation": "aggregate" + }, + "Value #A": { + "aggregations": [ + "sum" + ], + 
"operation": "aggregate" + }, + "Value #B": { + "aggregations": [ + "sum" + ], + "operation": "aggregate" + }, + "model_internal": { + "aggregations": [], + "operation": "groupby" + }, + "server": { + "aggregations": [], + "operation": "groupby" + }, + "server_replica": { + "aggregations": [], + "operation": "groupby" + } + } + } + }, + { + "id": "groupBy", + "options": { + "fields": { + "Value #A (sum)": { + "aggregations": [ + "last" + ], + "operation": "aggregate" + }, + "Value #B (sum)": { + "aggregations": [ + "last" + ], + "operation": "aggregate" + }, + "Value (lastNotNull)": { + "aggregations": [ + "lastNotNull" + ], + "operation": "aggregate" + }, + "server": { + "aggregations": [], + "operation": "groupby" + }, + "server_replica": { + "aggregations": [], + "operation": "groupby" + } + } + } + } + ], + "type": "gauge" + }, + { + "datasource": "prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 5 + }, + "id": 12, + "options": { + "legend": { + "calcs": [], + "displayMode": "hidden", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": "prometheus", + "exemplar": true, + "expr": "sum(rate(seldon_cache_evict_count[1m]))", + 
"format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "Evict Rate", + "refId": "A" + }, + { + "datasource": "prometheus", + "exemplar": true, + "expr": "sum(rate(seldon_cache_miss_count[1m]))", + "hide": false, + "interval": "", + "legendFormat": "Miss Rate", + "refId": "B" + } + ], + "title": "Model Evict/Miss Rate [1m]", + "type": "timeseries" + }, + { + "datasource": "prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 5 + }, + "id": 20, + "options": { + "legend": { + "calcs": [], + "displayMode": "hidden", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": "prometheus", + "exemplar": true, + "expr": "sum by (server) (rate(seldon_load_model_counter[1m]))", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{server}}_Load", + "refId": "A" + }, + { + "datasource": "prometheus", + "exemplar": true, + "expr": "sum by (server) (rate(seldon_unload_model_counter[1m]))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "Unloa{{server}}_Loadd", + "refId": "B" 
+ } + ], + "title": "Model Load/Unload Rate [1m]", + "type": "timeseries" + }, + { + "datasource": "prometheus", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 12 + }, + "id": 7, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.4.6", + "targets": [ + { + "datasource": "prometheus", + "exemplar": true, + "expr": "sum(seldon_server_replica_memory_capacity_bytes_gauge)", + "hide": false, + "interval": "", + "legendFormat": "Capacity", + "refId": "B" + }, + { + "datasource": "prometheus", + "exemplar": true, + "expr": "sum(seldon_loaded_model_memory_bytes_gauge)", + "hide": false, + "interval": "", + "legendFormat": "Used", + "refId": "C" + }, + { + "datasource": "prometheus", + "exemplar": true, + "expr": "sum(seldon_server_replica_memory_capacity_overcommit_bytes_gauge)", + "hide": false, + "interval": "", + "legendFormat": "Capacity with Over-commit", + "refId": "A" + }, + { + "datasource": "prometheus", + "exemplar": true, + "expr": "sum(seldon_loaded_model_memory_bytes_gauge) + sum(seldon_evicted_model_memory_bytes_gauge)", + 
"hide": false, + "interval": "", + "legendFormat": "Used with Over-commit", + "refId": "D" + } + ], + "title": "Memory Slots", + "transformations": [], + "type": "timeseries" + }, + { + "datasource": "prometheus", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 12 + }, + "id": 15, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.4.6", + "targets": [ + { + "datasource": "prometheus", + "exemplar": true, + "expr": "avg((rate(seldon_aggregate_infer_seconds_total[1m]) / rate(seldon_aggregate_infer_total[1m])) > 0 ) by (server, method_type)", + "hide": false, + "interval": "", + "legendFormat": "{{server}}_{{method_type}}_avg", + "refId": "A" + } + ], + "title": "Infer Latency [1m]", + "transformations": [], + "type": "timeseries" + }, + { + "datasource": "prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + 
"legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 8, + "x": 0, + "y": 20 + }, + "id": 19, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": "prometheus", + "exemplar": true, + "expr": "container_memory_working_set_bytes{container=\"mlserver\"}", + "interval": "10s", + "legendFormat": "{{pod}}", + "refId": "A" + }, + { + "datasource": "prometheus", + "exemplar": true, + "expr": "container_memory_working_set_bytes{container=\"triton\"}", + "hide": false, + "interval": "10s", + "legendFormat": "{{pod}}", + "refId": "B" + } + ], + "title": "Memory Used", + "type": "timeseries" + }, + { + "datasource": "prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "cores", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + 
"value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 8, + "x": 8, + "y": 20 + }, + "id": 17, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": "prometheus", + "exemplar": true, + "expr": "rate (container_cpu_usage_seconds_total{container=\"mlserver\"}[1m])", + "interval": "10s", + "legendFormat": "{{pod}}", + "refId": "A" + }, + { + "datasource": "prometheus", + "exemplar": true, + "expr": "rate (container_cpu_usage_seconds_total{container=\"triton\"}[1m])", + "hide": false, + "interval": "", + "legendFormat": "{{pod}}", + "refId": "B" + } + ], + "title": "CPU [1m]", + "type": "timeseries" + } + ], + "refresh": "5s", + "schemaVersion": 35, + "style": "dark", + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-15m", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Seldon Core Model Mesh Monitoring", + "uid": "MHloCP_7z", + "version": 22, + "weekStart": "" +} diff --git a/scheduler/config/grafana/grafana.ini b/scheduler/config/grafana/grafana.ini new file mode 100644 index 0000000000..13f87ed2a4 --- /dev/null +++ b/scheduler/config/grafana/grafana.ini @@ -0,0 +1,12 @@ +[paths] +provisioning = /etc/grafana/provisioning + +[server] +enable_gzip = true + +[security] +# If you want to embed grafana into an iframe for example +allow_embedding = true + +[users] +default_theme = dark \ No newline at end of file diff --git a/scheduler/config/grafana/provisioning/dashboards/dashboard.yml b/scheduler/config/grafana/provisioning/dashboards/dashboard.yml new file mode 100644 index 0000000000..04dcb5c82e --- /dev/null +++ b/scheduler/config/grafana/provisioning/dashboards/dashboard.yml @@ -0,0 +1,16 @@ +apiVersion: 1 + +providers: + - name: Seldon + org_id: 1 + # name of the dashboard folder. Required + folder: '' + # provider type. 
Required + type: 'file' + disableDeletion: false + editable: true + updateIntervalSeconds: 5 + allowUiUpdates: true + options: + path: /etc/grafana/dashboards + foldersFromFilesStructure: true diff --git a/scheduler/config/grafana/provisioning/datasources/prometheus.yml b/scheduler/config/grafana/provisioning/datasources/prometheus.yml new file mode 100644 index 0000000000..b4603657bc --- /dev/null +++ b/scheduler/config/grafana/provisioning/datasources/prometheus.yml @@ -0,0 +1,14 @@ +apiVersion: 1 + +deleteDatasources: + - name: prometheus + orgId: 1 + +datasources: + - name: prometheus + type: prometheus + access: proxy + url: http://prometheus:9090 + orgId: 1 + + diff --git a/scheduler/config/prometheus-host.yml b/scheduler/config/prometheus-host.yml new file mode 100644 index 0000000000..f1b60db508 --- /dev/null +++ b/scheduler/config/prometheus-host.yml @@ -0,0 +1,6 @@ +scrape_configs: + - job_name: 'agent' + scrape_interval: 10s + static_configs: + - targets: ['0.0.0.0:9006'] + - targets: ['0.0.0.0:9007'] diff --git a/scheduler/config/prometheus-internal.yml b/scheduler/config/prometheus-internal.yml new file mode 100644 index 0000000000..7f8f74da19 --- /dev/null +++ b/scheduler/config/prometheus-internal.yml @@ -0,0 +1,6 @@ +scrape_configs: + - job_name: 'agent' + scrape_interval: 10s + static_configs: + - targets: ['agent-mlserver:9006'] + - targets: ['agent-triton:9006'] diff --git a/scheduler/env.all b/scheduler/env.all index 5e66068070..5e5bfff9dc 100644 --- a/scheduler/env.all +++ b/scheduler/env.all @@ -38,6 +38,7 @@ SERVER_MLSERVER_IMAGE_AND_TAG=seldonio/mlserver:1.0.1 SERVER_TRITON_IMAGE_AND_TAG=nvcr.io/nvidia/tritonserver:21.12-py3 SCHEDULER_IMAGE_AND_TAG=seldonio/seldon-scheduler:latest KAFKA_IMAGE_AND_TAG=quay.io/strimzi/kafka:0.28.0-kafka-3.1.0 +GRAFANA_IMAGE_AND_TAG=seldonio/seldon-grafana:latest AGENT_OVERCOMMIT_PERCENTAGE=20 AGENT_MEMORY_REQUEST=10000000 diff --git a/tests/k6/README.md b/tests/k6/README.md index d87f2b4e54..fd32ac8620 100644 
--- a/tests/k6/README.md +++ b/tests/k6/README.md @@ -37,4 +37,18 @@ For k8s you will need to update the default endpoints to the services exposed, e ``` MODEL_TYPE="tfsimple" SCHEDULER_ENDPOINT=172.18.255.4:9004 INFER_GRPC_ENDPOINT=172.18.255.3:80 INFER_HTTP_ENDPOINT=http://172.18.255.3 k6 run -u 5 -i 50 scenarios/load_predict_unload.js -``` \ No newline at end of file +``` + +## Constant Throughput Test + +Run against a model named `iris` which is of type `iris`, using the Envoy HTTP endpoint as given. + +``` + MODEL_NAME="iris" MODEL_TYPE="iris" INFER_HTTP_ENDPOINT="http://172.31.255.9" k6 run scenarios/model_constant_rate.js +``` + +Run locally but with gRPC + +``` +INFER_TYPE="grpc" MODEL_TYPE="iris" k6 run scenarios/model_constant_rate.js +``` diff --git a/tests/k6/components/settings.js b/tests/k6/components/settings.js index 56564e7301..d8eb4f49f7 100644 --- a/tests/k6/components/settings.js +++ b/tests/k6/components/settings.js @@ -110,6 +110,20 @@ function inferBatchSize() { return 1 } +function modelStartIdx() { + if (__ENV.MODEL_START_IDX) { + return parseInt(__ENV.MODEL_START_IDX) + } + return 0 +} + +function modelEndIdx() { + if (__ENV.MODEL_END_IDX) { + return parseInt(__ENV.MODEL_END_IDX) + } + return 0 +} + function isLoadPipeline() { if (__ENV.DATAFLOW_TAG) { return !(__ENV.DATAFLOW_TAG === "") @@ -131,6 +145,13 @@ function modelNamePrefix() { return "model" } +function modelName() { + if (__ENV.MODEL_NAME) { + return __ENV.MODEL_NAME + } + return "" +} + function experimentNamePrefix() { if (__ENV.EXPERIMENTNAME_PREFIX) { return __ENV.EXPERIMENTNAME_PREFIX @@ -138,6 +159,13 @@ function experimentNamePrefix() { return "experiment" } +function inferType() { + if (__ENV.INFER_TYPE) { + return __ENV.INFER_TYPE + } + return "REST" +} + export function getConfig() { return { "schedulerEndpoint": schedulerEndpoint(), @@ -160,5 +188,9 @@ export function getConfig() { "experimentNamePrefix": experimentNamePrefix(), "loadExperiment" : loadExperiment(), 
"unloadExperiment": unloadExperiment(), + "modelStartIdx" : modelStartIdx(), + "modelEndIdx" : modelEndIdx(), + "modelName" : modelName(), + "inferType" : inferType(), } } \ No newline at end of file diff --git a/tests/k6/scenarios/model_constant_rate.js b/tests/k6/scenarios/model_constant_rate.js new file mode 100644 index 0000000000..8db8221589 --- /dev/null +++ b/tests/k6/scenarios/model_constant_rate.js @@ -0,0 +1,48 @@ +import {inferHttpLoop, inferGrpcLoop, inferHttp, inferGrpc, connectV2Grpc, disconnectV2Grpc} from '../components/v2.js' +import {getConfig} from '../components/settings.js' +import {generateModel } from '../components/model.js' +import { vu, scenario } from 'k6/execution'; +import { randomIntBetween } from 'https://jslib.k6.io/k6-utils/1.2.0/index.js'; + +export const options = { + scenarios: { + constant_request_rate: { + executor: 'constant-arrival-rate', + rate: 10, + timeUnit: '1s', + duration: '5s', + preAllocatedVUs: 1, // how large the initial pool of VUs would be + maxVUs: 100, // if the preAllocatedVUs are not enough, we can initialize more + }, + }, +}; + +export function setup() { + return getConfig() +} + +export default function (config) { + const modelIdx = randomIntBetween(config.modelStartIdx, config.modelEndIdx) + const modelName = config.modelNamePrefix + modelIdx.toString() + const model = generateModel(config.modelType, modelName, 0, 1, + config.isSchedulerProxy, config.modelMemoryBytes, config.inferBatchSize) + const modelDefn = model.modelDefn + const httpEndpoint = config.inferHttpEndpoint + const grpcEndpoint = config.inferGrpcEndpoint + + if (config.inferType === "REST") { + if (config.modelName !== "") { + inferHttp(httpEndpoint, config.modelName, model.inference.http, true, "") + } else { + inferHttp(httpEndpoint, modelName, model.inference.http, true, "") + } + } else { + connectV2Grpc(grpcEndpoint) + if (config.modelName !== "") { + inferGrpc(config.modelName, model.inference.grpc, true, "") + } else { + 
inferGrpc(modelName, model.inference.grpc, true, "") + } + disconnectV2Grpc() + } +} \ No newline at end of file