From 2d1a27d36feb0296501c79731021a21552952866 Mon Sep 17 00:00:00 2001 From: cliveseldon Date: Thu, 9 Jun 2022 14:49:05 +0100 Subject: [PATCH] Allow model scaling, k6 constant throughput tests and Prometheus/Grafana in Docker Compose install (#262) * small update to docs * Add prometheus to docker compose * add constant rate k7 test and scale replicas for model setting * test on k8s * allow grpc in constant rate tests * review comments --- docs/source/contents/metrics/index.md | 6 +- .../templates/seldon-v2-crds.yaml | 8 + k8s/yaml/seldon-v2-crds.yaml | 8 + operator/apis/mlops/v1alpha1/model_types.go | 4 +- .../crd/bases/mlops.seldon.io_models.yaml | 8 + operator/scheduler/model.go | 2 + samples/k8s-examples.ipynb | 19 +- scheduler/Dockerfile.grafana | 15 + scheduler/Makefile | 41 +- scheduler/all-base.yaml | 9 + scheduler/all-host-network.yaml | 9 + scheduler/all-internal.yaml | 9 +- .../dashboards/seldon_overview/seldon.json | 1012 +++++++++++++++++ scheduler/config/grafana/grafana.ini | 12 + .../provisioning/dashboards/dashboard.yml | 16 + .../provisioning/datasources/prometheus.yml | 14 + scheduler/config/prometheus-host.yml | 6 + scheduler/config/prometheus-internal.yml | 6 + scheduler/env.all | 1 + tests/k6/README.md | 16 +- tests/k6/components/settings.js | 32 + tests/k6/scenarios/model_constant_rate.js | 48 + 22 files changed, 1285 insertions(+), 16 deletions(-) create mode 100644 scheduler/Dockerfile.grafana create mode 100644 scheduler/config/grafana/dashboards/seldon_overview/seldon.json create mode 100644 scheduler/config/grafana/grafana.ini create mode 100644 scheduler/config/grafana/provisioning/dashboards/dashboard.yml create mode 100644 scheduler/config/grafana/provisioning/datasources/prometheus.yml create mode 100644 scheduler/config/prometheus-host.yml create mode 100644 scheduler/config/prometheus-internal.yml create mode 100644 tests/k6/scenarios/model_constant_rate.js diff --git a/docs/source/contents/metrics/index.md 
b/docs/source/contents/metrics/index.md index 8c9fa81962..cce3ad55ed 100644 --- a/docs/source/contents/metrics/index.md +++ b/docs/source/contents/metrics/index.md @@ -27,7 +27,11 @@ We have a prebuilt grafana dashboard that makes use of many of the metrics that ![kafka](dashboard.png) -### Installation +### Local Use + +Grafana and Prometheus are available when you run Seldon locally. You will be able to connect to the Grafana dashboard at `http://localhost:3000`. Prometheus will be available at `http://localhost:9090`. + +### Kubernetes Installation Download the dashboard from [SCv2 dashboard](https://github.com/SeldonIO/seldon-core-v2/blob/master/prometheus/dashboards/Seldon%20Core%20Model%20Mesh%20Monitoring.json) and import it in grafana, making sure that the data source is pointing to the correct prometheus store. Find more information on how to import the dashboard [here](https://grafana.com/docs/grafana/latest/dashboards/export-import/) diff --git a/k8s/helm-charts/seldon-core-v2-crds/templates/seldon-v2-crds.yaml b/k8s/helm-charts/seldon-core-v2-crds/templates/seldon-v2-crds.yaml index d190506d1d..9162c31480 100644 --- a/k8s/helm-charts/seldon-core-v2-crds/templates/seldon-v2-crds.yaml +++ b/k8s/helm-charts/seldon-core-v2-crds/templates/seldon-v2-crds.yaml @@ -430,11 +430,19 @@ spec: that was last processed by the controller. format: int64 type: integer + replicas: + description: 'Important: Run "make" to regenerate code after modifying + this file' + format: int32 + type: integer type: object type: object served: true storage: true subresources: + scale: + specReplicasPath: .spec.replicas + statusReplicasPath: .status.replicas status: {} status: acceptedNames: diff --git a/k8s/yaml/seldon-v2-crds.yaml b/k8s/yaml/seldon-v2-crds.yaml index d190506d1d..9162c31480 100644 --- a/k8s/yaml/seldon-v2-crds.yaml +++ b/k8s/yaml/seldon-v2-crds.yaml @@ -430,11 +430,19 @@ spec: that was last processed by the controller. 
format: int64 type: integer + replicas: + description: 'Important: Run "make" to regenerate code after modifying + this file' + format: int32 + type: integer type: object type: object served: true storage: true subresources: + scale: + specReplicasPath: .spec.replicas + statusReplicasPath: .status.replicas status: {} status: acceptedNames: diff --git a/operator/apis/mlops/v1alpha1/model_types.go b/operator/apis/mlops/v1alpha1/model_types.go index aec0575661..07b3e4d37e 100644 --- a/operator/apis/mlops/v1alpha1/model_types.go +++ b/operator/apis/mlops/v1alpha1/model_types.go @@ -89,13 +89,15 @@ type InferenceArtifactSpec struct { // ModelStatus defines the observed state of Model type ModelStatus struct { - // Important: Run "make" to regenerate code after modifying this file + // Total number of replicas targeted by this model + Replicas int32 `json:"replicas,omitempty"` duckv1.Status `json:",inline"` } //+kubebuilder:object:root=true //+kubebuilder:subresource:status //+kubebuilder:resource:shortName=mlm +// +kubebuilder:subresource:scale:specpath=.spec.replicas,statuspath=.status.replicas // Model is the Schema for the models API type Model struct { diff --git a/operator/config/crd/bases/mlops.seldon.io_models.yaml b/operator/config/crd/bases/mlops.seldon.io_models.yaml index 3f0bcb157c..a6714da54e 100644 --- a/operator/config/crd/bases/mlops.seldon.io_models.yaml +++ b/operator/config/crd/bases/mlops.seldon.io_models.yaml @@ -151,11 +151,19 @@ spec: that was last processed by the controller. 
format: int64 type: integer + replicas: + description: 'Important: Run "make" to regenerate code after modifying + this file' + format: int32 + type: integer type: object type: object served: true storage: true subresources: + scale: + specReplicasPath: .spec.replicas + statusReplicasPath: .status.replicas status: {} status: acceptedNames: diff --git a/operator/scheduler/model.go b/operator/scheduler/model.go index 7b0dde9a17..d4bcd24d43 100644 --- a/operator/scheduler/model.go +++ b/operator/scheduler/model.go @@ -135,6 +135,8 @@ func (s *SchedulerClient) SubscribeModelEvents(ctx context.Context) error { logger.Info("Setting model to not ready", "name", event.ModelName, "state", latestVersionStatus.State.State.String()) latestModel.Status.CreateAndSetCondition(v1alpha1.ModelReady, false, latestVersionStatus.State.Reason) } + // Set the total number of replicas targeted by this model + latestModel.Status.Replicas = int32(latestVersionStatus.State.GetAvailableReplicas() + latestVersionStatus.State.GetUnavailableReplicas()) return s.updateModelStatus(latestModel) }) if retryErr != nil { diff --git a/samples/k8s-examples.ipynb b/samples/k8s-examples.ipynb index 1b420bedd5..c2ed36b749 100644 --- a/samples/k8s-examples.ipynb +++ b/samples/k8s-examples.ipynb @@ -17,7 +17,7 @@ { "data": { "text/plain": [ - "'172.22.255.9'" + "'172.31.255.9'" ] }, "execution_count": 1, @@ -89,7 +89,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "id": "beneficial-logan", "metadata": {}, "outputs": [ @@ -102,12 +102,12 @@ } ], "source": [ - "build!kubectl wait --for condition=ready --timeout=300s model --all -n seldon-mesh" + "!kubectl wait --for condition=ready --timeout=300s model --all -n seldon-mesh" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "id": "prepared-duration", "metadata": {}, "outputs": [ @@ -118,16 +118,17 @@ "{\r\n", " \"conditions\": [\r\n", " {\r\n", - " \"lastTransitionTime\": \"2022-05-26T10:09:32Z\",\r\n", + 
" \"lastTransitionTime\": \"2022-06-03T14:35:59Z\",\r\n", " \"status\": \"True\",\r\n", " \"type\": \"ModelReady\"\r\n", " },\r\n", " {\r\n", - " \"lastTransitionTime\": \"2022-05-26T10:09:32Z\",\r\n", + " \"lastTransitionTime\": \"2022-06-03T14:35:59Z\",\r\n", " \"status\": \"True\",\r\n", " \"type\": \"Ready\"\r\n", " }\r\n", - " ]\r\n", + " ],\r\n", + " \"replicas\": 1\r\n", "}\r\n" ] } @@ -138,7 +139,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "id": "67900afd", "metadata": {}, "outputs": [ @@ -149,7 +150,7 @@ "{\r\n", "\t\"model_name\": \"iris_1\",\r\n", "\t\"model_version\": \"1\",\r\n", - "\t\"id\": \"5890ac9d-c1ed-4343-b9e6-a460bead5fe8\",\r\n", + "\t\"id\": \"3be6542c-5ad2-4ebc-a0d4-842377653b5d\",\r\n", "\t\"parameters\": null,\r\n", "\t\"outputs\": [\r\n", "\t\t{\r\n", diff --git a/scheduler/Dockerfile.grafana b/scheduler/Dockerfile.grafana new file mode 100644 index 0000000000..5f5aad2493 --- /dev/null +++ b/scheduler/Dockerfile.grafana @@ -0,0 +1,15 @@ +FROM grafana/grafana:8.5.4 + +# Disable Login form or not +ENV GF_AUTH_DISABLE_LOGIN_FORM "true" +# Allow anonymous authentication or not +ENV GF_AUTH_ANONYMOUS_ENABLED "true" +# Role of anonymous user +ENV GF_AUTH_ANONYMOUS_ORG_ROLE "Admin" + +# Add provisioning +ADD ./config/grafana/provisioning /etc/grafana/provisioning +# Add configuration file +ADD ./config/grafana/grafana.ini /etc/grafana/grafana.ini +# Add dashboard json files +ADD ./config/grafana/dashboards /etc/grafana/dashboards \ No newline at end of file diff --git a/scheduler/Makefile b/scheduler/Makefile index 7e1a88df09..43992e012b 100644 --- a/scheduler/Makefile +++ b/scheduler/Makefile @@ -7,6 +7,8 @@ MODELGATEWAY_IMG ?= ${DOCKERHUB_USERNAME}/seldon-modelgateway:${CUSTOM_IMAGE_TAG PIPELINEGATEWAY_IMG ?= ${DOCKERHUB_USERNAME}/seldon-pipelinegateway:${CUSTOM_IMAGE_TAG} DATAFLOW_IMG ?= ${DOCKERHUB_USERNAME}/seldon-dataflow-engine:${CUSTOM_IMAGE_TAG} ENVOY_IMG ?= 
${DOCKERHUB_USERNAME}/seldon-envoy:${CUSTOM_IMAGE_TAG} +# Grafana image only used for Docker compose not k8s +GRAFANA_IMG ?= ${DOCKERHUB_USERNAME}/seldon-grafana:${CUSTOM_IMAGE_TAG} MLSERVER_IMG ?= seldonio/mlserver:1.1.0.dev3 TRITON_IMG ?= nvcr.io/nvidia/tritonserver:21.12-py3 KIND_NAME=ansible @@ -143,11 +145,19 @@ docker-build-dataflow: copy-apis data-flow/opentelemetry-javaagent.jar docker-push-dataflow: docker push ${DATAFLOW_IMG} +.PHONY: docker-build-grafana +docker-build-grafana: + docker build -t ${GRAFANA_IMG} -f Dockerfile.grafana . + +.PHONY: docker-push-grafana +docker-push-grafana: + docker push ${GRAFANA_IMG} + .PHONY: docker-build-all -docker-build-all: docker-build-dataflow docker-build-agent docker-build-envoy docker-build-rclone docker-build-scheduler docker-build-modelgateway docker-build-pipelinegateway +docker-build-all: docker-build-dataflow docker-build-agent docker-build-envoy docker-build-rclone docker-build-scheduler docker-build-modelgateway docker-build-pipelinegateway docker-build-grafana .PHONY: docker-push-all -docker-push-all: docker-push-agent docker-push-envoy docker-push-rclone docker-push-scheduler docker-push-modelgateway docker-push-pipelinegateway docker-push-dataflow +docker-push-all: docker-push-agent docker-push-envoy docker-push-rclone docker-push-scheduler docker-push-modelgateway docker-push-pipelinegateway docker-push-dataflow docker-push-grafana ##################################### @@ -203,7 +213,8 @@ DOCKER_COMPOSE_COMMON_IMAGES = \ RCLONE_IMAGE_AND_TAG=${RCLONE_IMG} \ SERVER_MLSERVER_IMAGE_AND_TAG=${MLSERVER_IMG} \ TRITON_LOG_LEVEL=${DOCKER_COMPOSE_TRITON_LOG_LEVEL} \ - SERVER_TRITON_IMAGE_AND_TAG=${TRITON_IMG} + SERVER_TRITON_IMAGE_AND_TAG=${TRITON_IMG} \ + GRAFANA_IMAGE_AND_TAG=${GRAFANA_IMG} DOCKER_COMPOSE_TRITON_LOG_LEVEL ?= 0 @@ -440,6 +451,30 @@ stop-kafka: start-kafka-host: ${DOCKER_COMPOSE_SERVICE_HOST_COMMAND} up -d kafka +.PHONY: start-prometheus +start-prometheus: + ${DOCKER_COMPOSE_SERVICE_COMMAND} up 
-d prometheus + +.PHONY: stop-prometheus +stop-prometheus: + ${DOCKER_COMPOSE_SERVICE_COMMAND} rm --stop --force ${DOCKER_COMPOSE_REMOVE_VOLUMES} prometheus + +.PHONY: start-prometheus-host +start-prometheus-host: + ${DOCKER_COMPOSE_SERVICE_HOST_COMMAND} up -d prometheus + +.PHONY: start-grafana +start-grafana: + ${DOCKER_COMPOSE_SERVICE_COMMAND} up -d grafana + +.PHONY: stop-grafana +stop-grafana: + ${DOCKER_COMPOSE_SERVICE_COMMAND} rm --stop --force ${DOCKER_COMPOSE_REMOVE_VOLUMES} grafana + +.PHONY: start-grafana-host +start-grafana-host: + ${DOCKER_COMPOSE_SERVICE_HOST_COMMAND} up -d grafana + .PHONY: stop-kafka-host stop-kafka-host: ${DOCKER_COMPOSE_SERVICE_HOST_COMMAND} rm --stop --force ${DOCKER_COMPOSE_REMOVE_VOLUMES} kafka diff --git a/scheduler/all-base.yaml b/scheduler/all-base.yaml index 7c223cf036..50613d1b8b 100644 --- a/scheduler/all-base.yaml +++ b/scheduler/all-base.yaml @@ -165,3 +165,12 @@ services: environment: - ALLOW_ANONYMOUS_LOGIN=yes + prometheus: + image: prom/prometheus:latest + ports: + - "9090:9090" + + grafana: + image: "${GRAFANA_IMAGE_AND_TAG}" + ports: + - 3000:3000 diff --git a/scheduler/all-host-network.yaml b/scheduler/all-host-network.yaml index d949876869..36ecb882e7 100644 --- a/scheduler/all-host-network.yaml +++ b/scheduler/all-host-network.yaml @@ -164,3 +164,12 @@ services: zookeeper: ports: - "2181:2181" + + prometheus: + command: + - --config.file=/etc/prometheus/prometheus-host.yml + volumes: + - type: bind + source: ./config + target: /etc/prometheus + diff --git a/scheduler/all-internal.yaml b/scheduler/all-internal.yaml index 1166105bc8..e45df07682 100644 --- a/scheduler/all-internal.yaml +++ b/scheduler/all-internal.yaml @@ -185,7 +185,6 @@ services: dataflow: - environment: environment: - SELDON_UPSTREAM_PORT=${SCHEDULER_DATAFLOW_PORT} - SELDON_KAFKA_BOOTSTRAP_SERVERS=kafka:${KAFKA_BROKER_INTERNAL_PORT} @@ -218,3 +217,11 @@ services: zookeeper: ports: - "2181:2181" + + prometheus: + command: + - 
--config.file=/etc/prometheus/prometheus-internal.yml + volumes: + - type: bind + source: ./config + target: /etc/prometheus diff --git a/scheduler/config/grafana/dashboards/seldon_overview/seldon.json b/scheduler/config/grafana/dashboards/seldon_overview/seldon.json new file mode 100644 index 0000000000..4ce33689a6 --- /dev/null +++ b/scheduler/config/grafana/dashboards/seldon_overview/seldon.json @@ -0,0 +1,1012 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 26, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": "prometheus", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 0, + "y": 0 + }, + "id": 10, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "limit": 2, + "values": false + }, + "text": {}, + "textMode": "value_and_name" + }, + "pluginVersion": "8.4.6", + "targets": [ + { + "datasource": "prometheus", + "exemplar": true, + "expr": "count (seldon_loaded_model_memory_bytes_gauge >0 )", + "hide": false, + "interval": "", + "legendFormat": "In-memory", + "refId": "B" + }, + { + "datasource": "prometheus", + "exemplar": true, + "expr": "sum (seldon_loaded_model_gauge)", + "hide": false, + "interval": "", + "legendFormat": "Registered", + "refId": "A" + } + ], + "title": "Models", + "transformations": [], + "type": 
"stat" + }, + { + "datasource": "prometheus", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 3, + "y": 0 + }, + "id": 3, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": true + }, + "textMode": "auto" + }, + "pluginVersion": "8.4.6", + "targets": [ + { + "datasource": "prometheus", + "exemplar": true, + "expr": "sum by(server) (seldon_loaded_model_gauge)", + "format": "table", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Registered Model Replicas", + "transformations": [ + { + "id": "groupBy", + "options": { + "fields": { + "Value": { + "aggregations": [ + "lastNotNull" + ], + "operation": "aggregate" + }, + "server": { + "aggregations": [], + "operation": "groupby" + }, + "server_replica": { + "aggregations": [], + "operation": "groupby" + } + } + } + } + ], + "type": "stat" + }, + { + "datasource": "prometheus", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 7, + "y": 0 + }, + "id": 5, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": true + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.4.6", + "targets": [ + { + "datasource": "prometheus", + "exemplar": true, + "expr": "count by(server) 
(seldon_loaded_model_memory_bytes_gauge > 0)", + "format": "table", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "In-Memory Model Replicas", + "transformations": [ + { + "id": "groupBy", + "options": { + "fields": { + "Value": { + "aggregations": [ + "lastNotNull" + ], + "operation": "aggregate" + }, + "server": { + "aggregations": [], + "operation": "groupby" + }, + "server_replica": { + "aggregations": [], + "operation": "groupby" + } + } + } + } + ], + "type": "stat" + }, + { + "datasource": "prometheus", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "#EAB839", + "value": 0.8 + }, + { + "color": "dark-red", + "value": 0.9 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 5, + "x": 11, + "y": 0 + }, + "id": 9, + "options": { + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "limit": 4, + "values": true + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "text": {} + }, + "pluginVersion": "8.4.6", + "targets": [ + { + "datasource": "prometheus", + "exemplar": true, + "expr": "sum by(server) (seldon_loaded_model_memory_bytes_gauge) / sum by(server) (seldon_server_replica_memory_capacity_overcommit_bytes_gauge)", + "format": "table", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "In-Memory Model Replicas (Memory Slots)", + "transformations": [ + { + "id": "groupBy", + "options": { + "fields": { + "Time": { + "aggregations": [], + "operation": "groupby" + }, + "Value": { + "aggregations": [ + "lastNotNull" + ], + "operation": "aggregate" + }, + "Value #A": { + "aggregations": [ + "sum" + ], + 
"operation": "aggregate" + }, + "Value #B": { + "aggregations": [ + "sum" + ], + "operation": "aggregate" + }, + "model_internal": { + "aggregations": [], + "operation": "groupby" + }, + "server": { + "aggregations": [], + "operation": "groupby" + }, + "server_replica": { + "aggregations": [], + "operation": "groupby" + } + } + } + }, + { + "id": "groupBy", + "options": { + "fields": { + "Value #A (sum)": { + "aggregations": [ + "last" + ], + "operation": "aggregate" + }, + "Value #B (sum)": { + "aggregations": [ + "last" + ], + "operation": "aggregate" + }, + "Value (lastNotNull)": { + "aggregations": [ + "lastNotNull" + ], + "operation": "aggregate" + }, + "server": { + "aggregations": [], + "operation": "groupby" + }, + "server_replica": { + "aggregations": [], + "operation": "groupby" + } + } + } + } + ], + "type": "gauge" + }, + { + "datasource": "prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 5 + }, + "id": 12, + "options": { + "legend": { + "calcs": [], + "displayMode": "hidden", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": "prometheus", + "exemplar": true, + "expr": "sum(rate(seldon_cache_evict_count[1m]))", + 
"format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "Evict Rate", + "refId": "A" + }, + { + "datasource": "prometheus", + "exemplar": true, + "expr": "sum(rate(seldon_cache_miss_count[1m]))", + "hide": false, + "interval": "", + "legendFormat": "Miss Rate", + "refId": "B" + } + ], + "title": "Model Evict/Miss Rate [1m]", + "type": "timeseries" + }, + { + "datasource": "prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 5 + }, + "id": 20, + "options": { + "legend": { + "calcs": [], + "displayMode": "hidden", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": "prometheus", + "exemplar": true, + "expr": "sum by (server) (rate(seldon_load_model_counter[1m]))", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{server}}_Load", + "refId": "A" + }, + { + "datasource": "prometheus", + "exemplar": true, + "expr": "sum by (server) (rate(seldon_unload_model_counter[1m]))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "Unloa{{server}}_Loadd", + "refId": "B" 
+ } + ], + "title": "Model Load/Unload Rate [1m]", + "type": "timeseries" + }, + { + "datasource": "prometheus", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 12 + }, + "id": 7, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.4.6", + "targets": [ + { + "datasource": "prometheus", + "exemplar": true, + "expr": "sum(seldon_server_replica_memory_capacity_bytes_gauge)", + "hide": false, + "interval": "", + "legendFormat": "Capacity", + "refId": "B" + }, + { + "datasource": "prometheus", + "exemplar": true, + "expr": "sum(seldon_loaded_model_memory_bytes_gauge)", + "hide": false, + "interval": "", + "legendFormat": "Used", + "refId": "C" + }, + { + "datasource": "prometheus", + "exemplar": true, + "expr": "sum(seldon_server_replica_memory_capacity_overcommit_bytes_gauge)", + "hide": false, + "interval": "", + "legendFormat": "Capacity with Over-commit", + "refId": "A" + }, + { + "datasource": "prometheus", + "exemplar": true, + "expr": "sum(seldon_loaded_model_memory_bytes_gauge) + sum(seldon_evicted_model_memory_bytes_gauge)", + 
"hide": false, + "interval": "", + "legendFormat": "Used with Over-commit", + "refId": "D" + } + ], + "title": "Memory Slots", + "transformations": [], + "type": "timeseries" + }, + { + "datasource": "prometheus", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 12 + }, + "id": 15, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.4.6", + "targets": [ + { + "datasource": "prometheus", + "exemplar": true, + "expr": "avg((rate(seldon_aggregate_infer_seconds_total[1m]) / rate(seldon_aggregate_infer_total[1m])) > 0 ) by (server, method_type)", + "hide": false, + "interval": "", + "legendFormat": "{{server}}_{{method_type}}_avg", + "refId": "A" + } + ], + "title": "Infer Latency [1m]", + "transformations": [], + "type": "timeseries" + }, + { + "datasource": "prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + 
"legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 8, + "x": 0, + "y": 20 + }, + "id": 19, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": "prometheus", + "exemplar": true, + "expr": "container_memory_working_set_bytes{container=\"mlserver\"}", + "interval": "10s", + "legendFormat": "{{pod}}", + "refId": "A" + }, + { + "datasource": "prometheus", + "exemplar": true, + "expr": "container_memory_working_set_bytes{container=\"triton\"}", + "hide": false, + "interval": "10s", + "legendFormat": "{{pod}}", + "refId": "B" + } + ], + "title": "Memory Used", + "type": "timeseries" + }, + { + "datasource": "prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "cores", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + 
"value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 8, + "x": 8, + "y": 20 + }, + "id": 17, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": "prometheus", + "exemplar": true, + "expr": "rate (container_cpu_usage_seconds_total{container=\"mlserver\"}[1m])", + "interval": "10s", + "legendFormat": "{{pod}}", + "refId": "A" + }, + { + "datasource": "prometheus", + "exemplar": true, + "expr": "rate (container_cpu_usage_seconds_total{container=\"triton\"}[1m])", + "hide": false, + "interval": "", + "legendFormat": "{{pod}}", + "refId": "B" + } + ], + "title": "CPU [1m]", + "type": "timeseries" + } + ], + "refresh": "5s", + "schemaVersion": 35, + "style": "dark", + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-15m", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Seldon Core Model Mesh Monitoring", + "uid": "MHloCP_7z", + "version": 22, + "weekStart": "" +} diff --git a/scheduler/config/grafana/grafana.ini b/scheduler/config/grafana/grafana.ini new file mode 100644 index 0000000000..13f87ed2a4 --- /dev/null +++ b/scheduler/config/grafana/grafana.ini @@ -0,0 +1,12 @@ +[paths] +provisioning = /etc/grafana/provisioning + +[server] +enable_gzip = true + +[security] +# If you want to embed grafana into an iframe for example +allow_embedding = true + +[users] +default_theme = dark \ No newline at end of file diff --git a/scheduler/config/grafana/provisioning/dashboards/dashboard.yml b/scheduler/config/grafana/provisioning/dashboards/dashboard.yml new file mode 100644 index 0000000000..04dcb5c82e --- /dev/null +++ b/scheduler/config/grafana/provisioning/dashboards/dashboard.yml @@ -0,0 +1,16 @@ +apiVersion: 1 + +providers: + - name: Seldon + org_id: 1 + # name of the dashboard folder. Required + folder: '' + # provider type. 
Required + type: 'file' + disableDeletion: false + editable: true + updateIntervalSeconds: 5 + allowUiUpdates: true + options: + path: /etc/grafana/dashboards + foldersFromFilesStructure: true diff --git a/scheduler/config/grafana/provisioning/datasources/prometheus.yml b/scheduler/config/grafana/provisioning/datasources/prometheus.yml new file mode 100644 index 0000000000..b4603657bc --- /dev/null +++ b/scheduler/config/grafana/provisioning/datasources/prometheus.yml @@ -0,0 +1,14 @@ +apiVersion: 1 + +deleteDatasources: + - name: prometheus + orgId: 1 + +datasources: + - name: prometheus + type: prometheus + access: proxy + url: http://prometheus:9090 + orgId: 1 + + diff --git a/scheduler/config/prometheus-host.yml b/scheduler/config/prometheus-host.yml new file mode 100644 index 0000000000..f1b60db508 --- /dev/null +++ b/scheduler/config/prometheus-host.yml @@ -0,0 +1,6 @@ +scrape_configs: + - job_name: 'agent' + scrape_interval: 10s + static_configs: + - targets: ['0.0.0.0:9006'] + - targets: ['0.0.0.0:9007'] diff --git a/scheduler/config/prometheus-internal.yml b/scheduler/config/prometheus-internal.yml new file mode 100644 index 0000000000..7f8f74da19 --- /dev/null +++ b/scheduler/config/prometheus-internal.yml @@ -0,0 +1,6 @@ +scrape_configs: + - job_name: 'agent' + scrape_interval: 10s + static_configs: + - targets: ['agent-mlserver:9006'] + - targets: ['agent-triton:9006'] diff --git a/scheduler/env.all b/scheduler/env.all index 5e66068070..5e5bfff9dc 100644 --- a/scheduler/env.all +++ b/scheduler/env.all @@ -38,6 +38,7 @@ SERVER_MLSERVER_IMAGE_AND_TAG=seldonio/mlserver:1.0.1 SERVER_TRITON_IMAGE_AND_TAG=nvcr.io/nvidia/tritonserver:21.12-py3 SCHEDULER_IMAGE_AND_TAG=seldonio/seldon-scheduler:latest KAFKA_IMAGE_AND_TAG=quay.io/strimzi/kafka:0.28.0-kafka-3.1.0 +GRAFANA_IMAGE_AND_TAG=seldonio/seldon-grafana:latest AGENT_OVERCOMMIT_PERCENTAGE=20 AGENT_MEMORY_REQUEST=10000000 diff --git a/tests/k6/README.md b/tests/k6/README.md index d87f2b4e54..fd32ac8620 100644 
--- a/tests/k6/README.md +++ b/tests/k6/README.md @@ -37,4 +37,18 @@ For k8s you will need to update the default endpoints to the services exposed, e ``` MODEL_TYPE="tfsimple" SCHEDULER_ENDPOINT=172.18.255.4:9004 INFER_GRPC_ENDPOINT=172.18.255.3:80 INFER_HTTP_ENDPOINT=http://172.18.255.3 k6 run -u 5 -i 50 scenarios/load_predict_unload.js -``` \ No newline at end of file +``` + +## Constant Throughput Test + +Run against a model named `iris` which is of type `iris`, using the Envoy HTTP endpoint as given. + +``` + MODEL_NAME="iris" MODEL_TYPE="iris" INFER_HTTP_ENDPOINT="http://172.31.255.9" k6 run scenarios/model_constant_rate.js +``` + +Run locally but with gRPC + +``` +INFER_TYPE="grpc" MODEL_TYPE="iris" k6 run scenarios/model_constant_rate.js +``` diff --git a/tests/k6/components/settings.js b/tests/k6/components/settings.js index 56564e7301..d8eb4f49f7 100644 --- a/tests/k6/components/settings.js +++ b/tests/k6/components/settings.js @@ -110,6 +110,20 @@ function inferBatchSize() { return 1 } +function modelStartIdx() { + if (__ENV.MODEL_START_IDX) { + return parseInt(__ENV.MODEL_START_IDX) + } + return 0 +} + +function modelEndIdx() { + if (__ENV.MODEL_END_IDX) { + return parseInt(__ENV.MODEL_END_IDX) + } + return 0 +} + function isLoadPipeline() { if (__ENV.DATAFLOW_TAG) { return !(__ENV.DATAFLOW_TAG === "") @@ -131,6 +145,13 @@ function modelNamePrefix() { return "model" } +function modelName() { + if (__ENV.MODEL_NAME) { + return __ENV.MODEL_NAME + } + return "" +} + function experimentNamePrefix() { if (__ENV.EXPERIMENTNAME_PREFIX) { return __ENV.EXPERIMENTNAME_PREFIX @@ -138,6 +159,13 @@ function experimentNamePrefix() { return "experiment" } +function inferType() { + if (__ENV.INFER_TYPE) { + return __ENV.INFER_TYPE + } + return "REST" +} + export function getConfig() { return { "schedulerEndpoint": schedulerEndpoint(), @@ -160,5 +188,9 @@ export function getConfig() { "experimentNamePrefix": experimentNamePrefix(), "loadExperiment" : loadExperiment(), 
"unloadExperiment": unloadExperiment(), + "modelStartIdx" : modelStartIdx(), + "modelEndIdx" : modelEndIdx(), + "modelName" : modelName(), + "inferType" : inferType(), } } \ No newline at end of file diff --git a/tests/k6/scenarios/model_constant_rate.js b/tests/k6/scenarios/model_constant_rate.js new file mode 100644 index 0000000000..8db8221589 --- /dev/null +++ b/tests/k6/scenarios/model_constant_rate.js @@ -0,0 +1,48 @@ +import {inferHttpLoop, inferGrpcLoop, inferHttp, inferGrpc, connectV2Grpc, disconnectV2Grpc} from '../components/v2.js' +import {getConfig} from '../components/settings.js' +import {generateModel } from '../components/model.js' +import { vu, scenario } from 'k6/execution'; +import { randomIntBetween } from 'https://jslib.k6.io/k6-utils/1.2.0/index.js'; + +export const options = { + scenarios: { + constant_request_rate: { + executor: 'constant-arrival-rate', + rate: 10, + timeUnit: '1s', + duration: '5s', + preAllocatedVUs: 1, // how large the initial pool of VUs would be + maxVUs: 100, // if the preAllocatedVUs are not enough, we can initialize more + }, + }, +}; + +export function setup() { + return getConfig() +} + +export default function (config) { + const modelIdx = randomIntBetween(config.modelStartIdx, config.modelEndIdx) + const modelName = config.modelNamePrefix + modelIdx.toString() + const model = generateModel(config.modelType, modelName, 0, 1, + config.isSchedulerProxy, config.modelMemoryBytes, config.inferBatchSize) + const modelDefn = model.modelDefn + const httpEndpoint = config.inferHttpEndpoint + const grpcEndpoint = config.inferGrpcEndpoint + + if (config.inferType === "REST") { + if (config.modelName !== "") { + inferHttp(httpEndpoint, config.modelName, model.inference.http, true, "") + } else { + inferHttp(httpEndpoint, modelName, model.inference.http, true, "") + } + } else { + connectV2Grpc(grpcEndpoint) + if (config.modelName !== "") { + inferGrpc(config.modelName, model.inference.grpc, true, "") + } else { + 
inferGrpc(modelName, model.inference.grpc, true, "") + } + disconnectV2Grpc() + } +} \ No newline at end of file