From c7a71e25a2a460c028ffbe34d29dd5ca09f30945 Mon Sep 17 00:00:00 2001 From: Filippe Spolti Date: Mon, 26 Aug 2024 20:00:46 -0400 Subject: [PATCH] [RHOAIENG-11527] - [backend] Incorrect values for number of requests in Kserve Performance metrics (#256) Signed-off-by: Spolti --- controllers/constants/constants.go | 1 + controllers/constants/runtime-metrics.go | 22 +++++++++---------- ...nferenceservice_controller_metrics_test.go | 18 ++++++--------- .../kserve_metrics_dashboard_reconciler.go | 19 +++++----------- controllers/utils/utils.go | 10 +++++++++ 5 files changed, 35 insertions(+), 35 deletions(-) diff --git a/controllers/constants/constants.go b/controllers/constants/constants.go index 11b63ace..a5d83dcc 100644 --- a/controllers/constants/constants.go +++ b/controllers/constants/constants.go @@ -56,6 +56,7 @@ const ( KserveMetricsConfigMapNameSuffix = "-metrics-dashboard" DefaultStorageConfig = "storage-config" IntervalValue = "1m" + RequestRateInterval = "5m" OvmsImageName = "openvino_model_server" TgisImageName = "text-generation-inference" VllmImageName = "vllm" diff --git a/controllers/constants/runtime-metrics.go b/controllers/constants/runtime-metrics.go index decbfd0c..7ca95b37 100644 --- a/controllers/constants/runtime-metrics.go +++ b/controllers/constants/runtime-metrics.go @@ -20,16 +20,16 @@ const ( CaikitMetricsData = `{ "config": [ { - "title": "Number of requests", + "title": "Requests per 5 minutes", "type": "REQUEST_COUNT", "queries": [ { "title": "Number of successful incoming requests", - "query": "round(sum(increase(predict_rpc_count_total{namespace='${NAMESPACE}',code='OK',model_id='${MODEL_NAME}'}[${RATE_INTERVAL}])))" + "query": "round(sum(increase(predict_rpc_count_total{namespace='${NAMESPACE}',code='OK',model_id='${MODEL_NAME}'}[${REQUEST_RATE_INTERVAL}])))" }, { "title": "Number of failed incoming requests", - "query": "round(sum(increase(predict_rpc_count_total{namespace='${NAMESPACE}',code!='OK',model_id='${MODEL_NAME}'}[${RATE_INTERVAL}])))" + "query": "round(sum(increase(predict_rpc_count_total{namespace='${NAMESPACE}',code!='OK',model_id='${MODEL_NAME}'}[${REQUEST_RATE_INTERVAL}])))" } ] }, @@ -74,16 +74,16 @@ const ( OvmsMetricsData = `{ "config": [ { - "title": "Number of requests", + "title": "Requests per 5 minutes", "type": "REQUEST_COUNT", "queries": [ { "title": "Number of successful incoming requests", - "query": "round(sum(increase(ovms_requests_success{namespace='${NAMESPACE}',name='${MODEL_NAME}'}[${RATE_INTERVAL}])))" + "query": "round(sum(increase(ovms_requests_success{namespace='${NAMESPACE}',name='${MODEL_NAME}'}[${REQUEST_RATE_INTERVAL}])))" }, { "title": "Number of failed incoming requests", - "query": "round(sum(increase(ovms_requests_fail{namespace='${NAMESPACE}',name='${MODEL_NAME}'}[${RATE_INTERVAL}])))" + "query": "round(sum(increase(ovms_requests_fail{namespace='${NAMESPACE}',name='${MODEL_NAME}'}[${REQUEST_RATE_INTERVAL}])))" } ] }, @@ -128,16 +128,16 @@ const ( TgisMetricsData = `{ "config": [ { - "title": "Number of requests", + "title": "Requests per 5 minutes", "type": "REQUEST_COUNT", "queries": [ { "title": "Number of successful incoming requests", - "query": "round(sum(increase(tgi_request_success{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}[${RATE_INTERVAL}])))" + "query": "round(sum(increase(tgi_request_success{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}[${REQUEST_RATE_INTERVAL}])))" }, { "title": "Number of failed incoming requests", - "query": "round(sum(increase(tgi_request_failure{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}[${RATE_INTERVAL}])))" + "query": "round(sum(increase(tgi_request_failure{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}[${REQUEST_RATE_INTERVAL}])))" } ] }, @@ -182,12 +182,12 @@ const ( VllmMetricsData = `{ "config": [ { - "title": "Number of requests", + "title": "Requests per 5 minutes", "type": "REQUEST_COUNT", "queries": [ { "title": "Number of successful incoming requests", - "query": "round(sum(increase(vllm:request_success_total{namespace='${NAMESPACE}',model_name='${model_name}'}[${RATE_INTERVAL}])))" + "query": "round(sum(increase(vllm:request_success_total{namespace='${NAMESPACE}',model_name='${model_name}'}[${REQUEST_RATE_INTERVAL}])))" } ] }, diff --git a/controllers/kserve_inferenceservice_controller_metrics_test.go b/controllers/kserve_inferenceservice_controller_metrics_test.go index e52e8a0c..2f7c3c8e 100644 --- a/controllers/kserve_inferenceservice_controller_metrics_test.go +++ b/controllers/kserve_inferenceservice_controller_metrics_test.go @@ -2,9 +2,9 @@ package controllers import ( "github.com/opendatahub-io/odh-model-controller/controllers/constants" + "github.com/opendatahub-io/odh-model-controller/controllers/utils" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" - "strings" "time" . "github.com/onsi/ginkgo" @@ -73,8 +73,8 @@ var _ = Describe("The KServe Dashboard reconciler", func() { Expect(err).NotTo(HaveOccurred()) Expect(metricsConfigMap).NotTo(BeNil()) - finaldata := substituteVariablesInQueries(constants.OvmsMetricsData, testNs, KserveOvmsInferenceServiceName, constants.IntervalValue) - expectedmetricsConfigMap := &corev1.ConfigMap{ + finaldata := utils.SubstituteVariablesInQueries(constants.OvmsMetricsData, testNs, KserveOvmsInferenceServiceName) + expectedMetricsConfigMap := &corev1.ConfigMap{ ObjectMeta: metav1.ObjectMeta{ Name: KserveOvmsInferenceServiceName + constants.KserveMetricsConfigMapNameSuffix, Namespace: testNs, @@ -84,7 +84,8 @@ var _ = Describe("The KServe Dashboard reconciler", func() { "metrics": finaldata, }, } - Expect(compareConfigMap(metricsConfigMap, expectedmetricsConfigMap)).Should(BeTrue()) + Expect(compareConfigMap(metricsConfigMap, expectedMetricsConfigMap)).Should(BeTrue()) + Expect(expectedMetricsConfigMap.Data).NotTo(HaveKeyWithValue("metrics", ContainSubstring("${REQUEST_RATE_INTERVAL}"))) }) It("if the runtime is not supported for metrics, it should create a configmap with the unsupported config", func() { @@ -95,7 +96,7 @@ var _ = Describe("The KServe Dashboard reconciler", func() { Expect(err).NotTo(HaveOccurred()) Expect(metricsConfigMap).NotTo(BeNil()) - expectedmetricsConfigMap := &corev1.ConfigMap{ + expectedMetricsConfigMap := &corev1.ConfigMap{ ObjectMeta: metav1.ObjectMeta{ Name: UnsupportedMetricsInferenceServiceName + constants.KserveMetricsConfigMapNameSuffix, Namespace: testNs, @@ -104,7 +105,7 @@ var _ = Describe("The KServe Dashboard reconciler", func() { "supported": "false", }, } - Expect(compareConfigMap(metricsConfigMap, expectedmetricsConfigMap)).Should(BeTrue()) + Expect(compareConfigMap(metricsConfigMap, expectedMetricsConfigMap)).Should(BeTrue()) }) It("if the isvc does not have a runtime specified, an unsupported metrics configmap should be created", func() { @@ -153,8 +154,3 @@ var _ = Describe("The KServe Dashboard reconciler", func() { }) }) }) - -func substituteVariablesInQueries(data string, namespace string, name string, IntervalValue string) string { - replacer := strings.NewReplacer("${NAMESPACE}", namespace, "${MODEL_NAME}", name, "${RATE_INTERVAL}", IntervalValue) - return replacer.Replace(data) -} diff --git a/controllers/reconcilers/kserve_metrics_dashboard_reconciler.go b/controllers/reconcilers/kserve_metrics_dashboard_reconciler.go index 0b3eb8e8..0eec5261 100644 --- a/controllers/reconcilers/kserve_metrics_dashboard_reconciler.go +++ b/controllers/reconcilers/kserve_metrics_dashboard_reconciler.go @@ -17,20 +17,18 @@ package reconcilers import ( "context" - "github.com/hashicorp/errwrap" - "github.com/opendatahub-io/odh-model-controller/controllers/utils" - "regexp" - ctrl "sigs.k8s.io/controller-runtime" - "strconv" - "strings" - "github.com/go-logr/logr" + "github.com/hashicorp/errwrap" kservev1alpha1 "github.com/kserve/kserve/pkg/apis/serving/v1alpha1" kservev1beta1 "github.com/kserve/kserve/pkg/apis/serving/v1beta1" "github.com/opendatahub-io/odh-model-controller/controllers/comparators" "github.com/opendatahub-io/odh-model-controller/controllers/constants" "github.com/opendatahub-io/odh-model-controller/controllers/processors" "github.com/opendatahub-io/odh-model-controller/controllers/resources" + "github.com/opendatahub-io/odh-model-controller/controllers/utils" + "regexp" + ctrl "sigs.k8s.io/controller-runtime" + "strconv" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -146,7 +144,7 @@ func (r *KserveMetricsDashboardReconciler) createDesiredResource(ctx context.Con return nil, err } if supported { - finaldata := substituteVariablesInQueries(data, isvc.Namespace, isvc.Name, constants.IntervalValue) + finaldata := utils.SubstituteVariablesInQueries(data, isvc.Namespace, isvc.Name) configMap.Data["metrics"] = finaldata } @@ -154,11 +152,6 @@ func (r *KserveMetricsDashboardReconciler) createDesiredResource(ctx context.Con } -func substituteVariablesInQueries(data string, namespace string, name string, IntervalValue string) string { - replacer := strings.NewReplacer("${NAMESPACE}", namespace, "${MODEL_NAME}", name, "${RATE_INTERVAL}", IntervalValue) - return replacer.Replace(data) -} - func (r *KserveMetricsDashboardReconciler) createConfigMap(isvc *kservev1beta1.InferenceService, supported bool, log logr.Logger) (*corev1.ConfigMap, error) { configMap := &corev1.ConfigMap{ ObjectMeta: metav1.ObjectMeta{ diff --git a/controllers/utils/utils.go b/controllers/utils/utils.go index 60385633..73780906 100644 --- a/controllers/utils/utils.go +++ b/controllers/utils/utils.go @@ -11,6 +11,7 @@ import ( "os" "reflect" "sort" + "strings" kservev1beta1 "github.com/kserve/kserve/pkg/apis/serving/v1beta1" "github.com/kuadrant/authorino/pkg/log" @@ -399,3 +400,12 @@ func FindSupportingRuntimeForISvc(ctx context.Context, cli client.Client, log lo return desiredServingRuntime, errors.New(constants.NoSuitableRuntimeError) } } + +func SubstituteVariablesInQueries(data string, namespace string, name string) string { + replacer := strings.NewReplacer( + "${NAMESPACE}", namespace, + "${MODEL_NAME}", name, + "${RATE_INTERVAL}", constants.IntervalValue, + "${REQUEST_RATE_INTERVAL}", constants.RequestRateInterval) + return replacer.Replace(data) +}