Skip to content

Commit

Permalink
Add metrics for admission attempts count and duration
Browse files Browse the repository at this point in the history
Add role and rolebinding for prometheus to be able to list the services in the kueue-system namespace.

Change-Id: I77cf51536ebf53ece9a4ba2d8457dbc3e71d1e8d
  • Loading branch information
alculquicondor committed Apr 27, 2022
1 parent 9968361 commit 3173820
Show file tree
Hide file tree
Showing 5 changed files with 114 additions and 2 deletions.
1 change: 1 addition & 0 deletions config/prometheus/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
# Manifests included by this kustomization: the existing monitor plus the
# Role/RoleBinding defined in role.yaml.
resources:
- monitor.yaml
- role.yaml
46 changes: 46 additions & 0 deletions config/prometheus/role.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Namespaced Role granting read-only (get/list/watch) access to the objects
# listed below, so that Prometheus can list services in this namespace.
# NOTE(review): "system" is presumably rewritten by kustomize to the real
# deployment namespace (e.g. kueue-system) — confirm against the overlay.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: prometheus-k8s
namespace: system
rules:
# Core API group: services, endpoints and pods.
- apiGroups:
- ""
resources:
- services
- endpoints
- pods
verbs:
- get
- list
- watch
# Ingresses under the "extensions" API group.
- apiGroups:
- extensions
resources:
- ingresses
verbs:
- get
- list
- watch
# Ingresses under the "networking.k8s.io" API group.
- apiGroups:
- networking.k8s.io
resources:
- ingresses
verbs:
- get
- list
- watch
---
# Binds the prometheus-k8s Role to the prometheus-k8s ServiceAccount that
# lives in the "monitoring" namespace.
# NOTE(review): assumes Prometheus runs under that service account — confirm
# against the cluster's Prometheus installation.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: prometheus-k8s
namespace: system
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: prometheus-k8s
subjects:
- kind: ServiceAccount
name: prometheus-k8s
namespace: monitoring
4 changes: 2 additions & 2 deletions config/rbac/auth_proxy_service.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ metadata:
spec:
ports:
- name: https
port: 8443
port: 443
protocol: TCP
targetPort: https
targetPort: 8443
selector:
control-plane: controller-manager
58 changes: 58 additions & 0 deletions pkg/metrics/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/*
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package metrics

import (
"time"

"github.com/prometheus/client_golang/prometheus"
"sigs.k8s.io/controller-runtime/pkg/metrics"
)

// AdmissionResult is the outcome of one admission attempt. Its string value
// is used as the "result" label on the admission metrics.
type AdmissionResult string

const (
// SuccessAdmissionResult means that at least one workload was admitted
// during the attempt.
SuccessAdmissionResult AdmissionResult = "success"
// InadmissibleAdmissionResult means that no workload was admitted during
// the attempt.
InadmissibleAdmissionResult AdmissionResult = "inadmissible"
)

// admissionAttempts counts admission attempts, partitioned by the "result"
// label.
var admissionAttempts = prometheus.NewCounterVec(
	prometheus.CounterOpts{
		Name: "admission_attempts_total",
		Help: "Number of attempts to admit pods, by result. `success` means that at least one workload was admitted, `inadmissible` means that no workload was admitted.",
	},
	[]string{"result"},
)

// admissionAttemptLatency records how long each admission attempt took, in
// seconds, partitioned by the "result" label. No explicit buckets are
// configured here.
var admissionAttemptLatency = prometheus.NewHistogramVec(
	prometheus.HistogramOpts{
		Name: "admission_attempt_duration_seconds",
		Help: "Latency of an admission attempt",
	},
	[]string{"result"},
)

// AdmissionAttempt records one admission attempt: it increments the attempts
// counter and observes the attempt's duration (in seconds) in the latency
// histogram, both under the label value derived from result.
func AdmissionAttempt(result AdmissionResult, duration time.Duration) {
	label := string(result)
	seconds := duration.Seconds()

	admissionAttempts.WithLabelValues(label).Inc()
	admissionAttemptLatency.WithLabelValues(label).Observe(seconds)
}

// init registers the package's collectors with the controller-runtime metrics
// registry so they are served alongside the manager's other metrics.
//
// Both collectors must be registered: AdmissionAttempt observes into
// admissionAttemptLatency, and an unregistered histogram would accept those
// observations but never be exported.
func init() {
	metrics.Registry.MustRegister(
		admissionAttempts,
		admissionAttemptLatency,
	)
}
7 changes: 7 additions & 0 deletions pkg/scheduler/scheduler.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"context"
"fmt"
"sort"
"time"

"github.com/go-logr/logr"
corev1 "k8s.io/api/core/v1"
Expand All @@ -35,6 +36,7 @@ import (
"k8s.io/klog/v2"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/kueue/pkg/metrics"

kueue "sigs.k8s.io/kueue/apis/kueue/v1alpha1"
"sigs.k8s.io/kueue/pkg/cache"
Expand Down Expand Up @@ -85,6 +87,7 @@ func (s *Scheduler) schedule(ctx context.Context) {
if len(headWorkloads) == 0 {
return
}
startTime := time.Now()

// 2. Take a snapshot of the cache.
snapshot := s.cache.Snapshot()
Expand Down Expand Up @@ -127,6 +130,7 @@ func (s *Scheduler) schedule(ctx context.Context) {
}

// 6. Requeue the heads that were not scheduled.
result := metrics.InadmissibleAdmissionResult
for _, e := range entries {
log.V(3).Info("Workload evaluated for admission",
"workload", klog.KObj(e.Obj),
Expand All @@ -135,8 +139,11 @@ func (s *Scheduler) schedule(ctx context.Context) {
"reason", e.inadmissibleReason)
if e.status != assumed {
s.requeueAndUpdate(log, ctx, e)
} else {
result = metrics.SuccessAdmissionResult
}
}
metrics.AdmissionAttempt(result, time.Now().Sub(startTime))
}

type entryStatus string
Expand Down

0 comments on commit 3173820

Please sign in to comment.