From 5ad2294c14e5a986b3c7f0d41ab487bbdb0d410f Mon Sep 17 00:00:00 2001 From: Markus Lehtonen Date: Wed, 2 Aug 2023 16:47:11 +0300 Subject: [PATCH] metrics: add nfd_node_update_requests_total counter Add a counter for the total number of node update/sync requests. In practice, this counts the number of gRPC requests received if the gRPC API is in use. If the NodeFeature API is enabled, this counts the requests initiated by the NFD API controller, i.e. updates triggered by changes in NodeFeature or NodeFeatureRule objects plus updates initiated by the controller resync period. --- docs/deployment/metrics.md | 1 + pkg/nfd-master/metrics.go | 6 ++++++ pkg/nfd-master/nfd-master.go | 1 + pkg/nfd-master/node-updater-pool.go | 1 + 4 files changed, 9 insertions(+) diff --git a/docs/deployment/metrics.md b/docs/deployment/metrics.md index 550671f6e7..7e52687309 100644 --- a/docs/deployment/metrics.md +++ b/docs/deployment/metrics.md @@ -17,6 +17,7 @@ The exposed metrics are | ------------------------------------------------- | --------- | --------------------------------------- | `nfd_master_build_info` | Gauge | Version from which nfd-master was built | `nfd_worker_build_info` | Gauge | Version from which nfd-worker was built +| `nfd_node_update_requests_total` | Counter | Number of node update requests processed by the master | `nfd_node_updates_total` | Counter | Number of nodes updated | `nfd_node_update_failures_total` | Counter | Number of nodes update failures | `nfd_node_labels_rejected_total` | Counter | Number of nodes labels rejected by nfd-master diff --git a/pkg/nfd-master/metrics.go b/pkg/nfd-master/metrics.go index 1aec52f287..c64842a7ae 100644 --- a/pkg/nfd-master/metrics.go +++ b/pkg/nfd-master/metrics.go @@ -29,6 +29,7 @@ import ( // When adding metric names, see https://prometheus.io/docs/practices/naming/#metric-names const ( buildInfoQuery = "nfd_master_build_info" + nodeUpdateRequestsQuery = "nfd_node_update_requests_total" nodeUpdatesQuery = 
"nfd_node_updates_total" nodeUpdateFailuresQuery = "nfd_node_update_failures_total" nodeLabelsRejectedQuery = "nfd_node_labels_rejected_total" @@ -48,6 +49,10 @@ var ( "version": version.Get(), }, }) + nodeUpdateRequests = prometheus.NewCounter(prometheus.CounterOpts{ + Name: nodeUpdateRequestsQuery, + Help: "Number of node update requests processed by the master.", + }) nodeUpdates = prometheus.NewCounter(prometheus.CounterOpts{ Name: nodeUpdatesQuery, Help: "Number of nodes updated by the master.", @@ -95,6 +100,7 @@ func runMetricsServer(port int) { r := prometheus.NewRegistry() r.MustRegister( buildInfo, + nodeUpdateRequests, nodeUpdates, nodeUpdateFailures, nodeLabelsRejected, diff --git a/pkg/nfd-master/nfd-master.go b/pkg/nfd-master/nfd-master.go index cf92976ffc..9f283398cc 100644 --- a/pkg/nfd-master/nfd-master.go +++ b/pkg/nfd-master/nfd-master.go @@ -654,6 +654,7 @@ func isNamespaceDenied(labelNs string, wildcardDeniedNs map[string]struct{}, nor // SetLabels implements LabelerServer func (m *nfdMaster) SetLabels(c context.Context, r *pb.SetLabelsRequest) (*pb.SetLabelsReply, error) { + nodeUpdateRequests.Inc() err := authorizeClient(c, m.args.VerifyNodeName, r.NodeName) if err != nil { klog.ErrorS(err, "gRPC client authorization failed", "nodeName", r.NodeName) diff --git a/pkg/nfd-master/node-updater-pool.go b/pkg/nfd-master/node-updater-pool.go index 59429c34b2..55ce2a712f 100644 --- a/pkg/nfd-master/node-updater-pool.go +++ b/pkg/nfd-master/node-updater-pool.go @@ -46,6 +46,7 @@ func (u *nodeUpdaterPool) processNodeUpdateRequest(queue workqueue.RateLimitingI defer queue.Done(nodeName) + nodeUpdateRequests.Inc() if err := u.nfdMaster.nfdAPIUpdateOneNode(nodeName.(string)); err != nil { if queue.NumRequeues(nodeName) < 5 { klog.InfoS("retrying node update", "nodeName", nodeName)