Skip to content

Commit

Permalink
metrics: add nfd_node_update_requests_total counter
Browse files Browse the repository at this point in the history
Add a counter for the total number of node update/sync requests. In
practice, this counts the number of gRPC requests received if the gRPC
API is in use. If the NodeFeature API is enabled, this counts the
requests initiated by the NFD API controller, i.e. updates triggered by
changes in NodeFeature or NodeFeatureRule objects plus updates initiated
by the controller resync period.
  • Loading branch information
marquiz committed Aug 7, 2023
1 parent 4b24cc1 commit 5ad2294
Show file tree
Hide file tree
Showing 4 changed files with 9 additions and 0 deletions.
1 change: 1 addition & 0 deletions docs/deployment/metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ The exposed metrics are
| ------------------------------------------------- | --------- | ---------------------------------------
| `nfd_master_build_info` | Gauge | Version from which nfd-master was built
| `nfd_worker_build_info` | Gauge | Version from which nfd-worker was built
| `nfd_node_update_requests_total` | Counter | Number of node update requests processed by the master
| `nfd_node_updates_total` | Counter | Number of nodes updated
| `nfd_node_update_failures_total` | Counter | Number of node update failures
| `nfd_node_labels_rejected_total` | Counter | Number of node labels rejected by nfd-master
Expand Down
6 changes: 6 additions & 0 deletions pkg/nfd-master/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (
// When adding metric names, see https://prometheus.io/docs/practices/naming/#metric-names
const (
buildInfoQuery = "nfd_master_build_info"
nodeUpdateRequestsQuery = "nfd_node_update_requests_total"
nodeUpdatesQuery = "nfd_node_updates_total"
nodeUpdateFailuresQuery = "nfd_node_update_failures_total"
nodeLabelsRejectedQuery = "nfd_node_labels_rejected_total"
Expand All @@ -48,6 +49,10 @@ var (
"version": version.Get(),
},
})
nodeUpdateRequests = prometheus.NewCounter(prometheus.CounterOpts{
Name: nodeUpdateRequestsQuery,
Help: "Number of node update requests processed by the master.",
})
nodeUpdates = prometheus.NewCounter(prometheus.CounterOpts{
Name: nodeUpdatesQuery,
Help: "Number of nodes updated by the master.",
Expand Down Expand Up @@ -95,6 +100,7 @@ func runMetricsServer(port int) {
r := prometheus.NewRegistry()
r.MustRegister(
buildInfo,
nodeUpdateRequests,
nodeUpdates,
nodeUpdateFailures,
nodeLabelsRejected,
Expand Down
1 change: 1 addition & 0 deletions pkg/nfd-master/nfd-master.go
Original file line number Diff line number Diff line change
Expand Up @@ -654,6 +654,7 @@ func isNamespaceDenied(labelNs string, wildcardDeniedNs map[string]struct{}, nor

// SetLabels implements LabelerServer
func (m *nfdMaster) SetLabels(c context.Context, r *pb.SetLabelsRequest) (*pb.SetLabelsReply, error) {
nodeUpdateRequests.Inc()
err := authorizeClient(c, m.args.VerifyNodeName, r.NodeName)
if err != nil {
klog.ErrorS(err, "gRPC client authorization failed", "nodeName", r.NodeName)
Expand Down
1 change: 1 addition & 0 deletions pkg/nfd-master/node-updater-pool.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ func (u *nodeUpdaterPool) processNodeUpdateRequest(queue workqueue.RateLimitingI

defer queue.Done(nodeName)

nodeUpdateRequests.Inc()
if err := u.nfdMaster.nfdAPIUpdateOneNode(nodeName.(string)); err != nil {
if queue.NumRequeues(nodeName) < 5 {
klog.InfoS("retrying node update", "nodeName", nodeName)
Expand Down

0 comments on commit 5ad2294

Please sign in to comment.