Skip to content

Commit

Permalink
metrics: counters for rejected labels, extended resources and taints
Browse files Browse the repository at this point in the history
Add counters for labels, extended resources and taints rejected/filtered
out by nfd-master.
  • Loading branch information
marquiz committed Aug 7, 2023
1 parent a8a29e6 commit 4b24cc1
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 1 deletion.
3 changes: 3 additions & 0 deletions docs/deployment/metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ The exposed metrics are
| `nfd_worker_build_info` | Gauge | Version from which nfd-worker was built
| `nfd_node_updates_total` | Counter | Number of nodes updated
| `nfd_node_update_failures_total` | Counter | Number of node update failures
| `nfd_node_labels_rejected_total` | Counter | Number of node labels rejected by nfd-master
| `nfd_node_extendedresources_rejected_total` | Counter | Number of node extended resources rejected by nfd-master
| `nfd_node_taints_rejected_total` | Counter | Number of node taints rejected by nfd-master
| `nfd_nodefeaturerule_processing_duration_seconds` | Histogram | Time taken to process NodeFeatureRule objects
| `nfd_nodefeaturerule_processing_errors_total` | Counter | Number of errors encountered while processing NodeFeatureRule objects
| `nfd_feature_discovery_duration_seconds` | Histogram | Time taken to discover features on a node
Expand Down
21 changes: 20 additions & 1 deletion pkg/nfd-master/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ const (
buildInfoQuery = "nfd_master_build_info"
nodeUpdatesQuery = "nfd_node_updates_total"
nodeUpdateFailuresQuery = "nfd_node_update_failures_total"
nodeLabelsRejectedQuery = "nfd_node_labels_rejected_total"
nodeERsRejectedQuery = "nfd_node_extendedresources_rejected_total"
nodeTaintsRejectedQuery = "nfd_node_taints_rejected_total"
nfrProcessingTimeQuery = "nfd_nodefeaturerule_processing_duration_seconds"
nfrProcessingErrorsQuery = "nfd_nodefeaturerule_processing_errors_total"
)
Expand All @@ -53,6 +56,18 @@ var (
Name: nodeUpdateFailuresQuery,
Help: "Number of node update failures.",
})
// nodeLabelsRejected is a Prometheus counter of node labels that nfd-master
// rejected. It is exposed under the metric name held in
// nodeLabelsRejectedQuery.
nodeLabelsRejected = prometheus.NewCounter(prometheus.CounterOpts{
Name: nodeLabelsRejectedQuery,
Help: "Number of node labels that were rejected by nfd-master.",
})
// nodeERsRejected is a Prometheus counter of node extended resources that
// nfd-master rejected. It is exposed under the metric name held in
// nodeERsRejectedQuery.
nodeERsRejected = prometheus.NewCounter(prometheus.CounterOpts{
Name: nodeERsRejectedQuery,
Help: "Number of node extended resources that were rejected by nfd-master.",
})
// nodeTaintsRejected is a Prometheus counter of node taints that nfd-master
// rejected. It is exposed under the metric name held in
// nodeTaintsRejectedQuery.
nodeTaintsRejected = prometheus.NewCounter(prometheus.CounterOpts{
Name: nodeTaintsRejectedQuery,
Help: "Number of node taints that were rejected by nfd-master.",
})
nfrProcessingTime = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: nfrProcessingTimeQuery,
Expand All @@ -78,9 +93,13 @@ func registerVersion(version string) {
// runMetricsServer starts a http server to expose metrics
func runMetricsServer(port int) {
r := prometheus.NewRegistry()
r.MustRegister(buildInfo,
r.MustRegister(
buildInfo,
nodeUpdates,
nodeUpdateFailures,
nodeLabelsRejected,
nodeERsRejected,
nodeTaintsRejected,
nfrProcessingTime,
nfrProcessingErrors)

Expand Down
4 changes: 4 additions & 0 deletions pkg/nfd-master/nfd-master.go
Original file line number Diff line number Diff line change
Expand Up @@ -510,6 +510,7 @@ func (m *nfdMaster) filterFeatureLabels(labels Labels, features *nfdv1alpha1.Fea

if value, err := m.filterFeatureLabel(name, value, features); err != nil {
klog.ErrorS(err, "ignoring label", "labelKey", name, "labelValue", value)
nodeLabelsRejected.Inc()
} else {
outLabels[name] = value
}
Expand All @@ -523,6 +524,7 @@ func (m *nfdMaster) filterFeatureLabels(labels Labels, features *nfdv1alpha1.Fea
if value, ok := outLabels[extendedResourceName]; ok {
if _, err := strconv.Atoi(value); err != nil {
klog.ErrorS(err, "bad label value encountered for extended resource", "labelKey", extendedResourceName, "labelValue", value)
nodeERsRejected.Inc()
continue // non-numeric label can't be used
}

Expand Down Expand Up @@ -603,6 +605,7 @@ func filterTaints(taints []corev1.Taint) []corev1.Taint {
for _, taint := range taints {
if err := filterTaint(&taint); err != nil {
klog.ErrorS(err, "ignoring taint", "taint", taint)
nodeTaintsRejected.Inc()
} else {
outTaints = append(outTaints, taint)
}
Expand Down Expand Up @@ -786,6 +789,7 @@ func filterExtendedResources(features *nfdv1alpha1.Features, extendedResources E
capacity, err := filterExtendedResource(name, value, features)
if err != nil {
klog.ErrorS(err, "failed to create extended resources", "extendedResourceName", name, "extendedResourceValue", value)
nodeERsRejected.Inc()
} else {
outExtendedResources[name] = capacity
}
Expand Down

0 comments on commit 4b24cc1

Please sign in to comment.