diff --git a/controllers/datadogagent/controller_reconcile_v2.go b/controllers/datadogagent/controller_reconcile_v2.go
index f3eb27513..55043babf 100644
--- a/controllers/datadogagent/controller_reconcile_v2.go
+++ b/controllers/datadogagent/controller_reconcile_v2.go
@@ -159,8 +159,8 @@ func (r *Reconciler) reconcileInstanceV2(ctx context.Context, logger logr.Logger
 	// If introspection is disabled, reconcile the agent once using the empty provider `LegacyProvider`
 	providerList := map[string]struct{}{kubernetes.LegacyProvider: {}}
 	profiles := []datadoghqv1alpha1.DatadogAgentProfile{{}}
-	metrics.IntrospectionEnabled.Set(metrics.DisabledValue)
-	metrics.DAPEnabled.Set(metrics.DisabledValue)
+	metrics.IntrospectionEnabled.Set(metrics.FalseValue)
+	metrics.DAPEnabled.Set(metrics.FalseValue)
 
 	if r.options.DatadogAgentProfileEnabled || r.options.IntrospectionEnabled {
 		// Get a node list for profiles and introspection
@@ -171,11 +171,11 @@ func (r *Reconciler) reconcileInstanceV2(ctx context.Context, logger logr.Logger
 
 		if r.options.IntrospectionEnabled {
 			providerList = kubernetes.GetProviderListFromNodeList(nodeList, logger)
-			metrics.IntrospectionEnabled.Set(metrics.EnabledValue)
+			metrics.IntrospectionEnabled.Set(metrics.TrueValue)
 		}
 
 		if r.options.DatadogAgentProfileEnabled {
-			metrics.DAPEnabled.Set(metrics.EnabledValue)
+			metrics.DAPEnabled.Set(metrics.TrueValue)
 			var profilesByNode map[string]types.NamespacedName
 			profiles, profilesByNode, e = r.profilesToApply(ctx, logger, nodeList, now)
-			if err != nil {
+			if e != nil {
diff --git a/controllers/datadogagent_controller.go b/controllers/datadogagent_controller.go
index 00ed9a609..d39e18953 100644
--- a/controllers/datadogagent_controller.go
+++ b/controllers/datadogagent_controller.go
@@ -32,6 +32,7 @@ import (
 	datadoghqv2alpha1 "github.com/DataDog/datadog-operator/apis/datadoghq/v2alpha1"
 	"github.com/DataDog/datadog-operator/controllers/datadogagent"
 	"github.com/DataDog/datadog-operator/controllers/datadogagent/object"
+	"github.com/DataDog/datadog-operator/controllers/metrics"
 	"github.com/DataDog/datadog-operator/pkg/controller/utils/datadog"
 	"github.com/DataDog/datadog-operator/pkg/kubernetes"
 	edsdatadoghqv1alpha1 "github.com/DataDog/extendeddaemonset/api/v1alpha1"
@@ -200,8 +201,16 @@ func (r *DatadogAgentReconciler) SetupWithManager(mgr ctrl.Manager) error {
 		builder.Watches(
 			&datadoghqv1alpha1.DatadogAgentProfile{},
 			handler.EnqueueRequestsFromMapFunc(r.enqueueRequestsForAllDDAs()),
-			ctrlbuilder.WithPredicates(predicate.GenerationChangedPredicate{}),
-		)
+			// Combine with predicate.And, not predicate.Or: GenerationChangedPredicate accepts every
+			// delete event, so Or would short-circuit before DeleteFunc runs and the per-profile
+			// metrics would never be cleaned up; the unset Funcs callbacks would also admit every update.
+			ctrlbuilder.WithPredicates(predicate.And(predicate.GenerationChangedPredicate{}, predicate.Funcs{
+				DeleteFunc: func(e event.DeleteEvent) bool {
+					metrics.CleanupMetricsByProfile(e.Object)
+					return true
+				},
+			})),
+		)
 	}
 
 	// Watch nodes and reconcile all DatadogAgents for node creation, node deletion, and node label change events
diff --git a/controllers/metrics/const.go b/controllers/metrics/const.go
index 85aab2333..88c16b82d 100644
--- a/controllers/metrics/const.go
+++ b/controllers/metrics/const.go
@@ -9,6 +9,8 @@ const (
 	datadogAgentSubsystem        = "datadogagent"
 	datadogAgentProfileSubsystem = "datadogagentprofile"
 
-	EnabledValue  = 1.0
-	DisabledValue = 0.0
+	TrueValue  = 1.0
+	FalseValue = 0.0
+
+	datadogAgentProfileLabelKey = "datadogagentprofile"
 )
diff --git a/controllers/metrics/datadogagentprofile.go b/controllers/metrics/datadogagentprofile.go
index d42015ad3..81f1f329e 100644
--- a/controllers/metrics/datadogagentprofile.go
+++ b/controllers/metrics/datadogagentprofile.go
@@ -7,6 +7,7 @@ package metrics
 
 import (
 	"github.com/prometheus/client_golang/prometheus"
+	"sigs.k8s.io/controller-runtime/pkg/client"
 	"sigs.k8s.io/controller-runtime/pkg/metrics"
 )
 
@@ -19,9 +20,27 @@ var (
 			Help:      "1 if DatadogAgentProfiles are enabled. 0 if DatadogAgentProfiles are disabled",
 		},
 	)
+
+	// datadogagentprofile valid
+	DAPValid = prometheus.NewGaugeVec(
+		prometheus.GaugeOpts{
+			Subsystem: datadogAgentProfileSubsystem,
+			Name:      "valid",
+			Help:      "1 if the DatadogAgentProfile is valid. 0 if the DatadogAgentProfile is invalid",
+		},
+		[]string{
+			datadogAgentProfileLabelKey,
+		},
+	)
 )
 
 func init() {
 	// Register custom metrics with the global prometheus registry
 	metrics.Registry.MustRegister(DAPEnabled)
+	metrics.Registry.MustRegister(DAPValid)
+}
+
+// CleanupMetricsByProfile deletes profile-specific prometheus metrics given a profile
+func CleanupMetricsByProfile(obj client.Object) {
+	DAPValid.Delete(prometheus.Labels{datadogAgentProfileLabelKey: obj.GetName()})
 }
diff --git a/pkg/agentprofile/agent_profile.go b/pkg/agentprofile/agent_profile.go
index 7590657a9..8fe2fa2d0 100644
--- a/pkg/agentprofile/agent_profile.go
+++ b/pkg/agentprofile/agent_profile.go
@@ -13,9 +13,11 @@ import (
 	"github.com/DataDog/datadog-operator/apis/datadoghq/common/v1"
 	datadoghqv1alpha1 "github.com/DataDog/datadog-operator/apis/datadoghq/v1alpha1"
 	"github.com/DataDog/datadog-operator/apis/datadoghq/v2alpha1"
+	"github.com/DataDog/datadog-operator/controllers/metrics"
 	"github.com/DataDog/datadog-operator/pkg/controller/utils/comparison"
 
 	"github.com/go-logr/logr"
+	"github.com/prometheus/client_golang/prometheus"
 	v1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/labels"
@@ -55,6 +57,7 @@ func ProfileToApply(logger logr.Logger, profile *datadoghqv1alpha1.DatadogAgentP
 
 	if err := datadoghqv1alpha1.ValidateDatadogAgentProfileSpec(&profile.Spec); err != nil {
 		logger.Error(err, "profile spec is invalid, skipping", "datadogagentprofile", profile.Name, "datadogagentprofile_namespace", profile.Namespace)
+		metrics.DAPValid.With(prometheus.Labels{"datadogagentprofile": profile.Name}).Set(metrics.FalseValue)
 		profileStatus.Conditions = SetDatadogAgentProfileCondition(profileStatus.Conditions, NewDatadogAgentProfileCondition(ValidConditionType, metav1.ConditionFalse, now, InvalidConditionReason, err.Error()))
 		profileStatus.Valid = metav1.ConditionFalse
 		UpdateProfileStatus(profile, profileStatus, now)
@@ -65,11 +68,13 @@ func ProfileToApply(logger logr.Logger, profile *datadoghqv1alpha1.DatadogAgentP
 		matchesNode, err := profileMatchesNode(profile, node.Labels)
 		if err != nil {
 			logger.Error(err, "profile selector is invalid, skipping", "datadogagentprofile", profile.Name, "datadogagentprofile_namespace", profile.Namespace)
+			metrics.DAPValid.With(prometheus.Labels{"datadogagentprofile": profile.Name}).Set(metrics.FalseValue)
 			profileStatus.Conditions = SetDatadogAgentProfileCondition(profileStatus.Conditions, NewDatadogAgentProfileCondition(ValidConditionType, metav1.ConditionFalse, now, InvalidConditionReason, err.Error()))
 			profileStatus.Valid = metav1.ConditionFalse
 			UpdateProfileStatus(profile, profileStatus, now)
 			return profileAppliedByNode, err
 		}
+		metrics.DAPValid.With(prometheus.Labels{"datadogagentprofile": profile.Name}).Set(metrics.TrueValue)
 		profileStatus.Valid = metav1.ConditionTrue
 		profileStatus.Conditions = SetDatadogAgentProfileCondition(profileStatus.Conditions, NewDatadogAgentProfileCondition(ValidConditionType, metav1.ConditionTrue, now, ValidConditionReason, "Valid manifest"))
 