diff --git a/pkg/autodiscovery/providers/cloudfoundry.go b/pkg/autodiscovery/providers/cloudfoundry.go index cce8359a2c956..9217baa968b74 100644 --- a/pkg/autodiscovery/providers/cloudfoundry.go +++ b/pkg/autodiscovery/providers/cloudfoundry.go @@ -17,7 +17,6 @@ import ( "github.com/DataDog/datadog-agent/pkg/autodiscovery/common/utils" "github.com/DataDog/datadog-agent/pkg/autodiscovery/integration" "github.com/DataDog/datadog-agent/pkg/autodiscovery/providers/names" - "github.com/DataDog/datadog-agent/pkg/autodiscovery/telemetry" "github.com/DataDog/datadog-agent/pkg/config" "github.com/DataDog/datadog-agent/pkg/util/cloudproviders/cloudfoundry" "github.com/DataDog/datadog-agent/pkg/util/log" @@ -40,7 +39,6 @@ func NewCloudFoundryConfigProvider(*config.ConfigurationProviders) (ConfigProvid var err error if cfp.bbsCache, err = cloudfoundry.GetGlobalBBSCache(); err != nil { - telemetry.Errors.Inc(names.CloudFoundryBBS) return nil, err } return cfp, nil @@ -88,7 +86,6 @@ func (cf CloudFoundryConfigProvider) getConfigsForApp(desiredLRP *cloudfoundry.D } parsedConfigs, errs := utils.ExtractTemplatesFromMap(id.String(), convertedADVal, "") for _, err := range errs { - telemetry.Errors.Inc(names.CloudFoundryBBS) log.Errorf("Cannot parse endpoint template for service %s of app %s: %s, skipping", adName, desiredLRP.AppGUID, err) } @@ -101,7 +98,6 @@ func (cf CloudFoundryConfigProvider) getConfigsForApp(desiredLRP *cloudfoundry.D // if service is found in VCAP_SERVICES (non-container service), we will run a single check per App err := cf.renderExtractedConfigs(parsedConfigs, variables, vcVal) if err != nil { - telemetry.Errors.Inc(names.CloudFoundryBBS) log.Errorf("Failed to render config for service %s of app %s: %s", adName, desiredLRP.AppGUID, err) } else { success = true @@ -116,7 +112,6 @@ func (cf CloudFoundryConfigProvider) getConfigsForApp(desiredLRP *cloudfoundry.D if allSvcsStr == "" { allSvcsStr = "no services found" } - telemetry.Errors.Inc(names.CloudFoundryBBS) log.Errorf( "Service %s for app %s has variables configured, but is not present in VCAP_SERVICES (found services: %s)", adName, desiredLRP.AppGUID, allSvcsStr, diff --git a/pkg/autodiscovery/providers/clusterchecks.go b/pkg/autodiscovery/providers/clusterchecks.go index d2bb4ea6cd922..996540908c01b 100644 --- a/pkg/autodiscovery/providers/clusterchecks.go +++ b/pkg/autodiscovery/providers/clusterchecks.go @@ -12,7 +12,6 @@ import ( "github.com/DataDog/datadog-agent/pkg/autodiscovery/integration" "github.com/DataDog/datadog-agent/pkg/autodiscovery/providers/names" - "github.com/DataDog/datadog-agent/pkg/autodiscovery/telemetry" "github.com/DataDog/datadog-agent/pkg/clusteragent/clusterchecks/types" "github.com/DataDog/datadog-agent/pkg/config" ddErrors "github.com/DataDog/datadog-agent/pkg/errors" @@ -57,7 +56,6 @@ func NewClusterChecksConfigProvider(providerConfig *config.ConfigurationProvider if config.Datadog.GetBool("cloud_foundry") { boshID := config.Datadog.GetString("bosh_id") if boshID == "" { - telemetry.Errors.Inc(names.ClusterChecks) log.Warn("configuration variable cloud_foundry is set to true, but bosh_id is empty, can't retrieve node name") } else { c.identifier = boshID @@ -142,7 +140,6 @@ func (c *ClusterChecksConfigProvider) Collect(ctx context.Context) ([]integratio if c.dcaClient == nil { err := c.initClient() if err != nil { - telemetry.Errors.Inc(names.ClusterChecks) return nil, err } } @@ -152,7 +149,6 @@ func (c *ClusterChecksConfigProvider) Collect(ctx context.Context) ([]integratio if (ddErrors.IsRemoteService(err) || ddErrors.IsTimeout(err)) && c.withinDegradedModePeriod() { // Degraded mode: return the error to keep the configs scheduled // during a Cluster Agent / network outage - telemetry.Errors.Inc(names.ClusterChecks) return nil, err } @@ -198,7 +194,6 @@ func (c *ClusterChecksConfigProvider) heartbeatSender(ctx context.Context) { extraHeartbeatTime = currentTime log.Infof("Sent extra heartbeat at: %v", currentTime) } else { - telemetry.Errors.Inc(names.ClusterChecks) log.Warnf("Unable to send extra heartbeat to Cluster Agent, err: %v", err) } } diff --git a/pkg/autodiscovery/providers/consul.go b/pkg/autodiscovery/providers/consul.go index c2f0885007fa8..9c57908db7d07 100644 --- a/pkg/autodiscovery/providers/consul.go +++ b/pkg/autodiscovery/providers/consul.go @@ -20,7 +20,6 @@ import ( "github.com/DataDog/datadog-agent/pkg/autodiscovery/common/utils" "github.com/DataDog/datadog-agent/pkg/autodiscovery/integration" "github.com/DataDog/datadog-agent/pkg/autodiscovery/providers/names" - "github.com/DataDog/datadog-agent/pkg/autodiscovery/telemetry" "github.com/DataDog/datadog-agent/pkg/config" "github.com/DataDog/datadog-agent/pkg/util/log" ) @@ -90,7 +89,6 @@ func NewConsulConfigProvider(providerConfig *config.ConfigurationProviders) (Con cache := newProviderCache() cli, err := consul.NewClient(clientCfg) if err != nil { - telemetry.Errors.Inc(names.Consul) return nil, fmt.Errorf("Unable to instantiate the consul client: %s", err) } @@ -138,7 +136,6 @@ func (p *ConsulConfigProvider) IsUpToDate(ctx context.Context) (bool, error) { queryOptions = queryOptions.WithContext(ctx) identifiers, _, err := kv.List(p.TemplateDir, queryOptions) if err != nil { - telemetry.Errors.Inc(names.Consul) return false, err } if p.cache.count != len(identifiers) { @@ -174,7 +171,6 @@ func (p *ConsulConfigProvider) getIdentifiers(ctx context.Context, prefix string // TODO: decide on the query parameters. keys, _, err := kv.Keys(prefix, "", queryOptions) if err != nil { - telemetry.Errors.Inc(names.Consul) log.Error("Can't get templates keys from consul: ", err) return identifiers } @@ -225,21 +221,18 @@ func (p *ConsulConfigProvider) getTemplates(ctx context.Context, key string) []i checkNames, err := p.getCheckNames(ctx, checkNameKey) if err != nil { - telemetry.Errors.Inc(names.Consul) log.Errorf("Failed to retrieve check names at %s. Error: %s", checkNameKey, err) return templates } initConfigs, err := p.getJSONValue(ctx, initKey) if err != nil { - telemetry.Errors.Inc(names.Consul) log.Errorf("Failed to retrieve init configs at %s. Error: %s", initKey, err) return templates } instances, err := p.getJSONValue(ctx, instanceKey) if err != nil { - telemetry.Errors.Inc(names.Consul) log.Errorf("Failed to retrieve instances at %s. Error: %s", instanceKey, err) return templates } diff --git a/pkg/autodiscovery/providers/container.go b/pkg/autodiscovery/providers/container.go index 9424677499a04..d478de660ada0 100644 --- a/pkg/autodiscovery/providers/container.go +++ b/pkg/autodiscovery/providers/container.go @@ -148,11 +148,12 @@ func (k *ContainerConfigProvider) processEvents(evBundle workloadmeta.EventBundl delete(k.configErrors, entityName) default: - telemetry.Errors.Inc(names.KubeContainer) log.Errorf("cannot handle event of type %d", event.Type) } } + telemetry.Errors.Set(float64(len(k.configErrors)), names.KubeContainer) + return changes } @@ -188,7 +189,6 @@ func (k *ContainerConfigProvider) generateConfig(e workloadmeta.Entity) ([]integ for _, podContainer := range entity.GetAllContainers() { container, err := k.workloadmetaStore.GetContainer(podContainer.ID) if err != nil { - telemetry.Errors.Inc(names.KubeContainer) log.Debugf("Pod %q has reference to non-existing container %q", entity.Name, podContainer.ID) continue } @@ -248,7 +248,6 @@ func (k *ContainerConfigProvider) generateConfig(e workloadmeta.Entity) ([]integ containerNames)...) default: - telemetry.Errors.Inc(names.KubeContainer) log.Errorf("cannot handle entity of kind %s", e.GetID().Kind) } diff --git a/pkg/autodiscovery/providers/endpointschecks.go b/pkg/autodiscovery/providers/endpointschecks.go index 032d27e7f090d..4774a492762b7 100644 --- a/pkg/autodiscovery/providers/endpointschecks.go +++ b/pkg/autodiscovery/providers/endpointschecks.go @@ -14,7 +14,6 @@ import ( "github.com/DataDog/datadog-agent/pkg/autodiscovery/integration" "github.com/DataDog/datadog-agent/pkg/autodiscovery/providers/names" - "github.com/DataDog/datadog-agent/pkg/autodiscovery/telemetry" "github.com/DataDog/datadog-agent/pkg/config" "github.com/DataDog/datadog-agent/pkg/errors" "github.com/DataDog/datadog-agent/pkg/util/clusteragent" @@ -47,7 +46,6 @@ func NewEndpointsChecksConfigProvider(providerConfig *config.ConfigurationProvid var err error c.nodeName, err = getNodename(context.TODO()) if err != nil { - telemetry.Errors.Inc(names.EndpointsChecks) log.Errorf("Cannot get node name: %s", err) return nil, err } @@ -97,7 +95,6 @@ func (c *EndpointsChecksConfigProvider) Collect(ctx context.Context) ([]integrat return nil, nil } - telemetry.Errors.Inc(names.EndpointsChecks) return nil, err } @@ -119,7 +116,6 @@ func getNodename(ctx context.Context) (string, error) { } ku, err := kubelet.GetKubeUtil() if err != nil { - telemetry.Errors.Inc(names.EndpointsChecks) log.Errorf("Cannot get kubeUtil object: %s", err) return "", err } @@ -132,7 +128,6 @@ func (c *EndpointsChecksConfigProvider) initClient() error { if err == nil { c.dcaClient = dcaClient } - telemetry.Errors.Inc(names.EndpointsChecks) return err } diff --git a/pkg/autodiscovery/providers/etcd.go b/pkg/autodiscovery/providers/etcd.go index 761e9a2f00ccd..f59310a47100e 100644 --- a/pkg/autodiscovery/providers/etcd.go +++ b/pkg/autodiscovery/providers/etcd.go @@ -19,7 +19,6 @@ import ( "github.com/DataDog/datadog-agent/pkg/autodiscovery/common/utils" "github.com/DataDog/datadog-agent/pkg/autodiscovery/integration" "github.com/DataDog/datadog-agent/pkg/autodiscovery/providers/names" - "github.com/DataDog/datadog-agent/pkg/autodiscovery/telemetry" "github.com/DataDog/datadog-agent/pkg/config" "github.com/DataDog/datadog-agent/pkg/util/log" ) @@ -55,7 +54,6 @@ func NewEtcdConfigProvider(providerConfig *config.ConfigurationProviders) (Confi cl, err := client.New(clientCfg) if err != nil { - telemetry.Errors.Inc(names.Etcd) return nil, fmt.Errorf("Unable to instantiate the etcd client: %s", err) } cache := newProviderCache() @@ -87,7 +85,6 @@ func (p *EtcdConfigProvider) getIdentifiers(ctx context.Context, key string) []s identifiers := make([]string, 0) resp, err := p.Client.Get(ctx, key, &client.GetOptions{Recursive: true}) if err != nil { - telemetry.Errors.Inc(names.Etcd) log.Error("Can't get templates keys from etcd: ", err) return identifiers } @@ -110,21 +107,18 @@ func (p *EtcdConfigProvider) getTemplates(ctx context.Context, key string) []int checkNames, err := p.getCheckNames(ctx, checkNameKey) if err != nil { - telemetry.Errors.Inc(names.Etcd) log.Errorf("Failed to retrieve check names at %s. Error: %s", checkNameKey, err) return nil } initConfigs, err := p.getJSONValue(ctx, initKey) if err != nil { - telemetry.Errors.Inc(names.Etcd) log.Errorf("Failed to retrieve init configs at %s. Error: %s", initKey, err) return nil } instances, err := p.getJSONValue(ctx, instanceKey) if err != nil { - telemetry.Errors.Inc(names.Etcd) log.Errorf("Failed to retrieve instances at %s. Error: %s", instanceKey, err) return nil } @@ -169,7 +163,6 @@ func (p *EtcdConfigProvider) IsUpToDate(ctx context.Context) (bool, error) { resp, err := p.Client.Get(ctx, p.templateDir, &client.GetOptions{Recursive: true}) if err != nil { - telemetry.Errors.Inc(names.Etcd) return false, err } identifiers := resp.Node.Nodes diff --git a/pkg/autodiscovery/providers/file.go b/pkg/autodiscovery/providers/file.go index 45efefa39f3b3..391fc534ac53e 100644 --- a/pkg/autodiscovery/providers/file.go +++ b/pkg/autodiscovery/providers/file.go @@ -30,7 +30,6 @@ func NewFileConfigProvider() *FileConfigProvider { func (c *FileConfigProvider) Collect(ctx context.Context) ([]integration.Config, error) { configs, errors, err := ReadConfigFiles(WithoutAdvancedAD) if err != nil { - telemetry.Errors.Inc(names.File) return nil, err } diff --git a/pkg/autodiscovery/providers/kube_endpoints.go b/pkg/autodiscovery/providers/kube_endpoints.go index 9b45f8af57a25..3d19ccd46b072 100644 --- a/pkg/autodiscovery/providers/kube_endpoints.go +++ b/pkg/autodiscovery/providers/kube_endpoints.go @@ -62,13 +62,11 @@ func NewKubeEndpointsConfigProvider(*config.ConfigurationProviders) (ConfigProvi // Using GetAPIClient (no wait) as Client should already be initialized by Cluster Agent main entrypoint before ac, err := apiserver.GetAPIClient() if err != nil { - telemetry.Errors.Inc(names.KubeEndpoints) return nil, fmt.Errorf("cannot connect to apiserver: %s", err) } servicesInformer := ac.InformerFactory.Core().V1().Services() if servicesInformer == nil { - telemetry.Errors.Inc(names.KubeEndpoints) return nil, fmt.Errorf("cannot get service informer: %s", err) } @@ -83,13 +81,11 @@ func NewKubeEndpointsConfigProvider(*config.ConfigurationProviders) (ConfigProvi UpdateFunc: p.invalidateIfChangedService, DeleteFunc: p.invalidate, }); err != nil { - telemetry.Errors.Inc(names.KubeEndpoints) return nil, fmt.Errorf("cannot add event handler to service informer: %s", err) } endpointsInformer := ac.InformerFactory.Core().V1().Endpoints() if endpointsInformer == nil { - telemetry.Errors.Inc(names.KubeEndpoints) return nil, fmt.Errorf("cannot get endpoint informer: %s", err) } @@ -98,7 +94,6 @@ func NewKubeEndpointsConfigProvider(*config.ConfigurationProviders) (ConfigProvi if _, err := endpointsInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ UpdateFunc: p.invalidateIfChangedEndpoints, }); err != nil { - telemetry.Errors.Inc(names.KubeEndpoints) return nil, fmt.Errorf("cannot add event handler to endpoint informer: %s", err) } @@ -123,7 +118,6 @@ func (k *kubeEndpointsConfigProvider) Collect(ctx context.Context) ([]integratio for _, config := range parsedConfigsInfo { kep, err := k.endpointsLister.Endpoints(config.namespace).Get(config.name) if err != nil { - telemetry.Errors.Inc(names.KubeEndpoints) log.Errorf("Cannot get Kubernetes endpoints: %s", err) continue } @@ -147,14 +141,12 @@ func (k *kubeEndpointsConfigProvider) invalidate(obj interface{}) { // It's possible that we got a DeletedFinalStateUnknown here deletedState, ok := obj.(cache.DeletedFinalStateUnknown) if !ok { - telemetry.Errors.Inc(names.KubeEndpoints) log.Errorf("Received unexpected object: %T", obj) return } castedObj, ok = deletedState.Obj.(*v1.Service) if !ok { - telemetry.Errors.Inc(names.KubeEndpoints) log.Errorf("Expected DeletedFinalStateUnknown to contain *v1.Service, got: %T", deletedState.Obj) return } @@ -172,14 +164,12 @@ func (k *kubeEndpointsConfigProvider) invalidateIfChangedService(old, obj interf // nil pointers are safely handled by the casting logic. castedObj, ok := obj.(*v1.Service) if !ok { - telemetry.Errors.Inc(names.KubeEndpoints) log.Errorf("Expected a *v1.Service type, got: %T", obj) return } // Cast the old object, invalidate on casting error castedOld, ok := old.(*v1.Service) if !ok { - telemetry.Errors.Inc(names.KubeEndpoints) log.Errorf("Expected a *v1.Service type, got: %T", old) k.setUpToDate(false) return @@ -200,14 +190,12 @@ func (k *kubeEndpointsConfigProvider) invalidateIfChangedEndpoints(old, obj inte // nil pointers are safely handled by the casting logic. castedObj, ok := obj.(*v1.Endpoints) if !ok { - telemetry.Errors.Inc(names.KubeEndpoints) log.Errorf("Expected an *v1.Endpoints type, got: %T", obj) return } // Cast the old object, invalidate on casting error castedOld, ok := old.(*v1.Endpoints) if !ok { - telemetry.Errors.Inc(names.KubeEndpoints) log.Errorf("Expected a *v1.Endpoints type, got: %T", old) k.setUpToDate(false) return @@ -249,7 +237,6 @@ func (k *kubeEndpointsConfigProvider) parseServiceAnnotationsForEndpoints(servic endptConf, errors := utils.ExtractTemplatesFromPodAnnotations(endpointsID, svc.Annotations, kubeEndpointID) for _, err := range errors { - telemetry.Errors.Inc(names.KubeEndpoints) log.Errorf("Cannot parse endpoint template for service %s/%s: %s", svc.Namespace, svc.Name, err) } @@ -285,13 +272,14 @@ func (k *kubeEndpointsConfigProvider) parseServiceAnnotationsForEndpoints(servic k.cleanErrorsOfDeletedEndpoints(setEndpointIDs) + telemetry.Errors.Set(float64(len(k.configErrors)), names.KubeEndpoints) + return configsInfo } // generateConfigs creates a config template for each Endpoints IP func generateConfigs(tpl integration.Config, resolveMode endpointResolveMode, kep *v1.Endpoints) []integration.Config { if kep == nil { - telemetry.Errors.Inc(names.KubeEndpoints) log.Warn("Nil Kubernetes Endpoints object, cannot generate config templates") return []integration.Config{tpl} } @@ -306,7 +294,6 @@ func generateConfigs(tpl integration.Config, resolveMode endpointResolveMode, ke case kubeEndpointResolveIP: // In case of unknown value, fallback to auto default: - telemetry.Errors.Inc(names.KubeEndpoints) log.Warnf("Unknown resolve value: %s for endpoint: %s/%s - fallback to auto mode", resolveMode, namespace, name) fallthrough // Auto or empty (default to auto): we try to resolve the POD behind this address diff --git a/pkg/autodiscovery/providers/kube_endpoints_file.go b/pkg/autodiscovery/providers/kube_endpoints_file.go index e8d3127108cfc..352192186996b 100644 --- a/pkg/autodiscovery/providers/kube_endpoints_file.go +++ b/pkg/autodiscovery/providers/kube_endpoints_file.go @@ -15,7 +15,6 @@ import ( "github.com/DataDog/datadog-agent/pkg/autodiscovery/common/utils" "github.com/DataDog/datadog-agent/pkg/autodiscovery/integration" "github.com/DataDog/datadog-agent/pkg/autodiscovery/providers/names" - "github.com/DataDog/datadog-agent/pkg/autodiscovery/telemetry" "github.com/DataDog/datadog-agent/pkg/config" "github.com/DataDog/datadog-agent/pkg/util/kubernetes/apiserver" "github.com/DataDog/datadog-agent/pkg/util/log" @@ -76,13 +75,11 @@ func NewKubeEndpointsFileConfigProvider(*config.ConfigurationProviders) (ConfigP ac, err := apiserver.GetAPIClient() if err != nil { - telemetry.Errors.Inc(names.KubeEndpointsFile) return nil, fmt.Errorf("cannot connect to apiserver: %s", err) } epInformer := ac.InformerFactory.Core().V1().Endpoints() if epInformer == nil { - telemetry.Errors.Inc(names.KubeEndpointsFile) return nil, fmt.Errorf("cannot get endpoint informer: %s", err) } @@ -92,7 +89,6 @@ func NewKubeEndpointsFileConfigProvider(*config.ConfigurationProviders) (ConfigP UpdateFunc: provider.updateHandler, DeleteFunc: provider.deleteHandler, }); err != nil { - telemetry.Errors.Inc(names.KubeEndpointsFile) return nil, fmt.Errorf("cannot add event handler to endpoint informer: %s", err) } @@ -135,7 +131,6 @@ func (p *KubeEndpointsFileConfigProvider) setUpToDate(v bool) { func (p *KubeEndpointsFileConfigProvider) addHandler(obj interface{}) { ep, ok := obj.(*v1.Endpoints) if !ok { - telemetry.Errors.Inc(names.KubeEndpointsFile) log.Errorf("Expected an Endpoints type, got: %T", obj) return } @@ -149,7 +144,6 @@ func (p *KubeEndpointsFileConfigProvider) addHandler(obj interface{}) { func (p *KubeEndpointsFileConfigProvider) updateHandler(old, new interface{}) { newEp, ok := new.(*v1.Endpoints) if !ok { - telemetry.Errors.Inc(names.KubeEndpointsFile) log.Errorf("Expected an Endpoints type, got: %T", new) return } @@ -160,7 +154,6 @@ func (p *KubeEndpointsFileConfigProvider) updateHandler(old, new interface{}) { oldEp, ok := old.(*v1.Endpoints) if !ok { - telemetry.Errors.Inc(names.KubeEndpointsFile) log.Errorf("Expected a Endpoints type, got: %T", old) return } @@ -180,7 +173,6 @@ func (p *KubeEndpointsFileConfigProvider) updateHandler(old, new interface{}) { func (p *KubeEndpointsFileConfigProvider) deleteHandler(obj interface{}) { ep, ok := obj.(*v1.Endpoints) if !ok { - telemetry.Errors.Inc(names.KubeEndpointsFile) log.Errorf("Expected an Endpoints type, got: %T", obj) return } diff --git a/pkg/autodiscovery/providers/kube_services.go b/pkg/autodiscovery/providers/kube_services.go index e2e2a3183616e..a74c2d4337533 100644 --- a/pkg/autodiscovery/providers/kube_services.go +++ b/pkg/autodiscovery/providers/kube_services.go @@ -44,13 +44,11 @@ func NewKubeServiceConfigProvider(*config.ConfigurationProviders) (ConfigProvide // Using GetAPIClient() (no retry) ac, err := apiserver.GetAPIClient() if err != nil { - telemetry.Errors.Inc(names.KubeServices) return nil, fmt.Errorf("cannot connect to apiserver: %s", err) } servicesInformer := ac.InformerFactory.Core().V1().Services() if servicesInformer == nil { - telemetry.Errors.Inc(names.KubeServices) return nil, fmt.Errorf("cannot get service informer: %s", err) } @@ -64,7 +62,6 @@ func NewKubeServiceConfigProvider(*config.ConfigurationProviders) (ConfigProvide UpdateFunc: p.invalidateIfChanged, DeleteFunc: p.invalidate, }); err != nil { - telemetry.Errors.Inc(names.KubeServices) return nil, fmt.Errorf("cannot add event handler to services informer: %s", err) } @@ -104,14 +101,12 @@ func (k *KubeServiceConfigProvider) invalidateIfChanged(old, obj interface{}) { // nil pointers are safely handled by the casting logic. castedObj, ok := obj.(*v1.Service) if !ok { - telemetry.Errors.Inc(names.KubeServices) log.Errorf("Expected a *v1.Service type, got: %T", obj) return } // Cast the old object, invalidate on casting error castedOld, ok := old.(*v1.Service) if !ok { - telemetry.Errors.Inc(names.KubeServices) log.Errorf("Expected a *v1.Service type, got: %T", old) k.upToDate = false return @@ -171,7 +166,6 @@ func (k *KubeServiceConfigProvider) parseServiceAnnotations(services []*v1.Servi if len(errors) > 0 { errMsgSet := make(ErrorMsgSet) for _, err := range errors { - telemetry.Errors.Inc(names.KubeServices) log.Errorf("Cannot parse service template for service %s/%s: %s", svc.Namespace, svc.Name, err) errMsgSet[err.Error()] = struct{}{} } @@ -194,6 +188,8 @@ func (k *KubeServiceConfigProvider) parseServiceAnnotations(services []*v1.Servi k.cleanErrorsOfDeletedServices(setServiceIDs) + telemetry.Errors.Set(float64(len(k.configErrors)), names.KubeServices) + return configs, nil } diff --git a/pkg/autodiscovery/providers/kube_services_file.go b/pkg/autodiscovery/providers/kube_services_file.go index 863fe0d97683c..2cc4302d519b0 100644 --- a/pkg/autodiscovery/providers/kube_services_file.go +++ b/pkg/autodiscovery/providers/kube_services_file.go @@ -12,7 +12,6 @@ import ( "github.com/DataDog/datadog-agent/pkg/autodiscovery/integration" "github.com/DataDog/datadog-agent/pkg/autodiscovery/providers/names" - "github.com/DataDog/datadog-agent/pkg/autodiscovery/telemetry" "github.com/DataDog/datadog-agent/pkg/config" "github.com/DataDog/datadog-agent/pkg/util/kubernetes/apiserver" ) @@ -31,7 +30,6 @@ func NewKubeServiceFileConfigProvider(*config.ConfigurationProviders) (ConfigPro func (c *KubeServiceFileConfigProvider) Collect(ctx context.Context) ([]integration.Config, error) { configs, _, err := ReadConfigFiles(WithAdvancedADOnly) if err != nil { - telemetry.Errors.Inc(names.KubeServicesFile) return nil, err } diff --git a/pkg/autodiscovery/providers/prometheus_pods.go b/pkg/autodiscovery/providers/prometheus_pods.go index 3a01a6bccb06c..cd289c7cb98c5 100644 --- a/pkg/autodiscovery/providers/prometheus_pods.go +++ b/pkg/autodiscovery/providers/prometheus_pods.go @@ -14,7 +14,6 @@ import ( "github.com/DataDog/datadog-agent/pkg/autodiscovery/common/utils" "github.com/DataDog/datadog-agent/pkg/autodiscovery/integration" "github.com/DataDog/datadog-agent/pkg/autodiscovery/providers/names" - "github.com/DataDog/datadog-agent/pkg/autodiscovery/telemetry" "github.com/DataDog/datadog-agent/pkg/config" "github.com/DataDog/datadog-agent/pkg/util/kubernetes/kubelet" ) @@ -31,7 +30,6 @@ type PrometheusPodsConfigProvider struct { func NewPrometheusPodsConfigProvider(*config.ConfigurationProviders) (ConfigProvider, error) { checks, err := getPrometheusConfigs() if err != nil { - telemetry.Errors.Inc(names.PrometheusPods) return nil, err } @@ -52,14 +50,12 @@ func (p *PrometheusPodsConfigProvider) Collect(ctx context.Context) ([]integrati if p.kubelet == nil { p.kubelet, err = kubelet.GetKubeUtil() if err != nil { - telemetry.Errors.Inc(names.PrometheusPods) return []integration.Config{}, err } } pods, err := p.kubelet.GetLocalPodList(ctx) if err != nil { - telemetry.Errors.Inc(names.PrometheusPods) return []integration.Config{}, err } diff --git a/pkg/autodiscovery/providers/prometheus_services.go b/pkg/autodiscovery/providers/prometheus_services.go index fd632b3689066..65656845bbbd8 100644 --- a/pkg/autodiscovery/providers/prometheus_services.go +++ b/pkg/autodiscovery/providers/prometheus_services.go @@ -17,7 +17,6 @@ import ( "github.com/DataDog/datadog-agent/pkg/autodiscovery/common/utils" "github.com/DataDog/datadog-agent/pkg/autodiscovery/integration" "github.com/DataDog/datadog-agent/pkg/autodiscovery/providers/names" - "github.com/DataDog/datadog-agent/pkg/autodiscovery/telemetry" "github.com/DataDog/datadog-agent/pkg/config" "github.com/DataDog/datadog-agent/pkg/util/kubernetes/apiserver" "github.com/DataDog/datadog-agent/pkg/util/log" @@ -70,13 +69,11 @@ func NewPrometheusServicesConfigProvider(*config.ConfigurationProviders) (Config // Using GetAPIClient (no wait) as Client should already be initialized by Cluster Agent main entrypoint before ac, err := apiserver.GetAPIClient() if err != nil { - telemetry.Errors.Inc(names.PrometheusServices) return nil, fmt.Errorf("cannot connect to apiserver: %s", err) } servicesInformer := ac.InformerFactory.Core().V1().Services() if servicesInformer == nil { - telemetry.Errors.Inc(names.PrometheusServices) return nil, errors.New("cannot get services informer") } @@ -87,7 +84,6 @@ func NewPrometheusServicesConfigProvider(*config.ConfigurationProviders) (Config if collectEndpoints { endpointsInformer = ac.InformerFactory.Core().V1().Endpoints() if endpointsInformer == nil { - telemetry.Errors.Inc(names.PrometheusServices) return nil, errors.New("cannot get endpoints informer") } endpointsLister = endpointsInformer.Lister() @@ -100,7 +96,6 @@ func NewPrometheusServicesConfigProvider(*config.ConfigurationProviders) (Config checks, err := getPrometheusConfigs() if err != nil { - telemetry.Errors.Inc(names.PrometheusServices) return nil, err } @@ -111,7 +106,6 @@ func NewPrometheusServicesConfigProvider(*config.ConfigurationProviders) (Config UpdateFunc: p.invalidateIfChanged, DeleteFunc: p.invalidate, }); err != nil { - telemetry.Errors.Inc(names.PrometheusServices) return nil, fmt.Errorf("cannot add event handler to services informer: %s", err) } @@ -120,7 +114,6 @@ func NewPrometheusServicesConfigProvider(*config.ConfigurationProviders) (Config AddFunc: p.invalidateIfAddedEndpoints, UpdateFunc: p.invalidateIfChangedEndpoints, }); err != nil { - telemetry.Errors.Inc(names.PrometheusServices) return nil, fmt.Errorf("cannot add event handler to endpoints informer: %s", err) } } @@ -171,7 +164,6 @@ func (p *PrometheusServicesConfigProvider) Collect(ctx context.Context) ([]integ if k8serrors.IsNotFound(err) { continue } - telemetry.Errors.Inc(names.PrometheusServices) return nil, err } @@ -214,14 +206,12 @@ func (p *PrometheusServicesConfigProvider) invalidate(obj interface{}) { // It's possible that we got a DeletedFinalStateUnknown here deletedState, ok := obj.(cache.DeletedFinalStateUnknown) if !ok { - telemetry.Errors.Inc(names.PrometheusServices) log.Errorf("Received unexpected object: %T", obj) return } castedObj, ok = deletedState.Obj.(*v1.Service) if !ok { - telemetry.Errors.Inc(names.PrometheusServices) log.Errorf("Expected DeletedFinalStateUnknown to contain *v1.Service, got: %T", deletedState.Obj) return } @@ -241,7 +231,6 @@ func (p *PrometheusServicesConfigProvider) invalidateIfChanged(old, obj interfac // nil pointers are safely handled by the casting logic. castedObj, ok := obj.(*v1.Service) if !ok { - telemetry.Errors.Inc(names.PrometheusServices) log.Errorf("Expected a Service type, got: %T", obj) return } @@ -249,7 +238,6 @@ func (p *PrometheusServicesConfigProvider) invalidateIfChanged(old, obj interfac // Cast the old object, invalidate on casting error castedOld, ok := old.(*v1.Service) if !ok { - telemetry.Errors.Inc(names.PrometheusServices) log.Errorf("Expected a Service type, got: %T", old) p.setUpToDate(false) return @@ -278,7 +266,6 @@ func (p *PrometheusServicesConfigProvider) invalidateIfChangedEndpoints(old, obj // nil pointers are safely handled by the casting logic. castedObj, ok := obj.(*v1.Endpoints) if !ok { - telemetry.Errors.Inc(names.PrometheusServices) log.Errorf("Expected a Endpoints type, got: %T", obj) return } diff --git a/pkg/autodiscovery/providers/zookeeper.go b/pkg/autodiscovery/providers/zookeeper.go index 2798f2333f3bf..dbf88826c27ed 100644 --- a/pkg/autodiscovery/providers/zookeeper.go +++ b/pkg/autodiscovery/providers/zookeeper.go @@ -20,7 +20,6 @@ import ( "github.com/DataDog/datadog-agent/pkg/autodiscovery/common/utils" "github.com/DataDog/datadog-agent/pkg/autodiscovery/integration" "github.com/DataDog/datadog-agent/pkg/autodiscovery/providers/names" - "github.com/DataDog/datadog-agent/pkg/autodiscovery/telemetry" "github.com/DataDog/datadog-agent/pkg/config" "github.com/DataDog/datadog-agent/pkg/util/log" ) @@ -50,7 +49,6 @@ func NewZookeeperConfigProvider(providerConfig *config.ConfigurationProviders) ( c, _, err := zk.Connect(urls, sessionTimeout) if err != nil { - telemetry.Errors.Inc(names.Zookeeper) return nil, fmt.Errorf("ZookeeperConfigProvider: couldn't connect to %q (%s): %s", providerConfig.TemplateURL, strings.Join(urls, ", "), err) } cache := newProviderCache() @@ -72,7 +70,6 @@ func (z *ZookeeperConfigProvider) Collect(ctx context.Context) ([]integration.Co configs := make([]integration.Config, 0) identifiers, err := z.getIdentifiers(z.templateDir) if err != nil { - telemetry.Errors.Inc(names.Zookeeper) return nil, err } for _, id := range identifiers { @@ -92,7 +89,6 @@ func (z *ZookeeperConfigProvider) IsUpToDate(ctx context.Context) (bool, error) identifiers, err := z.getIdentifiers(z.templateDir) if err != nil { - telemetry.Errors.Inc(names.Zookeeper) return false, err } outdated := z.cache.mostRecentMod @@ -117,7 +113,6 @@ func (z *ZookeeperConfigProvider) IsUpToDate(ctx context.Context) (bool, error) gcnPath := path.Join(identifier, gcn) _, stat, err := z.client.Get(gcnPath) if err != nil { - telemetry.Errors.Inc(names.Zookeeper) return false, fmt.Errorf("couldn't get key '%s' from zookeeper: %s", identifier, err) } outdated = math.Max(float64(stat.Mtime), outdated) @@ -176,28 +171,24 @@ func (z *ZookeeperConfigProvider) getTemplates(key string) []integration.Config rawNames, _, err := z.client.Get(checkNameKey) if err != nil { - telemetry.Errors.Inc(names.Zookeeper) log.Errorf("Couldn't get check names from key '%s' in zookeeper: %s", key, err) return nil } checkNames, err := utils.ParseCheckNames(string(rawNames)) if err != nil { - telemetry.Errors.Inc(names.Zookeeper) log.Errorf("Failed to retrieve check names at %s. Error: %s", checkNameKey, err) return nil } initConfigs, err := z.getJSONValue(initKey) if err != nil { - telemetry.Errors.Inc(names.Zookeeper) log.Errorf("Failed to retrieve init configs at %s. Error: %s", initKey, err) return nil } instances, err := z.getJSONValue(instanceKey) if err != nil { - telemetry.Errors.Inc(names.Zookeeper) log.Errorf("Failed to retrieve instances at %s. Error: %s", instanceKey, err) return nil } diff --git a/pkg/autodiscovery/telemetry/telemetry.go b/pkg/autodiscovery/telemetry/telemetry.go index 2e9e020209906..019854afe3fb2 100644 --- a/pkg/autodiscovery/telemetry/telemetry.go +++ b/pkg/autodiscovery/telemetry/telemetry.go @@ -44,12 +44,12 @@ var ( commonOpts, ) - // Errors tracks the number of AD errors found by AD providers. + // Errors tracks the current number of AD configs with errors by AD providers. Errors = telemetry.NewGaugeWithOpts( subsystem, "errors", []string{"provider"}, - "Number of Autodiscovery errors by provider.", + "Current number of Autodiscovery configs with errors by provider.", commonOpts, )