diff --git a/x-pack/metricbeat/module/azure/mock_service.go b/x-pack/metricbeat/module/azure/mock_service.go index dc578db16fad..9626952fa6d1 100644 --- a/x-pack/metricbeat/module/azure/mock_service.go +++ b/x-pack/metricbeat/module/azure/mock_service.go @@ -29,17 +29,12 @@ func (client *MockService) GetResourceDefinitions(id []string, group []string, r return args.Get(0).([]*armresources.GenericResourceExpanded), args.Error(1) } -// GetMetricDefinitions is a mock function for the azure service -func (client *MockService) GetMetricDefinitions(resourceId string, namespace string) (armmonitor.MetricDefinitionCollection, error) { +// GetMetricDefinitionsWithRetry is a mock function for the azure service +func (client *MockService) GetMetricDefinitionsWithRetry(resourceId string, namespace string) (armmonitor.MetricDefinitionCollection, error) { args := client.Called(resourceId, namespace) return args.Get(0).(armmonitor.MetricDefinitionCollection), args.Error(1) } -// GetMetricDefinitionsWithRetry is a mock function for the azure service -func (client *MockService) GetMetricDefinitionsWithRetry(resource *armresources.GenericResourceExpanded, namespace string) (armmonitor.MetricDefinitionCollection, error) { - return client.GetMetricDefinitions(*resource.ID, namespace) -} - // GetMetricNamespaces is a mock function for the azure service func (client *MockService) GetMetricNamespaces(resourceId string) (armmonitor.MetricNamespaceCollection, error) { args := client.Called(resourceId) diff --git a/x-pack/metricbeat/module/azure/monitor/client_helper.go b/x-pack/metricbeat/module/azure/monitor/client_helper.go index fd775b9eecc2..5fa5b9964e67 100644 --- a/x-pack/metricbeat/module/azure/monitor/client_helper.go +++ b/x-pack/metricbeat/module/azure/monitor/client_helper.go @@ -33,7 +33,7 @@ func mapMetrics(client *azure.Client, resources []*armresources.GenericResourceE metricDefinitions, exists := namespaceMetrics[metric.Namespace] if !exists { - metricDefinitions, err = client.AzureMonitorService.GetMetricDefinitionsWithRetry(resource, metric.Namespace) + metricDefinitions, err = client.AzureMonitorService.GetMetricDefinitionsWithRetry(*resource.ID, metric.Namespace) if err != nil { return nil, err } diff --git a/x-pack/metricbeat/module/azure/monitor_service.go b/x-pack/metricbeat/module/azure/monitor_service.go index bfa49fdb4194..ba8420fff461 100644 --- a/x-pack/metricbeat/module/azure/monitor_service.go +++ b/x-pack/metricbeat/module/azure/monitor_service.go @@ -200,8 +200,43 @@ func (service *MonitorService) GetMetricNamespaces(resourceId string) (armmonito return metricNamespaceCollection, nil } -// GetMetricDefinitions will return all supported metrics based on the resource id and namespace -func (service *MonitorService) GetMetricDefinitions(resourceId string, namespace string) (armmonitor.MetricDefinitionCollection, error) { +// sleepIfPossible will check for the error 429 in the azure response, and look for the retry after header. +// If the header is present, then metricbeat will sleep for that duration, otherwise it will return an error. +func (service *MonitorService) sleepIfPossible(err error, resourceId string, namespace string) error { + errorMsg := "no metric definitions were found for resource " + resourceId + " and namespace " + namespace + + var respError *azcore.ResponseError + ok := errors.As(err, &respError) + if !ok { + return fmt.Errorf("%s, failed to cast error to azcore.ResponseError", errorMsg) + } + // Check for TooManyRequests error and retry if it is the case + if respError.StatusCode != http.StatusTooManyRequests { + return fmt.Errorf("%s, %w", errorMsg, err) + } + + // Check if the error has the header Retry After. + // If it is present, then we should try to make this request again. + retryAfter := respError.RawResponse.Header.Get("Retry-After") + if retryAfter == "" { + return fmt.Errorf("%s %w, failed to find Retry-After header", errorMsg, err) + } + + duration, errD := time.ParseDuration(retryAfter + "s") + if errD != nil { + return fmt.Errorf("%s, failed to parse duration %s from header retry after", errorMsg, retryAfter) + } + + service.log.Infof("%s, metricbeat will try again after %s seconds", errorMsg, retryAfter) + time.Sleep(duration) + service.log.Infof("%s, metricbeat finished sleeping and will try again now", errorMsg) + + return nil +} + +// GetMetricDefinitionsWithRetry will return all supported metrics based on the resource id and namespace +// It will check for an error when moving the pager to the next page, and retry if possible. +func (service *MonitorService) GetMetricDefinitionsWithRetry(resourceId string, namespace string) (armmonitor.MetricDefinitionCollection, error) { opts := &armmonitor.MetricDefinitionsClientListOptions{} if namespace != "" { @@ -213,53 +248,22 @@ func (service *MonitorService) GetMetricDefinitions(resourceId string, namespace metricDefinitionCollection := armmonitor.MetricDefinitionCollection{} for pager.More() { - nextPage, err := pager.NextPage(service.context) - if err != nil { - return armmonitor.MetricDefinitionCollection{}, err - } - - metricDefinitionCollection.Value = append(metricDefinitionCollection.Value, nextPage.Value...) - } - return metricDefinitionCollection, nil -} - -func (service *MonitorService) GetMetricDefinitionsWithRetry(resource *armresources.GenericResourceExpanded, namespace string) (armmonitor.MetricDefinitionCollection, error) { - for { - - metricDefinitions, err := service.GetMetricDefinitions(*resource.ID, namespace) - if err != nil { - errorMsg := "no metric definitions were found for resource " + *resource.ID + " and namespace " + namespace - - var respError *azcore.ResponseError - ok := errors.As(err, &respError) - if !ok { - return metricDefinitions, fmt.Errorf("%s, failed to cast error to azcore.ResponseError", errorMsg) - } - // Check for TooManyRequests error and retry if it is the case - if respError.StatusCode != http.StatusTooManyRequests { - return metricDefinitions, fmt.Errorf("%s, %w", errorMsg, err) - } - - // Check if the error has the header Retry After. - // If it is present, then we should try to make this request again. - retryAfter := respError.RawResponse.Header.Get("Retry-After") - if retryAfter == "" { - return metricDefinitions, fmt.Errorf("%s %w, failed to find Retry-After header", errorMsg, err) - } - - duration, errD := time.ParseDuration(retryAfter + "s") - if errD != nil { - return metricDefinitions, fmt.Errorf("%s, failed to parse duration %s from header retry after", errorMsg, retryAfter) + for { + nextPage, err := pager.NextPage(service.context) + if err != nil { + retryError := service.sleepIfPossible(err, resourceId, namespace) + if retryError != nil { + return armmonitor.MetricDefinitionCollection{}, err + } + } else { + metricDefinitionCollection.Value = append(metricDefinitionCollection.Value, nextPage.Value...) + break } - - service.log.Infof("%s, metricbeat will try again after %s seconds", errorMsg, retryAfter) - time.Sleep(duration) - service.log.Infof("%s, metricbeat finished sleeping and will try again now", errorMsg) - } else { - return metricDefinitions, err } } + + return metricDefinitionCollection, nil } // GetMetricValues will return the metric values based on the resource and metric details diff --git a/x-pack/metricbeat/module/azure/service_interface.go b/x-pack/metricbeat/module/azure/service_interface.go index 46461f2fd927..cb524c7f6ea5 100644 --- a/x-pack/metricbeat/module/azure/service_interface.go +++ b/x-pack/metricbeat/module/azure/service_interface.go @@ -13,8 +13,7 @@ import ( type Service interface { GetResourceDefinitionById(id string) (armresources.GenericResource, error) GetResourceDefinitions(id []string, group []string, rType string, query string) ([]*armresources.GenericResourceExpanded, error) - GetMetricDefinitions(resourceId string, namespace string) (armmonitor.MetricDefinitionCollection, error) - GetMetricDefinitionsWithRetry(resource *armresources.GenericResourceExpanded, namespace string) (armmonitor.MetricDefinitionCollection, error) + GetMetricDefinitionsWithRetry(resourceId string, namespace string) (armmonitor.MetricDefinitionCollection, error) GetMetricNamespaces(resourceId string) (armmonitor.MetricNamespaceCollection, error) GetMetricValues(resourceId string, namespace string, timegrain string, timespan string, metricNames []string, aggregations string, filter string) ([]armmonitor.Metric, string, error) } diff --git a/x-pack/metricbeat/module/azure/storage/client_helper.go b/x-pack/metricbeat/module/azure/storage/client_helper.go index 393607be7ae9..3d58c0ae65a1 100644 --- a/x-pack/metricbeat/module/azure/storage/client_helper.go +++ b/x-pack/metricbeat/module/azure/storage/client_helper.go @@ -41,13 +41,13 @@ func mapMetrics(client *azure.Client, resources []*armresources.GenericResourceE } // get all metric definitions supported by the namespace provided - metricDefinitions, err := client.AzureMonitorService.GetMetricDefinitions(resourceID, namespace) + metricDefinitions, err := client.AzureMonitorService.GetMetricDefinitionsWithRetry(resourceID, namespace) if err != nil { - return nil, fmt.Errorf("no metric definitions were found for resource %s and namespace %s %w", resourceID, namespace, err) + return nil, err } if len(metricDefinitions.Value) == 0 { - return nil, fmt.Errorf("no metric definitions were found for resource %s and namespace %s %w", resourceID, namespace, err) + return nil, err } var filteredMetricDefinitions []armmonitor.MetricDefinition