diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index 1121d795fe3..ace63d069bf 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -127,6 +127,7 @@ https://github.com/elastic/beats/compare/v7.0.0-alpha2...master[Check the HEAD d - Fixed RabbitMQ `queue` metricset gathering when `consumer_utilisation` is set empty at the metrics source {pull}12089[12089] - Fix direction of incoming IPv6 sockets. {pull}12248[12248] - Validate that kibana/status metricset cannot be used when xpack is enabled. {pull}12264[12264] +- Ignore prometheus metrics when their values are NaN or Inf. {pull}12084[12084] {issue}10849[10849] *Packetbeat* diff --git a/metricbeat/helper/prometheus/metric.go b/metricbeat/helper/prometheus/metric.go index c726e4ab99b..0fb1df2ba99 100644 --- a/metricbeat/helper/prometheus/metric.go +++ b/metricbeat/helper/prometheus/metric.go @@ -135,28 +135,33 @@ func (m *commonMetric) GetField() string { func (m *commonMetric) GetValue(metric *dto.Metric) interface{} { counter := metric.GetCounter() if counter != nil { - return int64(counter.GetValue()) + if !math.IsNaN(counter.GetValue()) && !math.IsInf(counter.GetValue(), 0) { + return int64(counter.GetValue()) + } } gauge := metric.GetGauge() if gauge != nil { - return gauge.GetValue() + if !math.IsNaN(gauge.GetValue()) && !math.IsInf(gauge.GetValue(), 0) { + return gauge.GetValue() + } } summary := metric.GetSummary() if summary != nil { value := common.MapStr{} - value["sum"] = summary.GetSampleSum() - value["count"] = summary.GetSampleCount() + if !math.IsNaN(summary.GetSampleSum()) && !math.IsInf(summary.GetSampleSum(), 0) { + value["sum"] = summary.GetSampleSum() + value["count"] = summary.GetSampleCount() + } quantiles := summary.GetQuantile() percentileMap := common.MapStr{} for _, quantile := range quantiles { - if !math.IsNaN(quantile.GetValue()) { - key := strconv.FormatFloat((100 * quantile.GetQuantile()), 'f', -1, 64) + if !math.IsNaN(quantile.GetValue()) && !math.IsInf(quantile.GetValue(), 0) { + key := strconv.FormatFloat(100*quantile.GetQuantile(), 'f', -1, 64) percentileMap[key] = quantile.GetValue() } - } if len(percentileMap) != 0 { @@ -169,14 +174,18 @@ func (m *commonMetric) GetValue(metric *dto.Metric) interface{} { histogram := metric.GetHistogram() if histogram != nil { value := common.MapStr{} - value["sum"] = histogram.GetSampleSum() - value["count"] = histogram.GetSampleCount() + if !math.IsNaN(histogram.GetSampleSum()) && !math.IsInf(histogram.GetSampleSum(), 0) { + value["sum"] = histogram.GetSampleSum() + value["count"] = histogram.GetSampleCount() + } buckets := histogram.GetBucket() bucketMap := common.MapStr{} for _, bucket := range buckets { - key := strconv.FormatFloat(bucket.GetUpperBound(), 'f', -1, 64) - bucketMap[key] = bucket.GetCumulativeCount() + if bucket.GetCumulativeCount() != uint64(math.NaN()) && bucket.GetCumulativeCount() != uint64(math.Inf(0)) { + key := strconv.FormatFloat(bucket.GetUpperBound(), 'f', -1, 64) + bucketMap[key] = bucket.GetCumulativeCount() + } } if len(bucketMap) != 0 { diff --git a/metricbeat/helper/prometheus/prometheus.go b/metricbeat/helper/prometheus/prometheus.go index 461eb159587..7638f58c365 100644 --- a/metricbeat/helper/prometheus/prometheus.go +++ b/metricbeat/helper/prometheus/prometheus.go @@ -162,10 +162,9 @@ func (p *prometheus) GetProcessedMetrics(mapping *MetricsMapping) ([]common.MapS if field != "" { event := getEvent(eventsMap, keyLabels) - - // value may be a mapstr (for histograms and summaries), do a deep update to avoid smashing existing fields update := common.MapStr{} update.Put(field, value) + // value may be a mapstr (for histograms and summaries), do a deep update to avoid smashing existing fields event.DeepUpdate(update) event.DeepUpdate(labels) diff --git a/metricbeat/helper/prometheus/prometheus_test.go b/metricbeat/helper/prometheus/prometheus_test.go index a782665d320..dbe1ca92260 100644 --- a/metricbeat/helper/prometheus/prometheus_test.go +++ b/metricbeat/helper/prometheus/prometheus_test.go @@ -69,6 +69,17 @@ metrics_one_count_total{name="jane",surname="foster"} 1 metrics_one_count_total{name="john",surname="williams"} 2 metrics_one_count_total{name="jahn",surname="baldwin",age="30"} 3 +` + + promGaugeKeyLabelWithNaNInf = ` +# TYPE metrics_one_count_errors gauge +metrics_one_count_errors{name="jane",surname="foster"} 0 +# TYPE metrics_one_count_total gauge +metrics_one_count_total{name="jane",surname="foster"} NaN +metrics_one_count_total{name="foo",surname="bar"} +Inf +metrics_one_count_total{name="john",surname="williams"} -Inf +metrics_one_count_total{name="jahn",surname="baldwin",age="30"} 3 + ` promCounterKeyLabel = ` @@ -77,6 +88,16 @@ metrics_one_count_total{name="jane",surname="foster"} 1 metrics_one_count_total{name="john",surname="williams"} 2 metrics_one_count_total{name="jahn",surname="baldwin",age="30"} 3 +` + + promCounterKeyLabelWithNaNInf = ` +# TYPE metrics_one_count_errors counter +metrics_one_count_errors{name="jane",surname="foster"} 1 +# TYPE metrics_one_count_total counter +metrics_one_count_total{name="jane",surname="foster"} NaN +metrics_one_count_total{name="john",surname="williams"} +Inf +metrics_one_count_total{name="jahn",surname="baldwin",age="30"} 3 + ` promHistogramKeyLabel = ` @@ -98,6 +119,19 @@ metrics_one_midichlorians_bucket{rank="padawan",alive="yes",le="+Inf"} 28 metrics_one_midichlorians_sum{rank="padawan",alive="yes"} 800001 metrics_one_midichlorians_count{rank="padawan",alive="yes"} 28 +` + + promHistogramKeyLabelWithNaNInf = ` +# TYPE metrics_one_midichlorians histogram +metrics_one_midichlorians_bucket{rank="youngling",alive="yes",le="2000"} NaN +metrics_one_midichlorians_bucket{rank="youngling",alive="yes",le="4000"} +Inf +metrics_one_midichlorians_bucket{rank="youngling",alive="yes",le="8000"} -Inf +metrics_one_midichlorians_bucket{rank="youngling",alive="yes",le="16000"} 84 +metrics_one_midichlorians_bucket{rank="youngling",alive="yes",le="32000"} 86 +metrics_one_midichlorians_bucket{rank="youngling",alive="yes",le="+Inf"} 86 +metrics_one_midichlorians_sum{rank="youngling",alive="yes"} 1000001 +metrics_one_midichlorians_count{rank="youngling",alive="yes"} 86 + ` promSummaryKeyLabel = ` @@ -117,6 +151,18 @@ metrics_force_propagation_ms{kind="sith",quantile="1"} 29 metrics_force_propagation_ms_sum{kind="sith"} 112 metrics_force_propagation_ms_count{kind="sith"} 711 +` + + promSummaryKeyLabelWithNaNInf = ` +# TYPE metrics_force_propagation_ms summary +metrics_force_propagation_ms{kind="jedi",quantile="0"} NaN +metrics_force_propagation_ms{kind="jedi",quantile="0.25"} +Inf +metrics_force_propagation_ms{kind="jedi",quantile="0.5"} -Inf +metrics_force_propagation_ms{kind="jedi",quantile="0.75"} 20 +metrics_force_propagation_ms{kind="jedi",quantile="1"} 30 +metrics_force_propagation_ms_sum{kind="jedi"} 50 +metrics_force_propagation_ms_count{kind="jedi"} 651 + ` ) @@ -496,6 +542,47 @@ func TestPrometheusKeyLabels(t *testing.T) { }, }, + { + testName: "Test gauge with KeyLabel With NaN Inf", + prometheusResponse: promGaugeKeyLabelWithNaNInf, + mapping: &MetricsMapping{ + Metrics: map[string]MetricMap{ + "metrics_one_count_errors": Metric("metrics.one.count"), + "metrics_one_count_total": Metric("metrics.one.count"), + }, + Labels: map[string]LabelMap{ + "name": KeyLabel("metrics.one.labels.name"), + "surname": KeyLabel("metrics.one.labels.surname"), + "age": KeyLabel("metrics.one.labels.age"), + }, + }, + expectedEvents: []common.MapStr{ + common.MapStr{ + "metrics": common.MapStr{ + "one": common.MapStr{ + "count": 0.0, + "labels": common.MapStr{ + "name": "jane", + "surname": "foster", + }, + }, + }, + }, + common.MapStr{ + "metrics": common.MapStr{ + "one": common.MapStr{ + "count": 3.0, + "labels": common.MapStr{ + "name": "jahn", + "surname": "baldwin", + "age": "30", + }, + }, + }, + }, + }, + }, + { testName: "Test counter with KeyLabel", prometheusResponse: promCounterKeyLabel, @@ -547,6 +634,47 @@ func TestPrometheusKeyLabels(t *testing.T) { }, }, + { + testName: "Test counter with KeyLabel With NaN Inf", + prometheusResponse: promCounterKeyLabelWithNaNInf, + mapping: &MetricsMapping{ + Metrics: map[string]MetricMap{ + "metrics_one_count_errors": Metric("metrics.one.count"), + "metrics_one_count_total": Metric("metrics.one.count"), + }, + Labels: map[string]LabelMap{ + "name": KeyLabel("metrics.one.labels.name"), + "surname": KeyLabel("metrics.one.labels.surname"), + "age": KeyLabel("metrics.one.labels.age"), + }, + }, + expectedEvents: []common.MapStr{ + common.MapStr{ + "metrics": common.MapStr{ + "one": common.MapStr{ + "count": int64(1), + "labels": common.MapStr{ + "name": "jane", + "surname": "foster", + }, + }, + }, + }, + common.MapStr{ + "metrics": common.MapStr{ + "one": common.MapStr{ + "count": int64(3), + "labels": common.MapStr{ + "name": "jahn", + "surname": "baldwin", + "age": "30", + }, + }, + }, + }, + }, + }, + { testName: "Test histogram with KeyLabel", prometheusResponse: promHistogramKeyLabel, @@ -604,6 +732,40 @@ func TestPrometheusKeyLabels(t *testing.T) { }, }, + { + testName: "Test histogram with KeyLabel With NaN Inf", + prometheusResponse: promHistogramKeyLabelWithNaNInf, + mapping: &MetricsMapping{ + Metrics: map[string]MetricMap{ + "metrics_one_midichlorians": Metric("metrics.one.midichlorians"), + }, + Labels: map[string]LabelMap{ + "rank": KeyLabel("metrics.one.midichlorians.rank"), + "alive": KeyLabel("metrics.one.midichlorians.alive"), + }, + }, + expectedEvents: []common.MapStr{ + common.MapStr{ + "metrics": common.MapStr{ + "one": common.MapStr{ + "midichlorians": common.MapStr{ + "count": uint64(86), + "sum": 1000001.0, + "bucket": common.MapStr{ + "16000": uint64(84), + "32000": uint64(86), + "+Inf": uint64(86), + }, + + "rank": "youngling", + "alive": "yes", + }, + }, + }, + }, + }, + }, + { testName: "Test summary with KeyLabel", prometheusResponse: promSummaryKeyLabel, @@ -662,6 +824,40 @@ func TestPrometheusKeyLabels(t *testing.T) { }, }, }, + + { + testName: "Test summary with KeyLabel With NaN Inf", + prometheusResponse: promSummaryKeyLabelWithNaNInf, + mapping: &MetricsMapping{ + Metrics: map[string]MetricMap{ + "metrics_force_propagation_ms": Metric("metrics.force.propagation.ms"), + }, + Labels: map[string]LabelMap{ + "kind": KeyLabel("metrics.force.propagation.ms.labels.kind"), + }, + }, + expectedEvents: []common.MapStr{ + common.MapStr{ + "metrics": common.MapStr{ + "force": common.MapStr{ + "propagation": common.MapStr{ + "ms": common.MapStr{ + "count": uint64(651), + "sum": 50.0, + "percentile": common.MapStr{ + "75": 20.0, + "100": 30.0, + }, + "labels": common.MapStr{ + "kind": "jedi", + }, + }, + }, + }, + }, + }, + }, + }, } for _, tc := range testCases { diff --git a/metricbeat/module/prometheus/collector/_meta/testdata/metrics-with-naninf.plain b/metricbeat/module/prometheus/collector/_meta/testdata/metrics-with-naninf.plain new file mode 100644 index 00000000000..1101ede5d96 --- /dev/null +++ b/metricbeat/module/prometheus/collector/_meta/testdata/metrics-with-naninf.plain @@ -0,0 +1,32 @@ +# HELP kafka_consumer_records_lag_records The latest lag of the partition +# TYPE kafka_consumer_records_lag_records gauge +kafka_consumer_records_lag_records{client_id="consumer1",} NaN +kafka_consumer_records_lag_records{client_id="consumer2",} +Inf +kafka_consumer_records_lag_records{client_id="consumer3",} -Inf +kafka_consumer_records_lag_records{client_id="consumer4",} 5 +# HELP http_failures Total number of http request failures +# TYPE http_failures counter +http_failures{method="GET"} 2 +http_failures{method="POST"} NaN +http_failures{method="DELETE"} +Inf +# HELP go_gc_duration_seconds A summary of the GC invocation durations. +# TYPE go_gc_duration_seconds summary +go_gc_duration_seconds{quantile="0",} NaN +go_gc_duration_seconds{quantile="0.25",} +Inf +go_gc_duration_seconds{quantile="0.5",} -Inf +go_gc_duration_seconds{quantile="0.75"} 9.8154e-05 +go_gc_duration_seconds{quantile="1",} 0.011689149 +go_gc_duration_seconds_sum 3.451780079 +go_gc_duration_seconds_count 13118 +# HELP http_request_duration_seconds request duration histogram +# TYPE http_request_duration_seconds histogram +http_request_duration_seconds_bucket{le="0.1"} +Inf +http_request_duration_seconds_bucket{le="0.2"} -Inf +http_request_duration_seconds_bucket{le="0.5"} NaN +http_request_duration_seconds_bucket{le="1"} 1 +http_request_duration_seconds_bucket{le="2"} 2 +http_request_duration_seconds_bucket{le="3"} 3 +http_request_duration_seconds_bucket{le="5"} 3 +http_request_duration_seconds_bucket{le="+Inf"} 3 +http_request_duration_seconds_sum 6 +http_request_duration_seconds_count 3 diff --git a/metricbeat/module/prometheus/collector/_meta/testdata/metrics-with-naninf.plain-expected.json b/metricbeat/module/prometheus/collector/_meta/testdata/metrics-with-naninf.plain-expected.json new file mode 100644 index 00000000000..60efae224fe --- /dev/null +++ b/metricbeat/module/prometheus/collector/_meta/testdata/metrics-with-naninf.plain-expected.json @@ -0,0 +1,222 @@ +[ + { + "event": { + "dataset": "prometheus.collector", + "duration": 115000, + "module": "prometheus" + }, + "metricset": { + "name": "collector" + }, + "prometheus": { + "labels": { + "client_id": "consumer4" + }, + "metrics": { + "kafka_consumer_records_lag_records": 5 + } + }, + "service": { + "address": "127.0.0.1:55555", + "type": "prometheus" + } + }, + { + "event": { + "dataset": "prometheus.collector", + "duration": 115000, + "module": "prometheus" + }, + "metricset": { + "name": "collector" + }, + "prometheus": { + "labels": { + "method": "GET" + }, + "metrics": { + "http_failures": 2 + } + }, + "service": { + "address": "127.0.0.1:55555", + "type": "prometheus" + } + }, + { + "event": { + "dataset": "prometheus.collector", + "duration": 115000, + "module": "prometheus" + }, + "metricset": { + "name": "collector" + }, + "prometheus": { + "labels": { + "le": "5" + }, + "metrics": { + "http_request_duration_seconds_bucket": 3 + } + }, + "service": { + "address": "127.0.0.1:55555", + "type": "prometheus" + } + }, + { + "event": { + "dataset": "prometheus.collector", + "duration": 115000, + "module": "prometheus" + }, + "metricset": { + "name": "collector" + }, + "prometheus": { + "labels": { + "le": "+Inf" + }, + "metrics": { + "http_request_duration_seconds_bucket": 3 + } + }, + "service": { + "address": "127.0.0.1:55555", + "type": "prometheus" + } + }, + { + "event": { + "dataset": "prometheus.collector", + "duration": 115000, + "module": "prometheus" + }, + "metricset": { + "name": "collector" + }, + "prometheus": { + "labels": { + "le": "3" + }, + "metrics": { + "http_request_duration_seconds_bucket": 3 + } + }, + "service": { + "address": "127.0.0.1:55555", + "type": "prometheus" + } + }, + { + "event": { + "dataset": "prometheus.collector", + "duration": 115000, + "module": "prometheus" + }, + "metricset": { + "name": "collector" + }, + "prometheus": { + "metrics": { + "go_gc_duration_seconds_count": 13118, + "go_gc_duration_seconds_sum": 3.451780079, + "http_request_duration_seconds_count": 3, + "http_request_duration_seconds_sum": 6 + } + }, + "service": { + "address": "127.0.0.1:55555", + "type": "prometheus" + } + }, + { + "event": { + "dataset": "prometheus.collector", + "duration": 115000, + "module": "prometheus" + }, + "metricset": { + "name": "collector" + }, + "prometheus": { + "labels": { + "le": "2" + }, + "metrics": { + "http_request_duration_seconds_bucket": 2 + } + }, + "service": { + "address": "127.0.0.1:55555", + "type": "prometheus" + } + }, + { + "event": { + "dataset": "prometheus.collector", + "duration": 115000, + "module": "prometheus" + }, + "metricset": { + "name": "collector" + }, + "prometheus": { + "labels": { + "le": "1" + }, + "metrics": { + "http_request_duration_seconds_bucket": 1 + } + }, + "service": { + "address": "127.0.0.1:55555", + "type": "prometheus" + } + }, + { + "event": { + "dataset": "prometheus.collector", + "duration": 115000, + "module": "prometheus" + }, + "metricset": { + "name": "collector" + }, + "prometheus": { + "labels": { + "quantile": "1" + }, + "metrics": { + "go_gc_duration_seconds": 0.011689149 + } + }, + "service": { + "address": "127.0.0.1:55555", + "type": "prometheus" + } + }, + { + "event": { + "dataset": "prometheus.collector", + "duration": 115000, + "module": "prometheus" + }, + "metricset": { + "name": "collector" + }, + "prometheus": { + "labels": { + "quantile": "0.75" + }, + "metrics": { + "go_gc_duration_seconds": 0.000098154 + } + }, + "service": { + "address": "127.0.0.1:55555", + "type": "prometheus" + } + } +] \ No newline at end of file diff --git a/metricbeat/module/prometheus/collector/data.go b/metricbeat/module/prometheus/collector/data.go index a2e191d3309..ad4e599ae31 100644 --- a/metricbeat/module/prometheus/collector/data.go +++ b/metricbeat/module/prometheus/collector/data.go @@ -55,36 +55,42 @@ func getPromEventsFromMetricFamily(mf *dto.MetricFamily) []PromEvent { counter := metric.GetCounter() if counter != nil { - events = append(events, PromEvent{ - data: common.MapStr{ - name: counter.GetValue(), - }, - labels: labels, - }) + if !math.IsNaN(counter.GetValue()) && !math.IsInf(counter.GetValue(), 0) { + events = append(events, PromEvent{ + data: common.MapStr{ + name: counter.GetValue(), + }, + labels: labels, + }) + } } gauge := metric.GetGauge() if gauge != nil { - events = append(events, PromEvent{ - data: common.MapStr{ - name: gauge.GetValue(), - }, - labels: labels, - }) + if !math.IsNaN(gauge.GetValue()) && !math.IsInf(gauge.GetValue(), 0) { + events = append(events, PromEvent{ + data: common.MapStr{ + name: gauge.GetValue(), + }, + labels: labels, + }) + } } summary := metric.GetSummary() if summary != nil { - events = append(events, PromEvent{ - data: common.MapStr{ - name + "_sum": summary.GetSampleSum(), - name + "_count": summary.GetSampleCount(), - }, - labels: labels, - }) + if !math.IsNaN(summary.GetSampleSum()) && !math.IsInf(summary.GetSampleSum(), 0) { + events = append(events, PromEvent{ + data: common.MapStr{ + name + "_sum": summary.GetSampleSum(), + name + "_count": summary.GetSampleCount(), + }, + labels: labels, + }) + } for _, quantile := range summary.GetQuantile() { - if math.IsNaN(quantile.GetValue()) { + if math.IsNaN(quantile.GetValue()) || math.IsInf(quantile.GetValue(), 0) { continue } @@ -101,15 +107,21 @@ func getPromEventsFromMetricFamily(mf *dto.MetricFamily) []PromEvent { histogram := metric.GetHistogram() if histogram != nil { - events = append(events, PromEvent{ - data: common.MapStr{ - name + "_sum": histogram.GetSampleSum(), - name + "_count": histogram.GetSampleCount(), - }, - labels: labels, - }) + if !math.IsNaN(histogram.GetSampleSum()) && !math.IsInf(histogram.GetSampleSum(), 0) { + events = append(events, PromEvent{ + data: common.MapStr{ + name + "_sum": histogram.GetSampleSum(), + name + "_count": histogram.GetSampleCount(), + }, + labels: labels, + }) + } for _, bucket := range histogram.GetBucket() { + if bucket.GetCumulativeCount() == uint64(math.NaN()) || bucket.GetCumulativeCount() == uint64(math.Inf(0)) { + continue + } + bucketLabels := labels.Clone() bucketLabels["le"] = strconv.FormatFloat(bucket.GetUpperBound(), 'f', -1, 64)