Skip to content

Commit

Permalink
Ignore Prometheus metrics when their values are NaN or Inf (#12084)
Browse files Browse the repository at this point in the history
* Ignore Prometheus metrics when their values are NaN or Inf
* Avoid NaN/Inf in Prometheus helper
* Add checks on Gauge, Summary and Counter
* Add NaN/Inf check on histogram values
  • Loading branch information
kaiyan-sheng authored May 24, 2019
1 parent 08ccdd7 commit 9244477
Show file tree
Hide file tree
Showing 7 changed files with 511 additions and 40 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.next.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ https://github.com/elastic/beats/compare/v7.0.0-alpha2...master[Check the HEAD d
- Fixed RabbitMQ `queue` metricset gathering when `consumer_utilisation` is set empty at the metrics source {pull}12089[12089]
- Fix direction of incoming IPv6 sockets. {pull}12248[12248]
- Validate that kibana/status metricset cannot be used when xpack is enabled. {pull}12264[12264]
- Ignore Prometheus metrics whose values are NaN or Inf. {pull}12084[12084] {issue}10849[10849]

*Packetbeat*

Expand Down
31 changes: 20 additions & 11 deletions metricbeat/helper/prometheus/metric.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,28 +135,33 @@ func (m *commonMetric) GetField() string {
func (m *commonMetric) GetValue(metric *dto.Metric) interface{} {
counter := metric.GetCounter()
if counter != nil {
return int64(counter.GetValue())
if !math.IsNaN(counter.GetValue()) && !math.IsInf(counter.GetValue(), 0) {
return int64(counter.GetValue())
}
}

gauge := metric.GetGauge()
if gauge != nil {
return gauge.GetValue()
if !math.IsNaN(gauge.GetValue()) && !math.IsInf(gauge.GetValue(), 0) {
return gauge.GetValue()
}
}

summary := metric.GetSummary()
if summary != nil {
value := common.MapStr{}
value["sum"] = summary.GetSampleSum()
value["count"] = summary.GetSampleCount()
if !math.IsNaN(summary.GetSampleSum()) && !math.IsInf(summary.GetSampleSum(), 0) {
value["sum"] = summary.GetSampleSum()
value["count"] = summary.GetSampleCount()
}

quantiles := summary.GetQuantile()
percentileMap := common.MapStr{}
for _, quantile := range quantiles {
if !math.IsNaN(quantile.GetValue()) {
key := strconv.FormatFloat((100 * quantile.GetQuantile()), 'f', -1, 64)
if !math.IsNaN(quantile.GetValue()) && !math.IsInf(quantile.GetValue(), 0) {
key := strconv.FormatFloat(100*quantile.GetQuantile(), 'f', -1, 64)
percentileMap[key] = quantile.GetValue()
}

}

if len(percentileMap) != 0 {
Expand All @@ -169,14 +174,18 @@ func (m *commonMetric) GetValue(metric *dto.Metric) interface{} {
histogram := metric.GetHistogram()
if histogram != nil {
value := common.MapStr{}
value["sum"] = histogram.GetSampleSum()
value["count"] = histogram.GetSampleCount()
if !math.IsNaN(histogram.GetSampleSum()) && !math.IsInf(histogram.GetSampleSum(), 0) {
value["sum"] = histogram.GetSampleSum()
value["count"] = histogram.GetSampleCount()
}

buckets := histogram.GetBucket()
bucketMap := common.MapStr{}
for _, bucket := range buckets {
key := strconv.FormatFloat(bucket.GetUpperBound(), 'f', -1, 64)
bucketMap[key] = bucket.GetCumulativeCount()
if bucket.GetCumulativeCount() != uint64(math.NaN()) && bucket.GetCumulativeCount() != uint64(math.Inf(0)) {
key := strconv.FormatFloat(bucket.GetUpperBound(), 'f', -1, 64)
bucketMap[key] = bucket.GetCumulativeCount()
}
}

if len(bucketMap) != 0 {
Expand Down
3 changes: 1 addition & 2 deletions metricbeat/helper/prometheus/prometheus.go
Original file line number Diff line number Diff line change
Expand Up @@ -162,10 +162,9 @@ func (p *prometheus) GetProcessedMetrics(mapping *MetricsMapping) ([]common.MapS

if field != "" {
event := getEvent(eventsMap, keyLabels)

// value may be a mapstr (for histograms and summaries), do a deep update to avoid smashing existing fields
update := common.MapStr{}
update.Put(field, value)
// value may be a mapstr (for histograms and summaries), do a deep update to avoid smashing existing fields
event.DeepUpdate(update)

event.DeepUpdate(labels)
Expand Down
196 changes: 196 additions & 0 deletions metricbeat/helper/prometheus/prometheus_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,17 @@ metrics_one_count_total{name="jane",surname="foster"} 1
metrics_one_count_total{name="john",surname="williams"} 2
metrics_one_count_total{name="jahn",surname="baldwin",age="30"} 3
`

promGaugeKeyLabelWithNaNInf = `
# TYPE metrics_one_count_errors gauge
metrics_one_count_errors{name="jane",surname="foster"} 0
# TYPE metrics_one_count_total gauge
metrics_one_count_total{name="jane",surname="foster"} NaN
metrics_one_count_total{name="foo",surname="bar"} +Inf
metrics_one_count_total{name="john",surname="williams"} -Inf
metrics_one_count_total{name="jahn",surname="baldwin",age="30"} 3
`

promCounterKeyLabel = `
Expand All @@ -77,6 +88,16 @@ metrics_one_count_total{name="jane",surname="foster"} 1
metrics_one_count_total{name="john",surname="williams"} 2
metrics_one_count_total{name="jahn",surname="baldwin",age="30"} 3
`

promCounterKeyLabelWithNaNInf = `
# TYPE metrics_one_count_errors counter
metrics_one_count_errors{name="jane",surname="foster"} 1
# TYPE metrics_one_count_total counter
metrics_one_count_total{name="jane",surname="foster"} NaN
metrics_one_count_total{name="john",surname="williams"} +Inf
metrics_one_count_total{name="jahn",surname="baldwin",age="30"} 3
`

promHistogramKeyLabel = `
Expand All @@ -98,6 +119,19 @@ metrics_one_midichlorians_bucket{rank="padawan",alive="yes",le="+Inf"} 28
metrics_one_midichlorians_sum{rank="padawan",alive="yes"} 800001
metrics_one_midichlorians_count{rank="padawan",alive="yes"} 28
`

promHistogramKeyLabelWithNaNInf = `
# TYPE metrics_one_midichlorians histogram
metrics_one_midichlorians_bucket{rank="youngling",alive="yes",le="2000"} NaN
metrics_one_midichlorians_bucket{rank="youngling",alive="yes",le="4000"} +Inf
metrics_one_midichlorians_bucket{rank="youngling",alive="yes",le="8000"} -Inf
metrics_one_midichlorians_bucket{rank="youngling",alive="yes",le="16000"} 84
metrics_one_midichlorians_bucket{rank="youngling",alive="yes",le="32000"} 86
metrics_one_midichlorians_bucket{rank="youngling",alive="yes",le="+Inf"} 86
metrics_one_midichlorians_sum{rank="youngling",alive="yes"} 1000001
metrics_one_midichlorians_count{rank="youngling",alive="yes"} 86
`

promSummaryKeyLabel = `
Expand All @@ -117,6 +151,18 @@ metrics_force_propagation_ms{kind="sith",quantile="1"} 29
metrics_force_propagation_ms_sum{kind="sith"} 112
metrics_force_propagation_ms_count{kind="sith"} 711
`

promSummaryKeyLabelWithNaNInf = `
# TYPE metrics_force_propagation_ms summary
metrics_force_propagation_ms{kind="jedi",quantile="0"} NaN
metrics_force_propagation_ms{kind="jedi",quantile="0.25"} +Inf
metrics_force_propagation_ms{kind="jedi",quantile="0.5"} -Inf
metrics_force_propagation_ms{kind="jedi",quantile="0.75"} 20
metrics_force_propagation_ms{kind="jedi",quantile="1"} 30
metrics_force_propagation_ms_sum{kind="jedi"} 50
metrics_force_propagation_ms_count{kind="jedi"} 651
`
)

Expand Down Expand Up @@ -496,6 +542,47 @@ func TestPrometheusKeyLabels(t *testing.T) {
},
},

{
testName: "Test gauge with KeyLabel With NaN Inf",
prometheusResponse: promGaugeKeyLabelWithNaNInf,
mapping: &MetricsMapping{
Metrics: map[string]MetricMap{
"metrics_one_count_errors": Metric("metrics.one.count"),
"metrics_one_count_total": Metric("metrics.one.count"),
},
Labels: map[string]LabelMap{
"name": KeyLabel("metrics.one.labels.name"),
"surname": KeyLabel("metrics.one.labels.surname"),
"age": KeyLabel("metrics.one.labels.age"),
},
},
expectedEvents: []common.MapStr{
common.MapStr{
"metrics": common.MapStr{
"one": common.MapStr{
"count": 0.0,
"labels": common.MapStr{
"name": "jane",
"surname": "foster",
},
},
},
},
common.MapStr{
"metrics": common.MapStr{
"one": common.MapStr{
"count": 3.0,
"labels": common.MapStr{
"name": "jahn",
"surname": "baldwin",
"age": "30",
},
},
},
},
},
},

{
testName: "Test counter with KeyLabel",
prometheusResponse: promCounterKeyLabel,
Expand Down Expand Up @@ -547,6 +634,47 @@ func TestPrometheusKeyLabels(t *testing.T) {
},
},

{
testName: "Test counter with KeyLabel With NaN Inf",
prometheusResponse: promCounterKeyLabelWithNaNInf,
mapping: &MetricsMapping{
Metrics: map[string]MetricMap{
"metrics_one_count_errors": Metric("metrics.one.count"),
"metrics_one_count_total": Metric("metrics.one.count"),
},
Labels: map[string]LabelMap{
"name": KeyLabel("metrics.one.labels.name"),
"surname": KeyLabel("metrics.one.labels.surname"),
"age": KeyLabel("metrics.one.labels.age"),
},
},
expectedEvents: []common.MapStr{
common.MapStr{
"metrics": common.MapStr{
"one": common.MapStr{
"count": int64(1),
"labels": common.MapStr{
"name": "jane",
"surname": "foster",
},
},
},
},
common.MapStr{
"metrics": common.MapStr{
"one": common.MapStr{
"count": int64(3),
"labels": common.MapStr{
"name": "jahn",
"surname": "baldwin",
"age": "30",
},
},
},
},
},
},

{
testName: "Test histogram with KeyLabel",
prometheusResponse: promHistogramKeyLabel,
Expand Down Expand Up @@ -604,6 +732,40 @@ func TestPrometheusKeyLabels(t *testing.T) {
},
},

{
testName: "Test histogram with KeyLabel With NaN Inf",
prometheusResponse: promHistogramKeyLabelWithNaNInf,
mapping: &MetricsMapping{
Metrics: map[string]MetricMap{
"metrics_one_midichlorians": Metric("metrics.one.midichlorians"),
},
Labels: map[string]LabelMap{
"rank": KeyLabel("metrics.one.midichlorians.rank"),
"alive": KeyLabel("metrics.one.midichlorians.alive"),
},
},
expectedEvents: []common.MapStr{
common.MapStr{
"metrics": common.MapStr{
"one": common.MapStr{
"midichlorians": common.MapStr{
"count": uint64(86),
"sum": 1000001.0,
"bucket": common.MapStr{
"16000": uint64(84),
"32000": uint64(86),
"+Inf": uint64(86),
},

"rank": "youngling",
"alive": "yes",
},
},
},
},
},
},

{
testName: "Test summary with KeyLabel",
prometheusResponse: promSummaryKeyLabel,
Expand Down Expand Up @@ -662,6 +824,40 @@ func TestPrometheusKeyLabels(t *testing.T) {
},
},
},

{
testName: "Test summary with KeyLabel With NaN Inf",
prometheusResponse: promSummaryKeyLabelWithNaNInf,
mapping: &MetricsMapping{
Metrics: map[string]MetricMap{
"metrics_force_propagation_ms": Metric("metrics.force.propagation.ms"),
},
Labels: map[string]LabelMap{
"kind": KeyLabel("metrics.force.propagation.ms.labels.kind"),
},
},
expectedEvents: []common.MapStr{
common.MapStr{
"metrics": common.MapStr{
"force": common.MapStr{
"propagation": common.MapStr{
"ms": common.MapStr{
"count": uint64(651),
"sum": 50.0,
"percentile": common.MapStr{
"75": 20.0,
"100": 30.0,
},
"labels": common.MapStr{
"kind": "jedi",
},
},
},
},
},
},
},
},
}

for _, tc := range testCases {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# HELP kafka_consumer_records_lag_records The latest lag of the partition
# TYPE kafka_consumer_records_lag_records gauge
kafka_consumer_records_lag_records{client_id="consumer1",} NaN
kafka_consumer_records_lag_records{client_id="consumer2",} +Inf
kafka_consumer_records_lag_records{client_id="consumer3",} -Inf
kafka_consumer_records_lag_records{client_id="consumer4",} 5
# HELP http_failures Total number of http request failures
# TYPE http_failures counter
http_failures{method="GET"} 2
http_failures{method="POST"} NaN
http_failures{method="DELETE"} +Inf
# HELP go_gc_duration_seconds A summary of the GC invocation durations.
# TYPE go_gc_duration_seconds summary
go_gc_duration_seconds{quantile="0",} NaN
go_gc_duration_seconds{quantile="0.25",} +Inf
go_gc_duration_seconds{quantile="0.5",} -Inf
go_gc_duration_seconds{quantile="0.75"} 9.8154e-05
go_gc_duration_seconds{quantile="1",} 0.011689149
go_gc_duration_seconds_sum 3.451780079
go_gc_duration_seconds_count 13118
# HELP http_request_duration_seconds request duration histogram
# TYPE http_request_duration_seconds histogram
http_request_duration_seconds_bucket{le="0.1"} +Inf
http_request_duration_seconds_bucket{le="0.2"} -Inf
http_request_duration_seconds_bucket{le="0.5"} NaN
http_request_duration_seconds_bucket{le="1"} 1
http_request_duration_seconds_bucket{le="2"} 2
http_request_duration_seconds_bucket{le="3"} 3
http_request_duration_seconds_bucket{le="5"} 3
http_request_duration_seconds_bucket{le="+Inf"} 3
http_request_duration_seconds_sum 6
http_request_duration_seconds_count 3
Loading

0 comments on commit 9244477

Please sign in to comment.