Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Prometheus metric types parser #39743

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.next.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ https://github.com/elastic/beats/compare/v8.8.1\...main[Check the HEAD diff]
- Fix behavior of cgroups path discovery when monitoring the host system from within a container {pull}39627[39627]
- Fix issue where beats may report incorrect metrics for its own process when running inside a container {pull}39627[39627]
- Fix for MySQL/Performance - Query failure for MySQL versions below v8.0.1, for performance metric `quantile_95`. {pull}38710[38710]
- Fix Prometheus helper text parser to store each metric family type. {pull}39743[39743]
- Normalize AWS RDS CPU Utilization values before making the metadata API call. {pull}39664[39664]
- Fix query logic for temp and non-temp tablespaces in Oracle module. {issue}38051[38051] {pull}39787[39787]

Expand Down
23 changes: 21 additions & 2 deletions metricbeat/helper/prometheus/textparse.go
Original file line number Diff line number Diff line change
Expand Up @@ -488,7 +488,8 @@ func ParseMetricFamilies(b []byte, contentType string, ts time.Time, logger *log
summariesByName = map[string]map[string]*OpenMetric{}
histogramsByName = map[string]map[string]*OpenMetric{}
fam *MetricFamily
mt = textparse.MetricTypeUnknown
// metricTypes stores the metric type for each metric name.
metricTypes = make(map[string]textparse.MetricType)
)
var err error

Expand Down Expand Up @@ -530,7 +531,8 @@ func ParseMetricFamilies(b []byte, contentType string, ts time.Time, logger *log
} else {
fam.Type = t
}
mt = t
// Store the metric type for each base metric name.
metricTypes[s] = t
continue
case textparse.EntryHelp:
buf, t := parser.Help()
Expand Down Expand Up @@ -611,6 +613,23 @@ func ParseMetricFamilies(b []byte, contentType string, ts time.Time, logger *log
lookupMetricName := metricName
var exm *exemplar.Exemplar

mt, ok := metricTypes[metricName]
if !ok {
// Splitting is necessary to find the base metric name type in the metricTypes map.
// This allows us to group related metrics together under the same base metric name.
// For example, the metric family `summary_metric` can have the metrics
// `summary_metric_count` and `summary_metric_sum`, all having the same metric type.
parts := strings.Split(metricName, "_")
baseMetricNamekey := strings.Join(parts[:len(parts)-1], "_")

// If the metric type is not found, default to unknown
if metricTypeFound, ok := metricTypes[baseMetricNamekey]; ok {
mt = metricTypeFound
} else {
mt = textparse.MetricTypeUnknown
}
}

// Suffixes - https://github.com/OpenObservability/OpenMetrics/blob/main/specification/OpenMetrics.md#suffixes
switch mt {
case textparse.MetricTypeCounter:
Expand Down
160 changes: 160 additions & 0 deletions metricbeat/helper/prometheus/textparse_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,166 @@ process_cpu 20
require.ElementsMatch(t, expected, result)
}

func TestGroupWithHeaderBlockPrometheus(t *testing.T) {
input := `
# HELP nginx_sts_server_bytes_total The request/response bytes
# TYPE nginx_sts_server_bytes_total counter
# HELP nginx_sts_server_connects_total The connects counter
# TYPE nginx_sts_server_connects_total counter
# HELP nginx_sts_server_session_seconds_total The session duration time
# TYPE nginx_sts_server_session_seconds_total counter
# HELP nginx_sts_server_session_seconds The average of session duration time in seconds
# TYPE nginx_sts_server_session_seconds gauge
# HELP nginx_sts_server_session_duration_seconds The histogram of session duration in seconds
# TYPE nginx_sts_server_session_duration_seconds histogram
nginx_sts_server_bytes_total{listen="TCP:8091:127.0.0.1",port="8091",protocol="TCP",direction="in"} 0
nginx_sts_server_bytes_total{listen="TCP:8091:127.0.0.1",port="8091",protocol="TCP",direction="out"} 0
nginx_sts_server_connects_total{listen="TCP:8091:127.0.0.1",port="8091",protocol="TCP",code="1xx"} 0
nginx_sts_server_connects_total{listen="TCP:8091:127.0.0.1",port="8091",protocol="TCP",code="2xx"} 0
nginx_sts_server_connects_total{listen="TCP:8091:127.0.0.1",port="8091",protocol="TCP",code="3xx"} 0
nginx_sts_server_connects_total{listen="TCP:8091:127.0.0.1",port="8091",protocol="TCP",code="4xx"} 0
nginx_sts_server_connects_total{listen="TCP:8091:127.0.0.1",port="8091",protocol="TCP",code="5xx"} 171
nginx_sts_server_connects_total{listen="TCP:8091:127.0.0.1",port="8091",protocol="TCP",code="total"} 171
nginx_sts_server_session_seconds_total{listen="TCP:8091:127.0.0.1",port="8091",protocol="TCP"} 0.016
nginx_sts_server_session_seconds{listen="TCP:8091:127.0.0.1",port="8091",protocol="TCP"} 0.000
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[Suggestion]

2 things:

  • from this the metric name of nginx_sts_server_session_seconds_total would be nginx_sts_server_session_seconds and the metric name of nginx_sts_server_session_seconds would be nginx_sts_server_session. Just checking, is my understand correct and is it the expected behaviour?
  • wouldn't be good to add 2 metrics with the same base name, like in your example summary_metric_count and summary_metric_sum?

Copy link
Contributor Author

@gpop63 gpop63 Jun 19, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My example was for the metric family below (we have a test for this). The metric family is called summary_metric and it includes various metrics such as summary_metric_sum, summary_metric_count, etc. We only want to store the metric type for the metric family summary_metric since all its metrics share the same type, which is summary.

# TYPE summary_metric summary
summary_metric{quantile="0.5"} 29735
summary_metric{quantile="0.9"} 47103
summary_metric{quantile="0.99"} 50681
summary_metric{noquantile="0.2"} 50681
summary_metric_sum 234892394
summary_metric_count 44000
summary_metric_impossible 123
# EOF

buf, t := parser.Type()
s := string(buf)
fam, ok = metricFamiliesByName[s]
if !ok {
fam = &MetricFamily{Name: &s, Type: t}
metricFamiliesByName[s] = fam
} else {
fam.Type = t
}
// Store the metric type for each base metric name.
metricTypes[s] = t

s represents the metric family name (e.g., summary_metric). Only the metric family name is stored in the map, without its individual metrics.

For the examples you mentioned, nginx_sts_server_session_seconds_total and nginx_sts_server_session_seconds, these metrics are already the metric family names (they don't have any child metrics) and thus would not trigger the !ok condition. The !ok condition was added as an edge case for families that have multiple child metrics with different suffixes.

`

expected := []*MetricFamily{
{
Name: stringp("nginx_sts_server_bytes_total"),
Help: stringp("The request/response bytes"),
Type: "counter",
Metric: []*OpenMetric{
{
Name: stringp("nginx_sts_server_bytes_total"),
Label: []*labels.Label{
{Name: "direction", Value: "in"},
{Name: "listen", Value: "TCP:8091:127.0.0.1"},
{Name: "port", Value: "8091"},
{Name: "protocol", Value: "TCP"},
},
Counter: &Counter{Value: float64p(0)},
},
{
Name: stringp("nginx_sts_server_bytes_total"),
Label: []*labels.Label{
{Name: "direction", Value: "out"},
{Name: "listen", Value: "TCP:8091:127.0.0.1"},
{Name: "port", Value: "8091"},
{Name: "protocol", Value: "TCP"},
},
Counter: &Counter{Value: float64p(0)},
},
},
},
{
Name: stringp("nginx_sts_server_connects_total"),
Help: stringp("The connects counter"),
Type: "counter",
Metric: []*OpenMetric{
{
Name: stringp("nginx_sts_server_connects_total"),
Label: []*labels.Label{
{Name: "code", Value: "1xx"},
{Name: "listen", Value: "TCP:8091:127.0.0.1"},
{Name: "port", Value: "8091"},
{Name: "protocol", Value: "TCP"},
},
Counter: &Counter{Value: float64p(0)},
},
{
Name: stringp("nginx_sts_server_connects_total"),
Label: []*labels.Label{
{Name: "code", Value: "2xx"},
{Name: "listen", Value: "TCP:8091:127.0.0.1"},
{Name: "port", Value: "8091"},
{Name: "protocol", Value: "TCP"},
},
Counter: &Counter{Value: float64p(0)},
},
{
Name: stringp("nginx_sts_server_connects_total"),
Label: []*labels.Label{
{Name: "code", Value: "3xx"},
{Name: "listen", Value: "TCP:8091:127.0.0.1"},
{Name: "port", Value: "8091"},
{Name: "protocol", Value: "TCP"},
},
Counter: &Counter{Value: float64p(0)},
},
{
Name: stringp("nginx_sts_server_connects_total"),
Label: []*labels.Label{
{Name: "code", Value: "4xx"},
{Name: "listen", Value: "TCP:8091:127.0.0.1"},
{Name: "port", Value: "8091"},
{Name: "protocol", Value: "TCP"},
},
Counter: &Counter{Value: float64p(0)},
},
{
Name: stringp("nginx_sts_server_connects_total"),
Label: []*labels.Label{
{Name: "code", Value: "5xx"},
{Name: "listen", Value: "TCP:8091:127.0.0.1"},
{Name: "port", Value: "8091"},
{Name: "protocol", Value: "TCP"},
},
Counter: &Counter{Value: float64p(171)},
},
{
Name: stringp("nginx_sts_server_connects_total"),
Label: []*labels.Label{
{Name: "code", Value: "total"},
{Name: "listen", Value: "TCP:8091:127.0.0.1"},
{Name: "port", Value: "8091"},
{Name: "protocol", Value: "TCP"},
},
Counter: &Counter{Value: float64p(171)},
},
},
},
{
Name: stringp("nginx_sts_server_session_seconds_total"),
Help: stringp("The session duration time"),
Type: "counter",
Metric: []*OpenMetric{
{
Name: stringp("nginx_sts_server_session_seconds_total"),
Label: []*labels.Label{
{Name: "listen", Value: "TCP:8091:127.0.0.1"},
{Name: "port", Value: "8091"},
{Name: "protocol", Value: "TCP"},
},
Counter: &Counter{Value: float64p(0.016)},
},
},
},
{
Name: stringp("nginx_sts_server_session_seconds"),
Help: stringp("The average of session duration time in seconds"),
Type: "gauge",
Metric: []*OpenMetric{
{
Name: stringp("nginx_sts_server_session_seconds"),
Label: []*labels.Label{
{Name: "listen", Value: "TCP:8091:127.0.0.1"},
{Name: "port", Value: "8091"},
{Name: "protocol", Value: "TCP"},
},
Gauge: &Gauge{Value: float64p(0.000)},
},
},
},
}

result, err := ParseMetricFamilies([]byte(input), ContentTypeTextFormat, time.Now(), nil)
if err != nil {
t.Fatalf("ParseMetricFamilies for content type %s returned an error: %v", ContentTypeTextFormat, err)
}
require.ElementsMatch(t, expected, result)
}

func TestGaugeOpenMetrics(t *testing.T) {
input := `
# TYPE first_metric gauge
Expand Down
Loading