Skip to content

Commit

Permalink
Fix prometheus metric name and unit conversion
Browse files Browse the repository at this point in the history
  • Loading branch information
aabmass committed May 22, 2024
1 parent a156bf1 commit 2912c64
Show file tree
Hide file tree
Showing 6 changed files with 505 additions and 61 deletions.
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
([#3823](https://github.com/open-telemetry/opentelemetry-python/pull/3823))
- Add span flags to OTLP spans and links
([#3881](https://github.com/open-telemetry/opentelemetry-python/pull/3881))
- Fix prometheus metric name and unit conversion
([#3924](https://github.com/open-telemetry/opentelemetry-python/pull/3924))
- this is a breaking change to prometheus metric names so they comply with the
[specification](https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/compatibility/prometheus_and_openmetrics.md#otlp-metric-points-to-prometheus).
- common unit abbreviations are converted to Prometheus conventions (`s` -> `seconds`),
following the [collector's implementation](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/c0b51136575aa7ba89326d18edb4549e7e1bbdb9/pkg/translator/prometheus/normalize_name.go#L108)
- repeated `_` are replaced with a single `_`
- UCUM annotations (enclosed in curly braces like `{requests}`) are stripped away
- units with slash are converted e.g. `m/s` -> `meters_per_second`.
- The exporter's API is not changed

## Version 1.24.0/0.45b0 (2024-03-28)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,7 @@
from json import dumps
from logging import getLogger
from os import environ
from re import IGNORECASE, UNICODE, compile
from typing import Dict, Sequence, Tuple, Union
from typing import Deque, Dict, Iterable, Sequence, Tuple, Union

from prometheus_client import start_http_server
from prometheus_client.core import (
Expand All @@ -80,6 +79,11 @@
)
from prometheus_client.core import Metric as PrometheusMetric

from opentelemetry.exporter.prometheus._mapping import (
map_unit,
sanitize_attribute,
sanitize_full_name,
)
from opentelemetry.sdk.environment_variables import (
OTEL_EXPORTER_PROMETHEUS_HOST,
OTEL_EXPORTER_PROMETHEUS_PORT,
Expand All @@ -101,6 +105,7 @@
MetricsData,
Sum,
)
from opentelemetry.util.types import Attributes

_logger = getLogger(__name__)

Expand Down Expand Up @@ -164,18 +169,15 @@ class _CustomCollector:

def __init__(self, disable_target_info: bool = False):
self._callback = None
self._metrics_datas = deque()
self._non_letters_digits_underscore_re = compile(
r"[^\w]", UNICODE | IGNORECASE
)
self._metrics_datas: Deque[MetricsData] = deque()
self._disable_target_info = disable_target_info
self._target_info = None

def add_metrics_data(self, metrics_data: MetricsData) -> None:
"""Add metrics to Prometheus data"""
self._metrics_datas.append(metrics_data)

def collect(self) -> None:
def collect(self) -> Iterable[PrometheusMetric]:
"""Collect fetches the metrics from OpenTelemetry
and delivers them as Prometheus Metrics.
Collect is invoked every time a ``prometheus.Gatherer`` is run
Expand All @@ -189,7 +191,7 @@ def collect(self) -> None:
if len(self._metrics_datas):
if not self._disable_target_info:
if self._target_info is None:
attributes = {}
attributes: Attributes = {}
for res in self._metrics_datas[0].resource_metrics:
attributes = {**attributes, **res.resource.attributes}

Expand Down Expand Up @@ -228,17 +230,17 @@ def _translate_to_prometheus(

pre_metric_family_ids = []

metric_name = ""
metric_name += self._sanitize(metric.name)
metric_name = sanitize_full_name(metric.name)

metric_description = metric.description or ""
metric_unit = map_unit(metric.unit)

for number_data_point in metric.data.data_points:
label_keys = []
label_values = []

for key, value in sorted(number_data_point.attributes.items()):
label_keys.append(self._sanitize(key))
label_keys.append(sanitize_attribute(key))
label_values.append(self._check_value(value))

pre_metric_family_ids.append(
Expand All @@ -247,7 +249,7 @@ def _translate_to_prometheus(
metric_name,
metric_description,
"%".join(label_keys),
metric.unit,
metric_unit,
]
)
)
Expand Down Expand Up @@ -299,7 +301,7 @@ def _translate_to_prometheus(
name=metric_name,
documentation=metric_description,
labels=label_keys,
unit=metric.unit,
unit=metric_unit,
)
)
metric_family_id_metric_family[
Expand All @@ -323,7 +325,7 @@ def _translate_to_prometheus(
name=metric_name,
documentation=metric_description,
labels=label_keys,
unit=metric.unit,
unit=metric_unit,
)
)
metric_family_id_metric_family[
Expand All @@ -344,7 +346,7 @@ def _translate_to_prometheus(
name=metric_name,
documentation=metric_description,
labels=label_keys,
unit=metric.unit,
unit=metric_unit,
)
)
metric_family_id_metric_family[
Expand All @@ -361,12 +363,6 @@ def _translate_to_prometheus(
"Unsupported metric data. %s", type(metric.data)
)

def _sanitize(self, key: str) -> str:
"""sanitize the given metric name or label according to Prometheus rule.
Replace all characters other than [A-Za-z0-9_] with '_'.
"""
return self._non_letters_digits_underscore_re.sub("_", key)

# pylint: disable=no-self-use
def _check_value(self, value: Union[int, float, str, Sequence]) -> str:
"""Check the label value and return is appropriate representation"""
Expand All @@ -380,7 +376,7 @@ def _create_info_metric(
"""Create an Info Metric Family with list of attributes"""
# sanitize the attribute names according to Prometheus rule
attributes = {
self._sanitize(key): self._check_value(value)
sanitize_attribute(key): self._check_value(value)
for key, value in attributes.items()
}
info = InfoMetricFamily(name, description, labels=attributes)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
# Copyright The OpenTelemetry Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from re import UNICODE, compile

# Matches every run of characters that is not an ASCII letter, digit or ":".
# "_" itself is outside the allowed class, so runs of underscores are matched
# as well and collapse to a single "_" when substituted.
_SANITIZE_NAME_RE = compile(r"([^a-zA-Z0-9:]+)|_{2,}", UNICODE)
# Same as name, but doesn't allow ":"
_SANITIZE_ATTRIBUTE_KEY_RE = compile(r"([^a-zA-Z0-9]+)|_{2,}", UNICODE)

# UCUM annotations are ASCII chars 33-126 enclosed in curly braces
# https://ucum.org/ucum#para-6
# The braces themselves are excluded from the annotation body: otherwise the
# greedy match spans from the first "{" to the last "}" and swallows the real
# unit text sitting between two annotations (e.g. "{a}By/{b}s" -> "s").
_UCUM_ANNOTATION_CURLY = compile(r"\{[!-z|~]*\}")

# Remaps common UCUM and SI units to prometheus conventions. Copied from
# https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/v0.101.0/pkg/translator/prometheus/normalize_name.go#L19
# See specification:
# https://github.com/open-telemetry/opentelemetry-specification/blob/v1.33.0/specification/compatibility/prometheus_and_openmetrics.md#metric-metadata-1
_UNIT_MAPPINGS = {
    # Time
    "d": "days",
    "h": "hours",
    "min": "minutes",
    "s": "seconds",
    "ms": "milliseconds",
    "us": "microseconds",
    "ns": "nanoseconds",
    # Bytes
    "By": "bytes",
    "KiBy": "kibibytes",
    "MiBy": "mebibytes",
    "GiBy": "gibibytes",
    # NOTE(review): the IEC prefix is spelled "tebi"; this value presumably
    # mirrors the upstream table it was copied from -- confirm before changing,
    # since it alters emitted metric names.
    "TiBy": "tibibytes",
    "KBy": "kilobytes",
    "MBy": "megabytes",
    "GBy": "gigabytes",
    "TBy": "terabytes",
    # SI
    "m": "meters",
    "V": "volts",
    "A": "amperes",
    "J": "joules",
    "W": "watts",
    "g": "grams",
    # Misc
    "Cel": "celsius",
    "Hz": "hertz",
    # TODO: this conflicts with the spec but I think it is correct. Need to open a spec issue
    # The dimensionless unit "1" maps to the empty string, i.e. no unit suffix.
    "1": "",
    "%": "percent",
}
# Similar to _UNIT_MAPPINGS, but for "per" unit denominator.
# Example: s => per second (singular)
# Copied from https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/80317ce83ed87a2dff0c316bb939afbfaa823d5e/pkg/translator/prometheus/normalize_name.go#L58
_PER_UNIT_MAPPINGS = {
    "s": "second",
    # NOTE: as a denominator "m" is read as minute, whereas _UNIT_MAPPINGS
    # reads a standalone/numerator "m" as meters.
    "m": "minute",
    "h": "hour",
    "d": "day",
    "w": "week",
    "mo": "month",
    "y": "year",
}


def sanitize_full_name(name: str) -> str:
    """Return ``name`` rewritten as a valid Prometheus metric name.

    A leading digit is replaced by ``_`` first; the remainder of the work is
    delegated to ``_sanitize_name``.

    https://github.com/open-telemetry/opentelemetry-specification/blob/v1.33.0/specification/compatibility/prometheus_and_openmetrics.md#metric-metadata-1
    """
    # An empty name has no first character to inspect.
    starts_with_digit = bool(name) and name[0].isdigit()
    # The digit is swapped for "_" (not kept), then the generic rule runs.
    candidate = "_" + name[1:] if starts_with_digit else name
    return _sanitize_name(candidate)


def _sanitize_name(name: str) -> str:
    """Apply the Prometheus metric-name character rule to ``name``.

    Every match of ``_SANITIZE_NAME_RE`` is replaced by a single ``_``.
    A leading digit is NOT handled here; see ``sanitize_full_name``.
    """
    sanitized = _SANITIZE_NAME_RE.sub("_", name)
    return sanitized


def sanitize_attribute(key: str) -> str:
    """Return ``key`` rewritten as a valid Prometheus label name.

    A leading digit is replaced by ``_``, then every match of
    ``_SANITIZE_ATTRIBUTE_KEY_RE`` is replaced by a single ``_``.

    https://github.com/open-telemetry/opentelemetry-specification/blob/v1.33.0/specification/compatibility/prometheus_and_openmetrics.md#metric-attributes
    """
    # Slicing keeps the empty string safe: "".isdigit() is False.
    if key[:1].isdigit():
        key = f"_{key[1:]}"
    return _SANITIZE_ATTRIBUTE_KEY_RE.sub("_", key)


def map_unit(unit: str) -> str:
    """Map an OpenTelemetry unit to a Prometheus-style unit name.

    Processing order:

    1. UCUM curly-brace annotations are removed.
    2. If the remaining unit is a key of ``_UNIT_MAPPINGS`` the mapped value
       is returned as is.
    3. A unit containing ``/`` with a non-empty denominator is rendered as
       ``<top>_per_<bottom>`` (or ``per_<bottom>`` when the numerator is
       empty), e.g. ``m/s`` -> ``meters_per_second``.
    4. Anything else is sanitized and stripped of surrounding ``_``.

    See:
    - https://github.com/open-telemetry/opentelemetry-specification/blob/v1.33.0/specification/compatibility/prometheus_and_openmetrics.md#metric-metadata-1
    - https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/v0.101.0/pkg/translator/prometheus/normalize_name.go#L108
    """
    # remove curly brace UCUM annotations
    unit = _UCUM_ANNOTATION_CURLY.sub("", unit)

    if unit in _UNIT_MAPPINGS:
        return _UNIT_MAPPINGS[unit]

    # replace "/" with "per" units like m/s -> meters_per_second
    numerator, slash, denominator = unit.partition("/")
    if slash:
        # Strip "_" so the joined result never contains doubled underscores.
        bottom = _sanitize_name(denominator).strip("_")
        if bottom:
            # Look the numerator up BEFORE sanitizing: keys such as "%" are
            # not valid name characters and would otherwise be turned into
            # "_" and miss their mapping ("%/s" -> "percent_per_second").
            top = _UNIT_MAPPINGS.get(numerator)
            if top is None:
                top = _sanitize_name(numerator).strip("_")
            bottom = _PER_UNIT_MAPPINGS.get(bottom, bottom)
            return f"{top}_per_{bottom}" if top else f"per_{bottom}"

    return (
        # since units end up as a metric name suffix, they must be sanitized
        _sanitize_name(unit)
        # strip surrounding "_" chars since it will lead to consecutive underscores in the
        # metric name
        .strip("_")
    )
Loading

0 comments on commit 2912c64

Please sign in to comment.