From 0335dd8ea6684cfc87a10a6faa1176466d02f976 Mon Sep 17 00:00:00 2001 From: Aaron Abbott Date: Mon, 20 May 2024 21:40:02 +0000 Subject: [PATCH 1/7] Fix prometheus metric name and unit conversion --- CHANGELOG.md | 10 + .../exporter/prometheus/__init__.py | 40 ++- .../exporter/prometheus/_mapping.py | 135 ++++++++++ .../tests/test_mapping.py | 113 +++++++++ .../tests/test_prometheus_exporter.py | 232 +++++++++++++++--- .../src/opentelemetry/test/metrictestutil.py | 36 +++ 6 files changed, 505 insertions(+), 61 deletions(-) create mode 100644 exporter/opentelemetry-exporter-prometheus/src/opentelemetry/exporter/prometheus/_mapping.py create mode 100644 exporter/opentelemetry-exporter-prometheus/tests/test_mapping.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 6f38f17f133..9978898b382 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -42,6 +42,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ([#3917](https://github.com/open-telemetry/opentelemetry-python/pull/3917/)) - Add OpenTelemetry trove classifiers to PyPI packages ([#3913] (https://github.com/open-telemetry/opentelemetry-python/pull/3913)) +- Fix prometheus metric name and unit conversion + ([#3924](https://github.com/open-telemetry/opentelemetry-python/pull/3924)) + - this is a breaking change to prometheus metric names so they comply with the + [specification](https://github.com/open-telemetry/opentelemetry-specification/blob/v1.33.0/specification/compatibility/prometheus_and_openmetrics.md#otlp-metric-points-to-prometheus). + - common unit abbreviations are converted to Prometheus conventions (`s` -> `seconds`), + following the [collector's implementation](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/c0b51136575aa7ba89326d18edb4549e7e1bbdb9/pkg/translator/prometheus/normalize_name.go#L108) + - repeated `_` are replaced with a single `_` + - UCUM annotations (enclosed in curly braces like `{requests}`) are stripped away + - units with slash are converted e.g. `m/s` -> `meters_per_second`. + - The exporter's API is not changed ## Version 1.24.0/0.45b0 (2024-03-28) diff --git a/exporter/opentelemetry-exporter-prometheus/src/opentelemetry/exporter/prometheus/__init__.py b/exporter/opentelemetry-exporter-prometheus/src/opentelemetry/exporter/prometheus/__init__.py index a84e21214d7..af8eedb10f1 100644 --- a/exporter/opentelemetry-exporter-prometheus/src/opentelemetry/exporter/prometheus/__init__.py +++ b/exporter/opentelemetry-exporter-prometheus/src/opentelemetry/exporter/prometheus/__init__.py @@ -67,8 +67,7 @@ from json import dumps from logging import getLogger from os import environ -from re import IGNORECASE, UNICODE, compile -from typing import Dict, Sequence, Tuple, Union +from typing import Deque, Dict, Iterable, Sequence, Tuple, Union from prometheus_client import start_http_server from prometheus_client.core import ( @@ -80,6 +79,11 @@ ) from prometheus_client.core import Metric as PrometheusMetric +from opentelemetry.exporter.prometheus._mapping import ( + map_unit, + sanitize_attribute, + sanitize_full_name, +) from opentelemetry.sdk.environment_variables import ( OTEL_EXPORTER_PROMETHEUS_HOST, OTEL_EXPORTER_PROMETHEUS_PORT, @@ -101,6 +105,7 @@ MetricsData, Sum, ) +from opentelemetry.util.types import Attributes _logger = getLogger(__name__) @@ -164,10 +169,7 @@ class _CustomCollector: def __init__(self, disable_target_info: bool = False): self._callback = None - self._metrics_datas = deque() - self._non_letters_digits_underscore_re = compile( - r"[^\w]", UNICODE | IGNORECASE - ) + self._metrics_datas: Deque[MetricsData] = deque() self._disable_target_info = disable_target_info self._target_info = None @@ -175,7 +177,7 @@ def add_metrics_data(self, metrics_data: MetricsData) -> None: """Add metrics to Prometheus data""" self._metrics_datas.append(metrics_data) - def collect(self) -> None: + def collect(self) -> Iterable[PrometheusMetric]: """Collect fetches the metrics from OpenTelemetry and delivers them as Prometheus Metrics. Collect is invoked every time a ``prometheus.Gatherer`` is run @@ -189,7 +191,7 @@ def collect(self) -> None: if len(self._metrics_datas): if not self._disable_target_info: if self._target_info is None: - attributes = {} + attributes: Attributes = {} for res in self._metrics_datas[0].resource_metrics: attributes = {**attributes, **res.resource.attributes} @@ -228,17 +230,17 @@ def _translate_to_prometheus( pre_metric_family_ids = [] - metric_name = "" - metric_name += self._sanitize(metric.name) + metric_name = sanitize_full_name(metric.name) metric_description = metric.description or "" + metric_unit = map_unit(metric.unit) for number_data_point in metric.data.data_points: label_keys = [] label_values = [] for key, value in sorted(number_data_point.attributes.items()): - label_keys.append(self._sanitize(key)) + label_keys.append(sanitize_attribute(key)) label_values.append(self._check_value(value)) pre_metric_family_ids.append( @@ -247,7 +249,7 @@ def _translate_to_prometheus( metric_name, metric_description, "%".join(label_keys), - metric.unit, + metric_unit, ] ) ) @@ -299,7 +301,7 @@ def _translate_to_prometheus( name=metric_name, documentation=metric_description, labels=label_keys, - unit=metric.unit, + unit=metric_unit, ) ) metric_family_id_metric_family[ @@ -323,7 +325,7 @@ def _translate_to_prometheus( name=metric_name, documentation=metric_description, labels=label_keys, - unit=metric.unit, + unit=metric_unit, ) ) metric_family_id_metric_family[ @@ -344,7 +346,7 @@ def _translate_to_prometheus( name=metric_name, documentation=metric_description, labels=label_keys, - unit=metric.unit, + unit=metric_unit, ) ) metric_family_id_metric_family[ @@ -361,12 +363,6 @@ def _translate_to_prometheus( "Unsupported metric data. %s", type(metric.data) ) - def _sanitize(self, key: str) -> str: - """sanitize the given metric name or label according to Prometheus rule. - Replace all characters other than [A-Za-z0-9_] with '_'. - """ - return self._non_letters_digits_underscore_re.sub("_", key) - # pylint: disable=no-self-use def _check_value(self, value: Union[int, float, str, Sequence]) -> str: """Check the label value and return is appropriate representation""" @@ -380,7 +376,7 @@ def _create_info_metric( """Create an Info Metric Family with list of attributes""" # sanitize the attribute names according to Prometheus rule attributes = { - self._sanitize(key): self._check_value(value) + sanitize_attribute(key): self._check_value(value) for key, value in attributes.items() } info = InfoMetricFamily(name, description, labels=attributes) diff --git a/exporter/opentelemetry-exporter-prometheus/src/opentelemetry/exporter/prometheus/_mapping.py b/exporter/opentelemetry-exporter-prometheus/src/opentelemetry/exporter/prometheus/_mapping.py new file mode 100644 index 00000000000..636066c5054 --- /dev/null +++ b/exporter/opentelemetry-exporter-prometheus/src/opentelemetry/exporter/prometheus/_mapping.py @@ -0,0 +1,135 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from re import UNICODE, compile + +_SANITIZE_NAME_RE = compile(r"([^a-zA-Z0-9:]+)|_{2,}", UNICODE) +# Same as name, but doesn't allow ":" +_SANITIZE_ATTRIBUTE_KEY_RE = compile(r"([^a-zA-Z0-9]+)|_{2,}", UNICODE) + +# UCUM annotations are ASCII chars 33-126 enclosed in curly braces +# https://ucum.org/ucum#para-6 +_UCUM_ANNOTATION_CURLY = compile(r"{[!-~]*}") + +# Remaps common UCUM and SI units to prometheus conventions. Copied from +# https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/v0.101.0/pkg/translator/prometheus/normalize_name.go#L19 +# See specification: +# https://github.com/open-telemetry/opentelemetry-specification/blob/v1.33.0/specification/compatibility/prometheus_and_openmetrics.md#metric-metadata-1 +_UNIT_MAPPINGS = { + # Time + "d": "days", + "h": "hours", + "min": "minutes", + "s": "seconds", + "ms": "milliseconds", + "us": "microseconds", + "ns": "nanoseconds", + # Bytes + "By": "bytes", + "KiBy": "kibibytes", + "MiBy": "mebibytes", + "GiBy": "gibibytes", + "TiBy": "tibibytes", + "KBy": "kilobytes", + "MBy": "megabytes", + "GBy": "gigabytes", + "TBy": "terabytes", + # SI + "m": "meters", + "V": "volts", + "A": "amperes", + "J": "joules", + "W": "watts", + "g": "grams", + # Misc + "Cel": "celsius", + "Hz": "hertz", + # TODO: this conflicts with the spec but I think it is correct. Need to open a spec issue + "1": "", + "%": "percent", +} +# Similar to _UNIT_MAPPINGS, but for "per" unit denominator. +# Example: s => per second (singular) +# Copied from https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/80317ce83ed87a2dff0c316bb939afbfaa823d5e/pkg/translator/prometheus/normalize_name.go#L58 +_PER_UNIT_MAPPINGS = { + "s": "second", + "m": "minute", + "h": "hour", + "d": "day", + "w": "week", + "mo": "month", + "y": "year", +} + + +def sanitize_full_name(name: str) -> str: + """sanitize the given metric name according to Prometheus rule, including sanitizing + leading digits + + https://github.com/open-telemetry/opentelemetry-specification/blob/v1.33.0/specification/compatibility/prometheus_and_openmetrics.md#metric-metadata-1 + """ + # Leading number special case + if name and name[0].isdigit(): + name = "_" + name[1:] + return _sanitize_name(name) + + +def _sanitize_name(name: str) -> str: + """sanitize the given metric name according to Prometheus rule, but does not handle + sanitizing a leading digit.""" + return _SANITIZE_NAME_RE.sub("_", name) + + +def sanitize_attribute(key: str) -> str: + """sanitize the given metric attribute key according to Prometheus rule. + + https://github.com/open-telemetry/opentelemetry-specification/blob/v1.33.0/specification/compatibility/prometheus_and_openmetrics.md#metric-attributes + """ + # Leading number special case + if key and key[0].isdigit(): + key = "_" + key[1:] + return _SANITIZE_ATTRIBUTE_KEY_RE.sub("_", key) + + +def map_unit(unit: str) -> str: + """Maps unit to common prometheus metric names if available and sanitizes any invalid + characters + + See: + - https://github.com/open-telemetry/opentelemetry-specification/blob/v1.33.0/specification/compatibility/prometheus_and_openmetrics.md#metric-metadata-1 + - https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/v0.101.0/pkg/translator/prometheus/normalize_name.go#L108 + """ + # remove curly brace UCUM annotations + unit = _UCUM_ANNOTATION_CURLY.sub("", unit) + + if unit in _UNIT_MAPPINGS: + return _UNIT_MAPPINGS[unit] + + # replace "/" with "per" units like m/s -> meters_per_second + ratio_unit_subparts = unit.split("/", maxsplit=1) + if len(ratio_unit_subparts) == 2: + bottom = _sanitize_name(ratio_unit_subparts[1]) + if bottom: + top = _sanitize_name(ratio_unit_subparts[0]) + top = _UNIT_MAPPINGS.get(top, top) + bottom = _PER_UNIT_MAPPINGS.get(bottom, bottom) + return f"{top}_per_{bottom}" if top else f"per_{bottom}" + + return ( + # since units end up as a metric name suffix, they must be sanitized + _sanitize_name(unit) + # strip surrounding "_" chars since it will lead to consecutive underscores in the + # metric name + .strip("_") + ) diff --git a/exporter/opentelemetry-exporter-prometheus/tests/test_mapping.py b/exporter/opentelemetry-exporter-prometheus/tests/test_mapping.py new file mode 100644 index 00000000000..d8ffe544dea --- /dev/null +++ b/exporter/opentelemetry-exporter-prometheus/tests/test_mapping.py @@ -0,0 +1,113 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from unittest import TestCase + +from opentelemetry.exporter.prometheus._mapping import ( + map_unit, + sanitize_attribute, + sanitize_full_name, +) + + +class TestMapping(TestCase): + def testsanitize_full_name(self): + self.assertEqual( + sanitize_full_name("valid_metric_name"), "valid_metric_name" + ) + self.assertEqual( + sanitize_full_name("VALID_METRIC_NAME"), "VALID_METRIC_NAME" + ) + self.assertEqual( + sanitize_full_name("_valid_metric_name"), "_valid_metric_name" + ) + self.assertEqual( + sanitize_full_name("valid:metric_name"), "valid:metric_name" + ) + self.assertEqual( + sanitize_full_name("valid_1_metric_name"), "valid_1_metric_name" + ) + self.assertEqual( + sanitize_full_name("1leading_digit"), "_leading_digit" + ) + self.assertEqual( + sanitize_full_name("1_~#consective_underscores"), + "_consective_underscores", + ) + self.assertEqual( + sanitize_full_name("1!2@3#4$5%6^7&8*9(0)_-"), + "_2_3_4_5_6_7_8_9_0_", + ) + self.assertEqual(sanitize_full_name("foo,./?;:[]{}bar"), "foo_:_bar") + self.assertEqual(sanitize_full_name("TestString"), "TestString") + self.assertEqual(sanitize_full_name("aAbBcC_12_oi"), "aAbBcC_12_oi") + + def testsanitize_attribute(self): + self.assertEqual( + sanitize_attribute("valid_attr_key"), "valid_attr_key" + ) + self.assertEqual( + sanitize_attribute("VALID_attr_key"), "VALID_attr_key" + ) + self.assertEqual( + sanitize_attribute("_valid_attr_key"), "_valid_attr_key" + ) + self.assertEqual( + sanitize_attribute("valid_1_attr_key"), "valid_1_attr_key" + ) + self.assertEqual( + sanitize_attribute("sanitize:colons"), "sanitize_colons" + ) + self.assertEqual( + sanitize_attribute("1leading_digit"), "_leading_digit" + ) + self.assertEqual( + sanitize_attribute("1_~#consective_underscores"), + "_consective_underscores", + ) + self.assertEqual( + sanitize_attribute("1!2@3#4$5%6^7&8*9(0)_-"), + "_2_3_4_5_6_7_8_9_0_", + ) + self.assertEqual(sanitize_attribute("foo,./?;:[]{}bar"), "foo_bar") + self.assertEqual(sanitize_attribute("TestString"), "TestString") + self.assertEqual(sanitize_attribute("aAbBcC_12_oi"), "aAbBcC_12_oi") + + def testmap_unit(self): + # select hardcoded mappings + self.assertEqual(map_unit("s"), "seconds") + self.assertEqual(map_unit("By"), "bytes") + self.assertEqual(map_unit("m"), "meters") + # should work with UCUM annotations as well + self.assertEqual(map_unit("g{dogfood}"), "grams") + + # UCUM "default unit" aka unity and equivalent UCUM annotations should be stripped + self.assertEqual(map_unit("1"), "") + self.assertEqual(map_unit("{request}"), "") + self.assertEqual(map_unit("{{{;@#$}}}"), "") + + # conversion of per units + self.assertEqual(map_unit("km/h"), "km_per_hour") + self.assertEqual(map_unit("m/s"), "meters_per_second") + self.assertEqual(map_unit("{foo}/s"), "per_second") + self.assertEqual(map_unit("foo/bar"), "foo_per_bar") + self.assertEqual(map_unit("2fer/store"), "2fer_per_store") + + # should be sanitized to become part of the metric name without surrounding "_" + self.assertEqual(map_unit("____"), "") + self.assertEqual(map_unit("____"), "") + self.assertEqual(map_unit("1:foo#@!"), "1:foo") + # should not be interpreted as a per unit since there is no denominator + self.assertEqual(map_unit("m/"), "m") + self.assertEqual(map_unit("m/{bar}"), "m") diff --git a/exporter/opentelemetry-exporter-prometheus/tests/test_prometheus_exporter.py b/exporter/opentelemetry-exporter-prometheus/tests/test_prometheus_exporter.py index 7f0cbaaaeeb..64a224e7c93 100644 --- a/exporter/opentelemetry-exporter-prometheus/tests/test_prometheus_exporter.py +++ b/exporter/opentelemetry-exporter-prometheus/tests/test_prometheus_exporter.py @@ -40,6 +40,7 @@ from opentelemetry.sdk.resources import Resource from opentelemetry.test.metrictestutil import ( _generate_gauge, + _generate_histogram, _generate_sum, _generate_unsupported_metric, ) @@ -53,6 +54,31 @@ def setUp(self): side_effect=self._mock_registry_register, ) + def verify_text_format( + self, metric: Metric, expect_prometheus_text: str + ) -> None: + metrics_data = MetricsData( + resource_metrics=[ + ResourceMetrics( + resource=Mock(), + scope_metrics=[ + ScopeMetrics( + scope=Mock(), + metrics=[metric], + schema_url="schema_url", + ) + ], + schema_url="schema_url", + ) + ] + ) + + collector = _CustomCollector(disable_target_info=True) + collector.add_metrics_data(metrics_data) + result_bytes = generate_latest(collector) + result = result_bytes.decode("utf-8") + self.assertEqual(result, expect_prometheus_text) + # pylint: disable=protected-access def test_constructor(self): """Test the constructor.""" @@ -90,37 +116,17 @@ def test_histogram_to_prometheus(self): aggregation_temporality=AggregationTemporality.DELTA, ), ) - metrics_data = MetricsData( - resource_metrics=[ - ResourceMetrics( - resource=Mock(), - scope_metrics=[ - ScopeMetrics( - scope=Mock(), - metrics=[metric], - schema_url="schema_url", - ) - ], - schema_url="schema_url", - ) - ] - ) - - collector = _CustomCollector(disable_target_info=True) - collector.add_metrics_data(metrics_data) - result_bytes = generate_latest(collector) - result = result_bytes.decode("utf-8") - self.assertEqual( - result, + self.verify_text_format( + metric, dedent( """\ - # HELP test_name_s foo - # TYPE test_name_s histogram - test_name_s_bucket{histo="1",le="123.0"} 1.0 - test_name_s_bucket{histo="1",le="456.0"} 4.0 - test_name_s_bucket{histo="1",le="+Inf"} 6.0 - test_name_s_count{histo="1"} 6.0 - test_name_s_sum{histo="1"} 579.0 + # HELP test_name_seconds foo + # TYPE test_name_seconds histogram + test_name_seconds_bucket{histo="1",le="123.0"} 1.0 + test_name_seconds_bucket{histo="1",le="456.0"} 4.0 + test_name_seconds_bucket{histo="1",le="+Inf"} 6.0 + test_name_seconds_count{histo="1"} 6.0 + test_name_seconds_sum{histo="1"} 579.0 """ ), ) @@ -270,16 +276,6 @@ def test_invalid_metric(self): collector.collect() self.assertLogs("opentelemetry.exporter.prometheus", level="WARNING") - def test_sanitize(self): - collector = _CustomCollector() - self.assertEqual( - collector._sanitize("1!2@3#4$5%6^7&8*9(0)_-"), - "1_2_3_4_5_6_7_8_9_0___", - ) - self.assertEqual(collector._sanitize(",./?;:[]{}"), "__________") - self.assertEqual(collector._sanitize("TestString"), "TestString") - self.assertEqual(collector._sanitize("aAbBcC_12_oi"), "aAbBcC_12_oi") - def test_list_labels(self): labels = {"environment@": ["1", "2", "3"], "os": "Unix"} metric = _generate_gauge( @@ -457,3 +453,161 @@ def test_label_order_does_not_matter(self): # Only one metric is generated metric_count = prometheus_output.count("# HELP counter_total") self.assertEqual(metric_count, 1) + + def test_metric_name(self): + self.verify_text_format( + _generate_sum(name="test_counter", value=1, unit=""), + dedent( + """\ + # HELP test_counter_total foo + # TYPE test_counter_total counter + test_counter_total{a="1",b="true"} 1.0 + """ + ), + ) + self.verify_text_format( + _generate_sum(name="1leading_digit", value=1, unit=""), + dedent( + """\ + # HELP _leading_digit_total foo + # TYPE _leading_digit_total counter + _leading_digit_total{a="1",b="true"} 1.0 + """ + ), + ) + self.verify_text_format( + _generate_sum(name="!@#counter_invalid_chars", value=1, unit=""), + dedent( + """\ + # HELP _counter_invalid_chars_total foo + # TYPE _counter_invalid_chars_total counter + _counter_invalid_chars_total{a="1",b="true"} 1.0 + """ + ), + ) + + def test_metric_name_with_unit(self): + self.verify_text_format( + _generate_gauge(name="test.metric.no_unit", value=1, unit=""), + dedent( + """\ + # HELP test_metric_no_unit foo + # TYPE test_metric_no_unit gauge + test_metric_no_unit{a="1",b="true"} 1.0 + """ + ), + ) + self.verify_text_format( + _generate_gauge( + name="test.metric.spaces", value=1, unit=" \t " + ), + dedent( + """\ + # HELP test_metric_spaces foo + # TYPE test_metric_spaces gauge + test_metric_spaces{a="1",b="true"} 1.0 + """ + ), + ) + + # UCUM annotations should be stripped + self.verify_text_format( + _generate_sum(name="test_counter", value=1, unit="{requests}"), + dedent( + """\ + # HELP test_counter_total foo + # TYPE test_counter_total counter + test_counter_total{a="1",b="true"} 1.0 + """ + ), + ) + + # slash converts to "per" + self.verify_text_format( + _generate_gauge(name="test_gauge", value=1, unit="m/s"), + dedent( + """\ + # HELP test_gauge_meters_per_second foo + # TYPE test_gauge_meters_per_second gauge + test_gauge_meters_per_second{a="1",b="true"} 1.0 + """ + ), + ) + + # invalid characters in name are sanitized before being passed to prom client, which + # would throw errors + self.verify_text_format( + _generate_sum(name="test_counter", value=1, unit="%{foo}@?"), + dedent( + """\ + # HELP test_counter_total foo + # TYPE test_counter_total counter + test_counter_total{a="1",b="true"} 1.0 + """ + ), + ) + + def test_semconv(self): + """Tests that a few select semconv metrics get converted to the expected prometheus + text format""" + self.verify_text_format( + _generate_sum( + name="system.filesystem.usage", + value=1, + is_monotonic=False, + unit="By", + ), + dedent( + """\ + # HELP system_filesystem_usage_bytes foo + # TYPE system_filesystem_usage_bytes gauge + system_filesystem_usage_bytes{a="1",b="true"} 1.0 + """ + ), + ) + self.verify_text_format( + _generate_sum( + name="system.network.dropped", + value=1, + unit="{packets}", + ), + dedent( + """\ + # HELP system_network_dropped_total foo + # TYPE system_network_dropped_total counter + system_network_dropped_total{a="1",b="true"} 1.0 + """ + ), + ) + self.verify_text_format( + _generate_histogram( + name="http.server.request.duration", + unit="s", + ), + dedent( + """\ + # HELP http_server_request_duration_seconds foo + # TYPE http_server_request_duration_seconds histogram + http_server_request_duration_seconds_bucket{a="1",b="true",le="123.0"} 1.0 + http_server_request_duration_seconds_bucket{a="1",b="true",le="456.0"} 4.0 + http_server_request_duration_seconds_bucket{a="1",b="true",le="+Inf"} 6.0 + http_server_request_duration_seconds_count{a="1",b="true"} 6.0 + http_server_request_duration_seconds_sum{a="1",b="true"} 579.0 + """ + ), + ) + self.verify_text_format( + _generate_sum( + name="http.server.active_requests", + value=1, + unit="{request}", + is_monotonic=False, + ), + dedent( + """\ + # HELP http_server_active_requests foo + # TYPE http_server_active_requests gauge + http_server_active_requests{a="1",b="true"} 1.0 + """ + ), + ) diff --git a/tests/opentelemetry-test-utils/src/opentelemetry/test/metrictestutil.py b/tests/opentelemetry-test-utils/src/opentelemetry/test/metrictestutil.py index ff25b092a66..08354bf0de4 100644 --- a/tests/opentelemetry-test-utils/src/opentelemetry/test/metrictestutil.py +++ b/tests/opentelemetry-test-utils/src/opentelemetry/test/metrictestutil.py @@ -13,14 +13,19 @@ # limitations under the License. +from typing import Optional + from opentelemetry.attributes import BoundedAttributes from opentelemetry.sdk.metrics.export import ( AggregationTemporality, Gauge, + Histogram, + HistogramDataPoint, Metric, NumberDataPoint, Sum, ) +from opentelemetry.util.types import Attributes def _generate_metric( @@ -98,3 +103,34 @@ def _generate_unsupported_metric( description=description, unit=unit, ) + + +def _generate_histogram( + name: str, + attributes: Attributes = None, + description: Optional[str] = None, + unit: Optional[str] = None, +) -> Metric: + if attributes is None: + attributes = BoundedAttributes(attributes={"a": 1, "b": True}) + return _generate_metric( + name, + Histogram( + data_points=[ + HistogramDataPoint( + attributes=attributes, + start_time_unix_nano=1641946016139533244, + time_unix_nano=1641946016139533244, + count=6, + sum=579.0, + bucket_counts=[1, 3, 2], + explicit_bounds=[123.0, 456.0], + min=1, + max=457, + ) + ], + aggregation_temporality=AggregationTemporality.CUMULATIVE, + ), + description=description, + unit=unit, + ) From 07aff6261f6e8e2e775b88cfb1c4eeba8f45b831 Mon Sep 17 00:00:00 2001 From: Aaron Abbott Date: Thu, 23 May 2024 00:23:14 -0400 Subject: [PATCH 2/7] Apply suggestions from code review Co-authored-by: Diego Hurtado --- .../opentelemetry-exporter-prometheus/tests/test_mapping.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exporter/opentelemetry-exporter-prometheus/tests/test_mapping.py b/exporter/opentelemetry-exporter-prometheus/tests/test_mapping.py index d8ffe544dea..f2c0697ed7a 100644 --- a/exporter/opentelemetry-exporter-prometheus/tests/test_mapping.py +++ b/exporter/opentelemetry-exporter-prometheus/tests/test_mapping.py @@ -22,7 +22,7 @@ class TestMapping(TestCase): - def testsanitize_full_name(self): + def test_sanitize_full_name(self): self.assertEqual( sanitize_full_name("valid_metric_name"), "valid_metric_name" ) @@ -53,7 +53,7 @@ def testsanitize_full_name(self): self.assertEqual(sanitize_full_name("TestString"), "TestString") self.assertEqual(sanitize_full_name("aAbBcC_12_oi"), "aAbBcC_12_oi") - def testsanitize_attribute(self): + def test_sanitize_attribute(self): self.assertEqual( sanitize_attribute("valid_attr_key"), "valid_attr_key" ) From 7c729b61d1ea6f7aaca76ead347a2297c427a1c7 Mon Sep 17 00:00:00 2001 From: Aaron Abbott Date: Thu, 23 May 2024 04:32:19 +0000 Subject: [PATCH 3/7] Make annotation parsing more permissive, add test case for consecutive underscores --- .../src/opentelemetry/exporter/prometheus/_mapping.py | 10 +++++----- .../tests/test_mapping.py | 8 +++++++- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/exporter/opentelemetry-exporter-prometheus/src/opentelemetry/exporter/prometheus/_mapping.py b/exporter/opentelemetry-exporter-prometheus/src/opentelemetry/exporter/prometheus/_mapping.py index 636066c5054..0c1fe86de7c 100644 --- a/exporter/opentelemetry-exporter-prometheus/src/opentelemetry/exporter/prometheus/_mapping.py +++ b/exporter/opentelemetry-exporter-prometheus/src/opentelemetry/exporter/prometheus/_mapping.py @@ -18,9 +18,9 @@ # Same as name, but doesn't allow ":" _SANITIZE_ATTRIBUTE_KEY_RE = compile(r"([^a-zA-Z0-9]+)|_{2,}", UNICODE) -# UCUM annotations are ASCII chars 33-126 enclosed in curly braces -# https://ucum.org/ucum#para-6 -_UCUM_ANNOTATION_CURLY = compile(r"{[!-~]*}") +# UCUM style annotations which are text enclosed in curly braces https://ucum.org/ucum#para-6. +# This regex is more permissive than UCUM allows and matches any character within curly braces. +_UNIT_ANNOTATION = compile(r"{.*}") # Remaps common UCUM and SI units to prometheus conventions. Copied from # https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/v0.101.0/pkg/translator/prometheus/normalize_name.go#L19 @@ -110,8 +110,8 @@ def map_unit(unit: str) -> str: - https://github.com/open-telemetry/opentelemetry-specification/blob/v1.33.0/specification/compatibility/prometheus_and_openmetrics.md#metric-metadata-1 - https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/v0.101.0/pkg/translator/prometheus/normalize_name.go#L108 """ - # remove curly brace UCUM annotations - unit = _UCUM_ANNOTATION_CURLY.sub("", unit) + # remove curly brace unit annotations + unit = _UNIT_ANNOTATION.sub("", unit) if unit in _UNIT_MAPPINGS: return _UNIT_MAPPINGS[unit] diff --git a/exporter/opentelemetry-exporter-prometheus/tests/test_mapping.py b/exporter/opentelemetry-exporter-prometheus/tests/test_mapping.py index f2c0697ed7a..f2641de17a7 100644 --- a/exporter/opentelemetry-exporter-prometheus/tests/test_mapping.py +++ b/exporter/opentelemetry-exporter-prometheus/tests/test_mapping.py @@ -41,6 +41,10 @@ def test_sanitize_full_name(self): self.assertEqual( sanitize_full_name("1leading_digit"), "_leading_digit" ) + self.assertEqual( + sanitize_full_name("consective_____underscores"), + "consective_underscores", + ) self.assertEqual( sanitize_full_name("1_~#consective_underscores"), "_consective_underscores", @@ -84,7 +88,7 @@ def test_sanitize_attribute(self): self.assertEqual(sanitize_attribute("TestString"), "TestString") self.assertEqual(sanitize_attribute("aAbBcC_12_oi"), "aAbBcC_12_oi") - def testmap_unit(self): + def test_map_unit(self): # select hardcoded mappings self.assertEqual(map_unit("s"), "seconds") self.assertEqual(map_unit("By"), "bytes") @@ -94,8 +98,10 @@ def testmap_unit(self): # UCUM "default unit" aka unity and equivalent UCUM annotations should be stripped self.assertEqual(map_unit("1"), "") + self.assertEqual(map_unit("{}"), "") self.assertEqual(map_unit("{request}"), "") self.assertEqual(map_unit("{{{;@#$}}}"), "") + self.assertEqual(map_unit("{unit with space}"), "") # conversion of per units self.assertEqual(map_unit("km/h"), "km_per_hour") From dad9948deab223cdb8e3a03845b0f7beea55c20d Mon Sep 17 00:00:00 2001 From: Aaron Abbott Date: Thu, 23 May 2024 04:45:51 +0000 Subject: [PATCH 4/7] Add test case for metric name already containing the unit --- .../tests/test_prometheus_exporter.py | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/exporter/opentelemetry-exporter-prometheus/tests/test_prometheus_exporter.py b/exporter/opentelemetry-exporter-prometheus/tests/test_prometheus_exporter.py index 64a224e7c93..9abf9629d20 100644 --- a/exporter/opentelemetry-exporter-prometheus/tests/test_prometheus_exporter.py +++ b/exporter/opentelemetry-exporter-prometheus/tests/test_prometheus_exporter.py @@ -611,3 +611,32 @@ def test_semconv(self): """ ), ) + # if the metric name already contains the unit, it shouldn't be added again + self.verify_text_format( + _generate_sum( + name="metric_name_with_myunit", + value=1, + unit="myunit", + ), + dedent( + """\ + # HELP metric_name_with_myunit_total foo + # TYPE metric_name_with_myunit_total counter + metric_name_with_myunit_total{a="1",b="true"} 1.0 + """ + ), + ) + self.verify_text_format( + _generate_gauge( + name="metric_name_percent", + value=1, + unit="%", + ), + dedent( + """\ + # HELP metric_name_percent foo + # TYPE metric_name_percent gauge + metric_name_percent{a="1",b="true"} 1.0 + """ + ), + ) From a5844daee4afe17810d1ebfa0e18aa0e935f021a Mon Sep 17 00:00:00 2001 From: Aaron Abbott Date: Thu, 23 May 2024 16:38:54 +0000 Subject: [PATCH 5/7] simplify and speed up regex and update TODO --- .../src/opentelemetry/exporter/prometheus/_mapping.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/exporter/opentelemetry-exporter-prometheus/src/opentelemetry/exporter/prometheus/_mapping.py b/exporter/opentelemetry-exporter-prometheus/src/opentelemetry/exporter/prometheus/_mapping.py index 0c1fe86de7c..077d2fbb2b8 100644 --- a/exporter/opentelemetry-exporter-prometheus/src/opentelemetry/exporter/prometheus/_mapping.py +++ b/exporter/opentelemetry-exporter-prometheus/src/opentelemetry/exporter/prometheus/_mapping.py @@ -14,9 +14,9 @@ from re import UNICODE, compile -_SANITIZE_NAME_RE = compile(r"([^a-zA-Z0-9:]+)|_{2,}", UNICODE) +_SANITIZE_NAME_RE = compile(r"[^a-zA-Z0-9:]+", UNICODE) # Same as name, but doesn't allow ":" -_SANITIZE_ATTRIBUTE_KEY_RE = compile(r"([^a-zA-Z0-9]+)|_{2,}", UNICODE) +_SANITIZE_ATTRIBUTE_KEY_RE = compile(r"[^a-zA-Z0-9]+", UNICODE) # UCUM style annotations which are text enclosed in curly braces https://ucum.org/ucum#para-6. # This regex is more permissive than UCUM allows and matches any character within curly braces. @@ -55,7 +55,9 @@ # Misc "Cel": "celsius", "Hz": "hertz", - # TODO: this conflicts with the spec but I think it is correct. Need to open a spec issue + # TODO(https://github.com/open-telemetry/opentelemetry-specification/issues/4058): the + # specification says to normalize "1" to ratio but that may change. Update this mapping or + # remove TODO once a decision is made. "1": "", "%": "percent", } From 78ffe92d542f8ed71307ea798918d99c486a1dcc Mon Sep 17 00:00:00 2001 From: Aaron Abbott Date: Thu, 23 May 2024 19:40:53 +0000 Subject: [PATCH 6/7] Add OTEL_PYTHON_EXPERIMENTAL_DISABLE_PROMETHEUS_UNIT_NORMALIZATION opt-out mechanism --- CHANGELOG.md | 4 +++- .../exporter/prometheus/__init__.py | 15 +++++++++++- .../tests/test_prometheus_exporter.py | 23 +++++++++++++++++++ .../sdk/environment_variables.py | 18 +++++++++++++++ 4 files changed, 58 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9978898b382..0a4e061bb62 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -46,10 +46,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ([#3924](https://github.com/open-telemetry/opentelemetry-python/pull/3924)) - this is a breaking change to prometheus metric names so they comply with the [specification](https://github.com/open-telemetry/opentelemetry-specification/blob/v1.33.0/specification/compatibility/prometheus_and_openmetrics.md#otlp-metric-points-to-prometheus). + - you can temporarily opt-out of the unit normalization by setting the environment variable + `OTEL_PYTHON_EXPERIMENTAL_DISABLE_PROMETHEUS_UNIT_NORMALIZATION=true` - common unit abbreviations are converted to Prometheus conventions (`s` -> `seconds`), following the [collector's implementation](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/c0b51136575aa7ba89326d18edb4549e7e1bbdb9/pkg/translator/prometheus/normalize_name.go#L108) - repeated `_` are replaced with a single `_` - - UCUM annotations (enclosed in curly braces like `{requests}`) are stripped away + - unit annotations (enclosed in curly braces like `{requests}`) are stripped away - units with slash are converted e.g. `m/s` -> `meters_per_second`. - The exporter's API is not changed diff --git a/exporter/opentelemetry-exporter-prometheus/src/opentelemetry/exporter/prometheus/__init__.py b/exporter/opentelemetry-exporter-prometheus/src/opentelemetry/exporter/prometheus/__init__.py index af8eedb10f1..7e6b2ecc01f 100644 --- a/exporter/opentelemetry-exporter-prometheus/src/opentelemetry/exporter/prometheus/__init__.py +++ b/exporter/opentelemetry-exporter-prometheus/src/opentelemetry/exporter/prometheus/__init__.py @@ -87,6 +87,7 @@ from opentelemetry.sdk.environment_variables import ( OTEL_EXPORTER_PROMETHEUS_HOST, OTEL_EXPORTER_PROMETHEUS_PORT, + OTEL_PYTHON_EXPERIMENTAL_DISABLE_PROMETHEUS_UNIT_NORMALIZATION, ) from opentelemetry.sdk.metrics import Counter from opentelemetry.sdk.metrics import Histogram as HistogramInstrument @@ -233,7 +234,19 @@ def _translate_to_prometheus( metric_name = sanitize_full_name(metric.name) metric_description = metric.description or "" - metric_unit = map_unit(metric.unit) + + # TODO(#3929): remove this opt-out option + disable_unit_normalization = ( + environ.get( + OTEL_PYTHON_EXPERIMENTAL_DISABLE_PROMETHEUS_UNIT_NORMALIZATION, + "false", + ).lower() + == "true" + ) + if disable_unit_normalization: + metric_unit = metric.unit + else: + metric_unit = map_unit(metric.unit) for number_data_point in metric.data.data_points: label_keys = [] diff --git a/exporter/opentelemetry-exporter-prometheus/tests/test_prometheus_exporter.py b/exporter/opentelemetry-exporter-prometheus/tests/test_prometheus_exporter.py index 9abf9629d20..ed11bb9ab7b 100644 --- a/exporter/opentelemetry-exporter-prometheus/tests/test_prometheus_exporter.py +++ b/exporter/opentelemetry-exporter-prometheus/tests/test_prometheus_exporter.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +from os import environ from textwrap import dedent from unittest import TestCase from unittest.mock import Mock, patch @@ -27,6 +28,9 @@ PrometheusMetricReader, _CustomCollector, ) +from opentelemetry.sdk.environment_variables import ( + OTEL_PYTHON_EXPERIMENTAL_DISABLE_PROMETHEUS_UNIT_NORMALIZATION, +) from opentelemetry.sdk.metrics import MeterProvider from opentelemetry.sdk.metrics.export import ( AggregationTemporality, @@ -547,6 +551,25 @@ def test_metric_name_with_unit(self): ), ) + # TODO(#3929): remove this opt-out option + @patch.dict( + environ, + { + OTEL_PYTHON_EXPERIMENTAL_DISABLE_PROMETHEUS_UNIT_NORMALIZATION: "true" + }, + ) + def test_metric_name_with_unit_normalization_disabled(self): + self.verify_text_format( + _generate_sum(name="test_unit_not_normalized", value=1, unit="s"), + dedent( + """\ + # HELP test_unit_not_normalized_s_total foo + # TYPE test_unit_not_normalized_s_total counter + test_unit_not_normalized_s_total{a="1",b="true"} 1.0 + """ + ), + ) + def test_semconv(self): """Tests that a few select semconv metrics get converted to the expected prometheus text format""" diff --git a/opentelemetry-sdk/src/opentelemetry/sdk/environment_variables.py b/opentelemetry-sdk/src/opentelemetry/sdk/environment_variables.py index 6557d2a1391..d13e8163b86 100644 --- a/opentelemetry-sdk/src/opentelemetry/sdk/environment_variables.py +++ b/opentelemetry-sdk/src/opentelemetry/sdk/environment_variables.py @@ -710,3 +710,21 @@ This is an experimental environment variable and the name of this variable and its behavior can change in a non-backwards compatible way. """ + + +# TODO(#3929): remove this opt-out option +OTEL_PYTHON_EXPERIMENTAL_DISABLE_PROMETHEUS_UNIT_NORMALIZATION = ( + "OTEL_PYTHON_EXPERIMENTAL_DISABLE_PROMETHEUS_UNIT_NORMALIZATION" +) +""" +.. envvar:: OTEL_PYTHON_EXPERIMENTAL_DISABLE_PROMETHEUS_UNIT_NORMALIZATION + +The :envvar:`OTEL_PYTHON_EXPERIMENTAL_DISABLE_PROMETHEUS_UNIT_NORMALIZATION` environment +variable allows you to opt-out of the unit normalization described` in the specification +`_. + +Default: False + +This is an temporary environment variable that provides backward compatibility but will be +removed in the future https://github.com/open-telemetry/opentelemetry-python/issues/3929. +""" From d35f4b8a0a42bc97f20997c367abf77ec0c9027d Mon Sep 17 00:00:00 2001 From: Aaron Abbott Date: Thu, 23 May 2024 20:01:20 +0000 Subject: [PATCH 7/7] Fix RST typo --- .../src/opentelemetry/sdk/environment_variables.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opentelemetry-sdk/src/opentelemetry/sdk/environment_variables.py b/opentelemetry-sdk/src/opentelemetry/sdk/environment_variables.py index d13e8163b86..99899539a1a 100644 --- a/opentelemetry-sdk/src/opentelemetry/sdk/environment_variables.py +++ b/opentelemetry-sdk/src/opentelemetry/sdk/environment_variables.py @@ -720,7 +720,7 @@ .. envvar:: OTEL_PYTHON_EXPERIMENTAL_DISABLE_PROMETHEUS_UNIT_NORMALIZATION The :envvar:`OTEL_PYTHON_EXPERIMENTAL_DISABLE_PROMETHEUS_UNIT_NORMALIZATION` environment -variable allows you to opt-out of the unit normalization described` in the specification +variable allows you to opt-out of the unit normalization described `in the specification `_. Default: False