Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Append unit to prometheus metric names #5400

Merged
merged 25 commits into from
Jun 8, 2023
Merged
Show file tree
Hide file tree
Changes from 20 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
5b28758
Append units to Prometheus metric names
psx95 Apr 24, 2023
5e5661f
Fix failing tests due to metric names update
psx95 Apr 24, 2023
bccc044
Refactor unit conversion function
psx95 Apr 24, 2023
cb3956b
Fix edge cases for unit conversion
psx95 Apr 25, 2023
d5e7c40
Add tests for unit conversion
psx95 Apr 25, 2023
6e7b601
Fix naming issues to be compliant with the stylecheck
psx95 Apr 25, 2023
6c17344
Remove redundant validity check
psx95 Apr 25, 2023
0525969
Move 'per' unit check inside the coversion method
psx95 Apr 26, 2023
6f20800
Move abbreviated unit mappings: Map to Switch-Case
psx95 Apr 26, 2023
f2fea06
Add unit tests to cover all expansion cases
psx95 Apr 26, 2023
7568167
Add missing test case to increase coverage
psx95 Apr 26, 2023
bab0e57
Add missing documentation
psx95 Apr 27, 2023
19a93fe
Make PrometheusUnitsHelper class internal
psx95 Apr 28, 2023
2937d84
Replace string replace with pattern matching
psx95 Apr 28, 2023
ac96ab0
Refactor: unit name cleanup logic moved to Serializer
psx95 Apr 28, 2023
5d3488f
Add tests for metricName serialization
psx95 Apr 29, 2023
bf33ae0
Cleanup units before returning
psx95 Apr 29, 2023
0cd3aab
Appends units if not present in metric name
psx95 Apr 29, 2023
519ceb8
Merge branch 'main' into issue-4390
psx95 May 5, 2023
c4df7ef
Merge branch 'main' into issue-4390
psx95 Jun 2, 2023
7bf71a2
Convert public method to package-private
psx95 Jun 2, 2023
0f2c3c1
Rename method sampleMetricDataGenerator -> createSampleMetricData
psx95 Jun 2, 2023
38ee5a8
Apply caching mechanism to prometheus metric name mapping
psx95 Jun 3, 2023
cd89da8
Remove 1 as unit for non-gauges in test data
psx95 Jun 3, 2023
c302e91
Creates an AutoValue class for cache mapping keys
psx95 Jun 5, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ class NameSanitizer implements Function<String, String> {

static final NameSanitizer INSTANCE = new NameSanitizer();

static final Pattern SANITIZE_CONSECUTIVE_UNDERSCORES = Pattern.compile("[_]{2,}");

private static final Pattern SANITIZE_PREFIX_PATTERN = Pattern.compile("^[^a-zA-Z_:]");
private static final Pattern SANITIZE_BODY_PATTERN = Pattern.compile("[^a-zA-Z0-9_:]");

Expand All @@ -36,8 +38,11 @@ public String apply(String labelName) {
}

private static String sanitizeMetricName(String metricName) {
return SANITIZE_BODY_PATTERN
.matcher(SANITIZE_PREFIX_PATTERN.matcher(metricName).replaceFirst("_"))
return SANITIZE_CONSECUTIVE_UNDERSCORES
.matcher(
SANITIZE_BODY_PATTERN
.matcher(SANITIZE_PREFIX_PATTERN.matcher(metricName).replaceFirst("_"))
.replaceAll("_"))
.replaceAll("_");
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,224 @@
/*
* Copyright The OpenTelemetry Authors
* SPDX-License-Identifier: Apache-2.0
*/

package io.opentelemetry.exporter.prometheus;

import static io.opentelemetry.exporter.prometheus.NameSanitizer.SANITIZE_CONSECUTIVE_UNDERSCORES;

import io.opentelemetry.api.internal.StringUtils;
import java.util.regex.Pattern;

/**
* A utility class that contains helper function(s) to aid conversion from OTLP to Prometheus units.
*
* @see <a
* href="https://github.com/OpenObservability/OpenMetrics/blob/main/specification/OpenMetrics.md#units-and-base-units">OpenMetrics
* specification for units</a>
* @see <a href="https://prometheus.io/docs/practices/naming/#base-units">Prometheus best practices
* for units</a>
*/
final class PrometheusUnitsHelper {

private static final Pattern INVALID_CHARACTERS_PATTERN = Pattern.compile("[^a-zA-Z0-9]");
private static final Pattern CHARACTERS_BETWEEN_BRACES_PATTERN = Pattern.compile("\\{(.*?)}");
private static final Pattern SANITIZE_LEADING_UNDERSCORES = Pattern.compile("^_+");
private static final Pattern SANITIZE_TRAILING_UNDERSCORES = Pattern.compile("_+$");

private PrometheusUnitsHelper() {
// Prevent object creation for utility classes
}

/**
* A utility function that returns the equivalent Prometheus name for the provided OTLP metric
* unit.
*
* @param rawMetricUnitName The raw metric unit for which Prometheus metric unit needs to be
* computed.
* @return the computed Prometheus metric unit equivalent of the OTLP metric un
*/
public static String getEquivalentPrometheusUnit(String rawMetricUnitName) {
psx95 marked this conversation as resolved.
Show resolved Hide resolved
if (StringUtils.isNullOrEmpty(rawMetricUnitName)) {
return rawMetricUnitName;
}
// Drop units specified between curly braces
String convertedMetricUnitName = removeUnitPortionInBraces(rawMetricUnitName);
// Handling for the "per" unit(s), e.g. foo/bar -> foo_per_bar
convertedMetricUnitName = convertRateExpressedToPrometheusUnit(convertedMetricUnitName);
// Converting abbreviated unit names to full names
return cleanUpString(getPrometheusUnit(convertedMetricUnitName));
}

/**
* This method is used to convert the units expressed as a rate via '/' symbol in their name to
* their expanded text equivalent. For instance, km/h => km_per_hour. The method operates on the
* input by splitting it in 2 parts - before and after '/' symbol and will attempt to expand any
* known unit abbreviation in both parts. Unknown abbreviations & unsupported characters will
* remain unchanged in the final output of this function.
*
* @param rateExpressedUnit The rate unit input that needs to be converted to its text equivalent.
* @return The text equivalent of unit expressed as rate. If the input does not contain '/', the
* function returns it as-is.
*/
private static String convertRateExpressedToPrometheusUnit(String rateExpressedUnit) {
psx95 marked this conversation as resolved.
Show resolved Hide resolved
if (!rateExpressedUnit.contains("/")) {
return rateExpressedUnit;
}
String[] rateEntities = rateExpressedUnit.split("/", 2);
// Only convert rate expressed units if it's a valid expression
if (rateEntities[1].equals("")) {
psx95 marked this conversation as resolved.
Show resolved Hide resolved
return rateExpressedUnit;
}
return getPrometheusUnit(rateEntities[0]) + "_per_" + getPrometheusPerUnit(rateEntities[1]);
}

/**
* This method drops all characters enclosed within '{}' (including the curly braces) by replacing
* them with an empty string. Note that this method will not produce the intended effect if there
* are nested curly braces within the outer enclosure of '{}'.
*
* <p>For instance, {packet{s}s} => s}.
*
* @param unit The input unit from which text within curly braces needs to be removed.
* @return The resulting unit after removing the text within '{}'.
*/
private static String removeUnitPortionInBraces(String unit) {
return CHARACTERS_BETWEEN_BRACES_PATTERN.matcher(unit).replaceAll("");
}

/**
* Replaces all characters that are not a letter or a digit with '_' to make the resulting string
* Prometheus compliant. This method also removes leading and trailing underscores - this is done
* to keep the resulting unit similar to what is produced from the collector's implementation.
*
* @param string The string input that needs to be made Prometheus compliant.
* @return the cleaned-up Prometheus compliant string.
*/
private static String cleanUpString(String string) {
return SANITIZE_LEADING_UNDERSCORES
.matcher(
SANITIZE_TRAILING_UNDERSCORES
.matcher(
SANITIZE_CONSECUTIVE_UNDERSCORES
.matcher(INVALID_CHARACTERS_PATTERN.matcher(string).replaceAll("_"))
Comment on lines +99 to +104
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: as a future performance improvement we could probably replace all these regexes (and those in NameSanitizer too) with a single loop that removes all the unwanted characters.

.replaceAll("_"))
.replaceAll(""))
.replaceAll("");
}

/**
* This method retrieves the expanded Prometheus unit name for known abbreviations. OTLP metrics
* use the c/s notation as specified at <a href="https://ucum.org/ucum.html">UCUM</a>. The list of
* mappings is adopted from <a
* href="https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/9a9d4778bbbf242dba233db28e2fbcfda3416959/pkg/translator/prometheus/normalize_name.go#L30">OpenTelemetry
* Collector Contrib</a>.
*
* @param unitAbbreviation The unit that name that needs to be expanded/converted to Prometheus
* units.
* @return The expanded/converted unit name if known, otherwise returns the input unit name as-is.
*/
private static String getPrometheusUnit(String unitAbbreviation) {
switch (unitAbbreviation) {
// Time
case "d":
return "days";
case "h":
return "hours";
case "min":
return "minutes";
case "s":
return "seconds";
case "ms":
return "milliseconds";
case "us":
return "microseconds";
case "ns":
return "nanoseconds";
// Bytes
case "By":
return "bytes";
case "KiBy":
return "kibibytes";
case "MiBy":
return "mebibytes";
case "GiBy":
return "gibibytes";
case "TiBy":
return "tibibytes";
case "KBy":
return "kilobytes";
case "MBy":
return "megabytes";
case "GBy":
return "gigabytes";
case "TBy":
return "terabytes";
case "B":
return "bytes";
case "KB":
return "kilobytes";
case "MB":
return "megabytes";
case "GB":
return "gigabytes";
case "TB":
return "terabytes";
// SI
case "m":
return "meters";
case "V":
return "volts";
case "A":
return "amperes";
case "J":
return "joules";
case "W":
return "watts";
case "g":
return "grams";
// Misc
case "Cel":
return "celsius";
case "Hz":
return "hertz";
case "1":
return "";
case "%":
return "percent";
case "$":
return "dollars";
default:
return unitAbbreviation;
}
}

/**
* This method retrieves the expanded Prometheus unit name to be used with "per" units for known
* units. For example: s => per second (singular)
*
* @param perUnitAbbreviation The unit abbreviation used in a 'per' unit.
* @return The expanded unit equivalent to be used in 'per' unit if the input is a known unit,
* otherwise returns the input as-is.
*/
private static String getPrometheusPerUnit(String perUnitAbbreviation) {
switch (perUnitAbbreviation) {
case "s":
return "second";
case "m":
return "minute";
case "h":
return "hour";
case "d":
return "day";
case "w":
return "week";
case "mo":
return "month";
case "y":
return "year";
default:
return perUnitAbbreviation;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

import io.opentelemetry.api.common.AttributeKey;
import io.opentelemetry.api.common.Attributes;
import io.opentelemetry.api.internal.StringUtils;
import io.opentelemetry.api.trace.SpanContext;
import io.opentelemetry.sdk.common.InstrumentationScopeInfo;
import io.opentelemetry.sdk.metrics.data.AggregationTemporality;
Expand Down Expand Up @@ -118,7 +119,7 @@ final Set<String> write(Collection<MetricData> metrics, OutputStream output) thr
continue;
}
PrometheusType prometheusType = PrometheusType.forMetric(metric);
String metricName = metricName(metric.getName(), prometheusType);
String metricName = metricName(metric, prometheusType);
// Skip metrics which do not pass metricNameFilter
if (!metricNameFilter.test(metricName)) {
continue;
Expand Down Expand Up @@ -650,11 +651,28 @@ static Collection<? extends PointData> getPoints(MetricData metricData) {
return Collections.emptyList();
}

private static String metricName(String rawMetricName, PrometheusType type) {
String name = NameSanitizer.INSTANCE.apply(rawMetricName);
if (type == PrometheusType.COUNTER && !name.endsWith("_total")) {
// Visible for testing
static String metricName(MetricData rawMetric, PrometheusType type) {
String name = NameSanitizer.INSTANCE.apply(rawMetric.getName());
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you move this line below the next if you can probably get rid of the cleanUpString method in PrometheusUnitsHelper -- no need to perform sanitization twice after all. (And the performance will benefit from the cache in NameSanitizer as well)

String prometheusEquivalentUnit =
PrometheusUnitsHelper.getEquivalentPrometheusUnit(rawMetric.getUnit());
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seeing how this is used, we probably want to cache the unit conversion the same way NameSanitizer caches its responses. Maybe even cache the combination of the metric name, unit, and prometheus type to avoid repeatedly performing the concatenation / suffix logic below.

// append prometheus unit if not null or empty.
if (!StringUtils.isNullOrEmpty(prometheusEquivalentUnit)
&& !name.contains(prometheusEquivalentUnit)) {
name = name + "_" + prometheusEquivalentUnit;
}

// special case - counter
if (type == PrometheusType.COUNTER && !name.contains("total")) {
name = name + "_total";
}
// special case - gauge
if (rawMetric.getUnit().equals("1")
&& type == PrometheusType.GAUGE
&& !name.contains("ratio")) {
name = name + "_ratio";
}
psx95 marked this conversation as resolved.
Show resolved Hide resolved

return name;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,12 @@

import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Function;
import java.util.stream.Stream;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;

class NameSanitizerTest {

Expand All @@ -27,4 +32,32 @@ void testSanitizerCaching() {
assertThat(sanitizer.apply(labelName)).isEqualTo("http.name1");
assertThat(count).hasValue(1);
}

@ParameterizedTest
@MethodSource("provideMetricNamesForTest")
void testSanitizerCleansing(String unsanitizedName, String sanitizedName) {
Assertions.assertEquals(sanitizedName, NameSanitizer.INSTANCE.apply(unsanitizedName));
}

private static Stream<Arguments> provideMetricNamesForTest() {
return Stream.of(
// valid name - already sanitized
Arguments.of(
"active_directory_ds_replication_network_io",
"active_directory_ds_replication_network_io"),
// consecutive underscores
Arguments.of("cpu_sp__d_hertz", "cpu_sp_d_hertz"),
// leading and trailing underscores - should be fine
Arguments.of("_cpu_speed_hertz_", "_cpu_speed_hertz_"),
// unsupported characters replaced
Arguments.of("metric_unit_$1000", "metric_unit_1000"),
// multiple unsupported characters - whitespace
Arguments.of("sample_me%%$$$_count_ !!@unit include", "sample_me_count_unit_include"),
// metric names cannot start with a number
Arguments.of("1_some_metric_name", "_some_metric_name"),
// metric names can have :
Arguments.of("sample_metric_name__:_per_meter", "sample_metric_name_:_per_meter"),
// Illegal characters
Arguments.of("cpu_sp$$d_hertz", "cpu_sp_d_hertz"));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ void fetch_DuplicateMetrics() {
InstrumentationScopeInfo.create("scope1"),
"foo",
"description1",
"unit1",
"unit",
ImmutableSumData.create(
/* isMonotonic= */ true,
AggregationTemporality.CUMULATIVE,
Expand All @@ -248,7 +248,7 @@ void fetch_DuplicateMetrics() {
InstrumentationScopeInfo.create("scope2"),
"foo",
"description2",
"unit2",
"unit",
ImmutableSumData.create(
/* isMonotonic= */ true,
AggregationTemporality.CUMULATIVE,
Expand All @@ -259,9 +259,9 @@ void fetch_DuplicateMetrics() {
ImmutableMetricData.createLongGauge(
resource,
InstrumentationScopeInfo.create("scope3"),
"foo_total",
"unused",
"foo_unit_total",
"unused",
"unit",
ImmutableGaugeData.create(
Collections.singletonList(
ImmutableLongPointData.create(123, 456, Attributes.empty(), 3))))));
Expand All @@ -283,13 +283,13 @@ void fetch_DuplicateMetrics() {
+ "otel_scope_info{otel_scope_name=\"scope2\"} 1\n"
+ "# TYPE foo_total counter\n"
+ "# HELP foo_total description1\n"
+ "foo_total{otel_scope_name=\"scope1\"} 1.0 0\n"
+ "foo_total{otel_scope_name=\"scope2\"} 2.0 0\n");
+ "foo_unit_total{otel_scope_name=\"scope1\"} 1.0 0\n"
+ "foo_unit_total{otel_scope_name=\"scope2\"} 2.0 0\n");

// Validate conflict warning message
assertThat(logs.getEvents()).hasSize(1);
logs.assertContains(
"Metric conflict(s) detected. Multiple metrics with same name but different type: [foo_total]");
"Metric conflict(s) detected. Multiple metrics with same name but different type: [foo_unit_total]");

// Make another request and confirm warning is only logged once
client.get("/").aggregate().join();
Expand Down
Loading