Skip to content

Commit

Permalink
Update normalization for Metrics (#3332)
Browse files Browse the repository at this point in the history
* Update normalization for Metrics

* Update Changelog

* Do not escape control chars, as they get stripped by relay

* Update Changelog

* Update Changelog
  • Loading branch information
markushi authored Apr 8, 2024
1 parent dcc538b commit e74c05b
Show file tree
Hide file tree
Showing 4 changed files with 93 additions and 54 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Changelog

## Unreleased

## Features

- Update normalization of metrics keys, tags and values ([#3332](https://github.com/getsentry/sentry-java/pull/3332))

## 7.7.0

### Features
Expand Down
6 changes: 3 additions & 3 deletions sentry/api/sentry.api
Original file line number Diff line number Diff line change
Expand Up @@ -3534,14 +3534,14 @@ public final class io/sentry/metrics/MetricsHelper {
public static fun convertNanosTo (Lio/sentry/MeasurementUnit$Duration;J)D
public static fun encodeMetrics (JLjava/util/Collection;Ljava/lang/StringBuilder;)V
public static fun getCutoffTimestampMs (J)J
public static fun getDayBucketKey (Ljava/util/Calendar;)J
public static fun getExportKey (Lio/sentry/metrics/MetricType;Ljava/lang/String;Lio/sentry/MeasurementUnit;)Ljava/lang/String;
public static fun getMetricBucketKey (Lio/sentry/metrics/MetricType;Ljava/lang/String;Lio/sentry/MeasurementUnit;Ljava/util/Map;)Ljava/lang/String;
public static fun getTimeBucketKey (J)J
public static fun mergeTags (Ljava/util/Map;Ljava/util/Map;)Ljava/util/Map;
public static fun sanitizeKey (Ljava/lang/String;)Ljava/lang/String;
public static fun sanitizeName (Ljava/lang/String;)Ljava/lang/String;
public static fun sanitizeTagKey (Ljava/lang/String;)Ljava/lang/String;
public static fun sanitizeTagValue (Ljava/lang/String;)Ljava/lang/String;
public static fun sanitizeUnit (Ljava/lang/String;)Ljava/lang/String;
public static fun sanitizeValue (Ljava/lang/String;)Ljava/lang/String;
public static fun setFlushShiftMs (J)V
public static fun toStatsdType (Lio/sentry/metrics/MetricType;)Ljava/lang/String;
}
Expand Down
84 changes: 51 additions & 33 deletions sentry/src/main/java/io/sentry/metrics/MetricsHelper.java
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
package io.sentry.metrics;

import io.sentry.MeasurementUnit;
import java.util.Calendar;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Random;
import java.util.TimeZone;
import java.util.regex.Pattern;
import org.jetbrains.annotations.ApiStatus;
import org.jetbrains.annotations.NotNull;
Expand All @@ -20,14 +18,9 @@ public final class MetricsHelper {
public static final int MAX_TOTAL_WEIGHT = 100000;
private static final int ROLLUP_IN_SECONDS = 10;

private static final Pattern INVALID_KEY_CHARACTERS_PATTERN =
Pattern.compile("[^a-zA-Z0-9_/.-]+");
private static final Pattern INVALID_VALUE_CHARACTERS_PATTERN =
Pattern.compile("[^\\w\\d\\s_:/@\\.\\{\\}\\[\\]$-]+");
// See
// https://docs.sysdig.com/en/docs/sysdig-monitor/integrations/working-with-integrations/custom-integrations/integrate-statsd-metrics/#characters-allowed-for-statsd-metric-names
private static final Pattern INVALID_METRIC_UNIT_CHARACTERS_PATTERN =
Pattern.compile("[^a-zA-Z0-9_/.]+");
private static final Pattern UNIT_PATTERN = Pattern.compile("\\W+");
private static final Pattern NAME_PATTERN = Pattern.compile("[^\\w\\-.]+");
private static final Pattern TAG_KEY_PATTERN = Pattern.compile("[^\\w\\-./]+");

private static final char TAGS_PAIR_DELIMITER = ','; // Delimiter between key-value pairs
private static final char TAGS_KEY_VALUE_DELIMITER = '='; // Delimiter between key and value
Expand All @@ -36,15 +29,6 @@ public final class MetricsHelper {
private static long FLUSH_SHIFT_MS =
(long) (new Random().nextFloat() * (ROLLUP_IN_SECONDS * 1000f));

public static long getDayBucketKey(final @NotNull Calendar timestamp) {
final Calendar utc = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
utc.set(Calendar.YEAR, timestamp.get(Calendar.YEAR));
utc.set(Calendar.MONTH, timestamp.get(Calendar.MONTH));
utc.set(Calendar.DAY_OF_MONTH, timestamp.get(Calendar.DAY_OF_MONTH));

return utc.getTimeInMillis() / 1000;
}

public static long getTimeBucketKey(final long timestampMs) {
final long seconds = timestampMs / 1000;
final long bucketKey = (seconds / ROLLUP_IN_SECONDS) * ROLLUP_IN_SECONDS;
Expand All @@ -59,12 +43,50 @@ public static long getCutoffTimestampMs(final long nowMs) {
return nowMs - (ROLLUP_IN_SECONDS * 1000) - FLUSH_SHIFT_MS;
}

public static @NotNull String sanitizeKey(final @NotNull String input) {
return INVALID_KEY_CHARACTERS_PATTERN.matcher(input).replaceAll("_");
@NotNull
public static String sanitizeUnit(final @NotNull String unit) {
return UNIT_PATTERN.matcher(unit).replaceAll("");
}

@NotNull
public static String sanitizeName(final @NotNull String input) {
return NAME_PATTERN.matcher(input).replaceAll("_");
}

public static String sanitizeValue(final @NotNull String input) {
return INVALID_VALUE_CHARACTERS_PATTERN.matcher(input).replaceAll("");
@NotNull
public static String sanitizeTagKey(final @NotNull String input) {
return TAG_KEY_PATTERN.matcher(input).replaceAll("");
}

@NotNull
public static String sanitizeTagValue(final @NotNull String input) {
// see https://develop.sentry.dev/sdk/metrics/#tag-values-replacement-map
// Line feed -> \n
// Carriage return -> \r
// Tab -> \t
// Backslash -> \\
// Pipe -> \\u{7c}
// Comma -> \\u{2c}
final StringBuilder output = new StringBuilder(input.length());
for (int idx = 0; idx < input.length(); idx++) {
final char ch = input.charAt(idx);
if (ch == '\n') {
output.append("\\n");
} else if (ch == '\r') {
output.append("\\r");
} else if (ch == '\t') {
output.append("\\t");
} else if (ch == '\\') {
output.append("\\\\");
} else if (ch == '|') {
output.append("\\u{7c}");
} else if (ch == ',') {
output.append("\\u{2c}");
} else {
output.append(ch);
}
}
return output.toString();
}

public static @NotNull String toStatsdType(final @NotNull MetricType type) {
Expand All @@ -89,7 +111,7 @@ public static String getMetricBucketKey(
final @Nullable MeasurementUnit unit,
final @Nullable Map<String, String> tags) {
final @NotNull String typePrefix = toStatsdType(type);
final @NotNull String serializedTags = GetTagsKey(tags);
final @NotNull String serializedTags = getTagsKey(tags);

final @NotNull String unitName = getUnitName(unit);
return String.format("%s_%s_%s_%s", typePrefix, metricKey, unitName, serializedTags);
Expand All @@ -100,7 +122,8 @@ private static String getUnitName(final @Nullable MeasurementUnit unit) {
return (unit != null) ? unit.apiName() : MeasurementUnit.NONE;
}

private static String GetTagsKey(final @Nullable Map<String, String> tags) {
@NotNull
private static String getTagsKey(final @Nullable Map<String, String> tags) {
if (tags == null || tags.isEmpty()) {
return "";
}
Expand Down Expand Up @@ -197,7 +220,7 @@ public static void encodeMetrics(
final @NotNull Collection<Metric> metrics,
final @NotNull StringBuilder writer) {
for (Metric metric : metrics) {
writer.append(sanitizeKey(metric.getKey()));
writer.append(sanitizeName(metric.getKey()));
writer.append("@");

final @Nullable MeasurementUnit unit = metric.getUnit();
Expand All @@ -218,15 +241,15 @@ public static void encodeMetrics(
writer.append("|#");
boolean first = true;
for (final @NotNull Map.Entry<String, String> tag : tags.entrySet()) {
final @NotNull String tagKey = sanitizeKey(tag.getKey());
final @NotNull String tagKey = sanitizeTagKey(tag.getKey());
if (first) {
first = false;
} else {
writer.append(",");
}
writer.append(tagKey);
writer.append(":");
writer.append(sanitizeValue(tag.getValue()));
writer.append(sanitizeTagValue(tag.getValue()));
}
}

Expand All @@ -236,11 +259,6 @@ public static void encodeMetrics(
}
}

@NotNull
public static String sanitizeUnit(@NotNull String unit) {
return INVALID_METRIC_UNIT_CHARACTERS_PATTERN.matcher(unit).replaceAll("_");
}

@NotNull
public static Map<String, String> mergeTags(
final @Nullable Map<String, String> tags, final @NotNull Map<String, String> defaultTags) {
Expand Down
51 changes: 33 additions & 18 deletions sentry/src/test/java/io/sentry/metrics/MetricsHelperTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -59,31 +59,46 @@ class MetricsHelperTest {
}

@Test
fun sanitizeKey() {
assertEquals("foo-bar", MetricsHelper.sanitizeKey("foo-bar"))
assertEquals("foo_bar", MetricsHelper.sanitizeKey("foo\$\$\$bar"))
assertEquals("fo_-bar", MetricsHelper.sanitizeKey("foö-bar"))
fun sanitizeName() {
assertEquals("foo-bar", MetricsHelper.sanitizeName("foo-bar"))
assertEquals("foo_bar", MetricsHelper.sanitizeName("foo\$\$\$bar"))
assertEquals("fo_-bar", MetricsHelper.sanitizeName("foö-bar"))
}

@Test
fun sanitizeValue() {
assertEquals("\$foo", MetricsHelper.sanitizeValue("%\$foo"))
assertEquals("blah{}", MetricsHelper.sanitizeValue("blah{}"))
assertEquals("snwmn", MetricsHelper.sanitizeValue("snöwmän"))
assertEquals("j e n g a", MetricsHelper.sanitizeValue("j e n g a!"))
fun sanitizeTagKey() {
// no replacement characters for tag keys
// - and / should be allowed
assertEquals("a/weird/tag-key/", MetricsHelper.sanitizeTagKey("a/weird/tag-key/:ä"))
}

@Test
fun sanitizeUnit() {
val items = listOf(
"Test123_." to "Test123_.",
"test{value}" to "test_value_",
"test-value" to "test_value"
)
fun sanitizeTagValue() {
// https://github.com/getsentry/relay/blob/3208e3ce5b1fe4d147aa44e0e966807c256993de/relay-metrics/src/protocol.rs#L142
assertEquals("plain", MetricsHelper.sanitizeTagValue("plain"))
assertEquals("plain text", MetricsHelper.sanitizeTagValue("plain text"))
assertEquals("plain%text", MetricsHelper.sanitizeTagValue("plain%text"))

// Escape sequences
assertEquals("plain \\\\ text", MetricsHelper.sanitizeTagValue("plain \\ text"))
assertEquals("plain\\u{2c}text", MetricsHelper.sanitizeTagValue("plain,text"))
assertEquals("plain\\u{7c}text", MetricsHelper.sanitizeTagValue("plain|text"))
assertEquals("plain 😅", MetricsHelper.sanitizeTagValue("plain 😅"))

// Escapable control characters (may be stripped by the parser)
assertEquals("plain\\ntext", MetricsHelper.sanitizeTagValue("plain\ntext"))
assertEquals("plain\\rtext", MetricsHelper.sanitizeTagValue("plain\rtext"))
assertEquals("plain\\ttext", MetricsHelper.sanitizeTagValue("plain\ttext"))

// Unescapable control characters remain, as they'll be stripped by relay
assertEquals("plain\u0007text", MetricsHelper.sanitizeTagValue("plain\u0007text"))
assertEquals("plain\u009Ctext", MetricsHelper.sanitizeTagValue("plain\u009ctext"))
}

for (item in items) {
assertEquals(item.second, MetricsHelper.sanitizeUnit(item.first))
}
@Test
fun sanitizeUnit() {
// no replacement characters for units
assertEquals("abcABC123_abcABC123", MetricsHelper.sanitizeUnit("abcABC123_-./äöü\$%&abcABC123"))
}

@Test
Expand Down

0 comments on commit e74c05b

Please sign in to comment.