Skip to content

Commit

Permalink
Fix escaping bugs
Browse files Browse the repository at this point in the history
Signed-off-by: Federico Torres <[email protected]>
  • Loading branch information
fedetorres93 committed Jan 21, 2025
1 parent 8444ae9 commit a593d4e
Show file tree
Hide file tree
Showing 3 changed files with 94 additions and 47 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
import org.testcontainers.containers.BindMode;
import org.testcontainers.containers.GenericContainer;

public abstract class ExporterTest {
public abstract class ExporterTest {
private final GenericContainer<?> sampleAppContainer;
private final Volume sampleAppVolume;
protected final String sampleApp;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,7 @@
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import static java.lang.Character.MAX_LOW_SURROGATE;
import static java.lang.Character.MIN_HIGH_SURROGATE;
import static java.lang.Character.*;

/**
* Utility for Prometheus Metric and Label naming.
Expand Down Expand Up @@ -45,8 +44,6 @@ public class PrometheusNaming {
*/
public static final String ESCAPING_KEY = "escaping";

private static final String LOWERHEX = "0123456789abcdef";

private static final String METRIC_NAME_LABEL= "__name__";

/** Legal characters for metric names, including dot. */
Expand Down Expand Up @@ -586,54 +583,51 @@ public static String escapeName(String name, EscapingScheme scheme) {
if (isValidLegacyMetricName(name)) {
return name;
}
for (int i = 0; i < name.length(); i++) {
char c = name.charAt(i);
for (int i = 0; i < name.length(); ) {
int c = name.codePointAt(i);
if (isValidLegacyChar(c, i)) {
escaped.append(c);
escaped.appendCodePoint(c);
} else {
escaped.append('_');
}
i += Character.charCount(c);
}
return escaped.toString();
case DOTS_ESCAPING:
// Do not early return for legacy valid names, we still escape underscores.
for (int i = 0; i < name.length(); i++) {
char c = name.charAt(i);
for (int i = 0; i < name.length(); ) {
int c = name.codePointAt(i);
if (c == '_') {
escaped.append("__");
} else if (c == '.') {
escaped.append("_dot_");
} else if (isValidLegacyChar(c, i)) {
escaped.append(c);
escaped.appendCodePoint(c);
} else {
escaped.append('_');
escaped.append("__");
}
i += Character.charCount(c);
}
return escaped.toString();
case VALUE_ENCODING_ESCAPING:
if (isValidLegacyMetricName(name)) {
return name;
}
escaped.append("U__");
for (int i = 0; i < name.length(); i++) {
char c = name.charAt(i);
if (isValidLegacyChar(c, i)) {
escaped.append(c);
for (int i = 0; i < name.length(); ) {
int c = name.codePointAt(i);
if (c == '_') {
escaped.append("__");
} else if (isValidLegacyChar(c, i)) {
escaped.appendCodePoint(c);
} else if (!isValidUTF8Char(c)) {
escaped.append("_FFFD_");
} else if (c < 0x100) {
escaped.append('_');
for (int s = 4; s >= 0; s -= 4) {
escaped.append(LOWERHEX.charAt((c >> s) & 0xF));
}
escaped.append('_');
} else {
escaped.append('_');
for (int s = 12; s >= 0; s -= 4) {
escaped.append(LOWERHEX.charAt((c >> s) & 0xF));
}
escaped.append(Integer.toHexString(c));
escaped.append('_');
}
i += Character.charCount(c);
}
return escaped.toString();
default:
Expand Down Expand Up @@ -666,37 +660,42 @@ static String unescapeName(String name, EscapingScheme scheme) {
if (matcher.find()) {
String escapedName = name.substring(matcher.end());
StringBuilder unescaped = new StringBuilder();
TOP:
for (int i = 0; i < escapedName.length(); i++) {
for (int i = 0; i < escapedName.length(); ) {
// All non-underscores are treated normally.
if (escapedName.charAt(i) != '_') {
unescaped.append(escapedName.charAt(i));
int c = escapedName.codePointAt(i);
if (c != '_') {
unescaped.appendCodePoint(c);
i += Character.charCount(c);
continue;
}
i++;
if (i >= escapedName.length()) {
return name;
}
// A double underscore is a single underscore.
if (escapedName.charAt(i) == '_') {
if (escapedName.codePointAt(i) == '_') {
unescaped.append('_');
i++;
continue;
}
// We think we are in a UTF-8 code, process it.
long utf8Val = 0;
int utf8Val = 0;
boolean foundClosingUnderscore = false;
for (int j = 0; i < escapedName.length(); j++) {
// This is too many characters for a UTF-8 value.
if (j > 4) {
if (j >= 6) {
return name;
}
// Found a closing underscore, convert to a char, check validity, and append.
if (escapedName.charAt(i) == '_') {
char utf8Char = (char) utf8Val;
if (!isValidUTF8Char(utf8Char)) {
if (escapedName.codePointAt(i) == '_') {
//char utf8Char = (char) utf8Val;
foundClosingUnderscore = true;
if (!isValidUTF8Char(utf8Val)) {
return name;
}
unescaped.append(utf8Char);
continue TOP;
unescaped.appendCodePoint(utf8Val);
i++;
break;
}
char r = Character.toLowerCase(escapedName.charAt(i));
utf8Val *= 16;
Expand All @@ -709,8 +708,9 @@ static String unescapeName(String name, EscapingScheme scheme) {
}
i++;
}
// Didn't find closing underscore, invalid.
return name;
if (!foundClosingUnderscore) {
return name;
}
}
return unescaped.toString();
} else {
Expand All @@ -721,12 +721,11 @@ static String unescapeName(String name, EscapingScheme scheme) {
}
}

static boolean isValidLegacyChar(char c, int i) {
static boolean isValidLegacyChar(int c, int i) {
return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || c == ':' || (c >= '0' && c <= '9' && i > 0);
}

private static boolean isValidUTF8Char(char b) {
return ((b < MIN_HIGH_SURROGATE || b > MAX_LOW_SURROGATE) &&
(b < 0xFFFE));
private static boolean isValidUTF8Char(int c) {
return (0 <= c && c < MIN_HIGH_SURROGATE) || (MAX_LOW_SURROGATE < c && c <= MAX_CODE_POINT);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,22 @@ public void testEscapeName() {
got = unescapeName(got, EscapingScheme.VALUE_ENCODING_ESCAPING);
assertThat(got).isEqualTo("mysystem.prod.west.cpu.load");

// name with dots and underscore
got = escapeName("mysystem.prod.west.cpu.load_total", EscapingScheme.UNDERSCORE_ESCAPING);
assertThat(got).isEqualTo("mysystem_prod_west_cpu_load_total");
got = unescapeName(got, EscapingScheme.UNDERSCORE_ESCAPING);
assertThat(got).isEqualTo("mysystem_prod_west_cpu_load_total");

got = escapeName("mysystem.prod.west.cpu.load_total", EscapingScheme.DOTS_ESCAPING);
assertThat(got).isEqualTo("mysystem_dot_prod_dot_west_dot_cpu_dot_load__total");
got = unescapeName(got, EscapingScheme.DOTS_ESCAPING);
assertThat(got).isEqualTo("mysystem.prod.west.cpu.load_total");

got = escapeName("mysystem.prod.west.cpu.load_total", EscapingScheme.VALUE_ENCODING_ESCAPING);
assertThat(got).isEqualTo("U__mysystem_2e_prod_2e_west_2e_cpu_2e_load__total");
got = unescapeName(got, EscapingScheme.VALUE_ENCODING_ESCAPING);
assertThat(got).isEqualTo("mysystem.prod.west.cpu.load_total");

// name with dots and colon
got = escapeName("http.status:sum", EscapingScheme.UNDERSCORE_ESCAPING);
assertThat(got).isEqualTo("http_status:sum");
Expand All @@ -222,24 +238,56 @@ public void testEscapeName() {
got = unescapeName(got, EscapingScheme.VALUE_ENCODING_ESCAPING);
assertThat(got).isEqualTo("http.status:sum");

// name with spaces and emoji
got = escapeName("label with 😱", EscapingScheme.UNDERSCORE_ESCAPING);
assertThat(got).isEqualTo("label_with__");
got = unescapeName(got, EscapingScheme.UNDERSCORE_ESCAPING);
assertThat(got).isEqualTo("label_with__");

got = escapeName("label with 😱", EscapingScheme.DOTS_ESCAPING);
assertThat(got).isEqualTo("label__with____");
got = unescapeName(got, EscapingScheme.DOTS_ESCAPING);
assertThat(got).isEqualTo("label_with__");

got = escapeName("label with 😱", EscapingScheme.VALUE_ENCODING_ESCAPING);
assertThat(got).isEqualTo("U__label_20_with_20__1f631_");
got = unescapeName(got, EscapingScheme.VALUE_ENCODING_ESCAPING);
assertThat(got).isEqualTo("label with 😱");

// name with unicode characters > 0x100
got = escapeName("花火", EscapingScheme.UNDERSCORE_ESCAPING);
assertThat(got).isEqualTo("__");
got = unescapeName(got, EscapingScheme.UNDERSCORE_ESCAPING);
assertThat(got).isEqualTo("__");

got = escapeName("花火", EscapingScheme.DOTS_ESCAPING);
assertThat(got).isEqualTo("__");
assertThat(got).isEqualTo("____");
got = unescapeName(got, EscapingScheme.DOTS_ESCAPING);
// Dots-replacement cannot tell a replaced invalid character from an escaped
// underscore: both are written as "__" and both unescape to a single "_".
assertThat(got).isEqualTo("_");
assertThat(got).isEqualTo("__");

got = escapeName("花火", EscapingScheme.VALUE_ENCODING_ESCAPING);
assertThat(got).isEqualTo("U___82b1__706b_");
got = unescapeName(got, EscapingScheme.VALUE_ENCODING_ESCAPING);
assertThat(got).isEqualTo("花火");

// name with spaces and edge-case value
got = escapeName("label with Ā", EscapingScheme.UNDERSCORE_ESCAPING);
assertThat(got).isEqualTo("label_with__");
got = unescapeName(got, EscapingScheme.UNDERSCORE_ESCAPING);
assertThat(got).isEqualTo("label_with__");

got = escapeName("label with Ā", EscapingScheme.DOTS_ESCAPING);
assertThat(got).isEqualTo("label__with____");
got = unescapeName(got, EscapingScheme.DOTS_ESCAPING);
assertThat(got).isEqualTo("label_with__");

got = escapeName("label with Ā", EscapingScheme.VALUE_ENCODING_ESCAPING);
assertThat(got).isEqualTo("U__label_20_with_20__100_");
got = unescapeName(got, EscapingScheme.VALUE_ENCODING_ESCAPING);
assertThat(got).isEqualTo("label with Ā");

nameValidationScheme = ValidationScheme.LEGACY_VALIDATION;
}

Expand Down Expand Up @@ -435,13 +483,13 @@ public void testEscapeMetricSnapshotGaugeEscapingNeeded() {
.build();
MetricSnapshot got = escapeMetricSnapshot(original, EscapingScheme.DOTS_ESCAPING);

assertThat(got.getMetadata().getName()).isEqualTo("unicode_dot_and_dot_dots_dot___");
assertThat(got.getMetadata().getName()).isEqualTo("unicode_dot_and_dot_dots_dot_____");
assertThat(got.getMetadata().getHelp()).isEqualTo("some help text");
assertThat(got.getDataPoints().size()).isEqualTo(1);
GaugeSnapshot.GaugeDataPointSnapshot data = (GaugeSnapshot.GaugeDataPointSnapshot) got.getDataPoints().get(0);
assertThat(data.getValue()).isEqualTo(34.2);
assertThat((Iterable<? extends Label>) data.getLabels()).isEqualTo(Labels.builder()
.label("__name__", "unicode_dot_and_dot_dots_dot___")
.label("__name__", "unicode_dot_and_dot_dots_dot_____")
.label("some_label", "label??value")
.build());
assertThat(original.getMetadata().getName()).isEqualTo("unicode.and.dots.花火");
Expand Down

0 comments on commit a593d4e

Please sign in to comment.