Skip to content

Commit

Permalink
feat(metrics): New normalization of keys, values, units (#2946)
Browse files Browse the repository at this point in the history
  • Loading branch information
sentrivana authored Apr 10, 2024
1 parent a584653 commit fab65e6
Show file tree
Hide file tree
Showing 2 changed files with 111 additions and 41 deletions.
39 changes: 33 additions & 6 deletions sentry_sdk/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,6 @@


_in_metrics = ContextVar("in_metrics", default=False)
_sanitize_key = partial(re.compile(r"[^a-zA-Z0-9_/.-]+").sub, "_")
_sanitize_value = partial(re.compile(r"[^\w\d\s_:/@\.{}\[\]$-]+", re.UNICODE).sub, "")
_set = set # set is shadowed below

GOOD_TRANSACTION_SOURCES = frozenset(
Expand All @@ -67,6 +65,32 @@
]
)

_sanitize_unit = partial(re.compile(r"[^a-zA-Z0-9_]+").sub, "")
_sanitize_metric_key = partial(re.compile(r"[^a-zA-Z0-9_\-.]+").sub, "_")
_sanitize_tag_key = partial(re.compile(r"[^a-zA-Z0-9_\-.\/]+").sub, "")
_TAG_VALUE_SANITIZATION_TABLE = {
"\n": "\\n",
"\r": "\\r",
"\t": "\\t",
"\\": "\\\\",
"|": "\\u{7c}",
",": "\\u{2c}",
}


def _sanitize_tag_value(value):
# type: (str) -> str
return "".join(
[
(
_TAG_VALUE_SANITIZATION_TABLE[char]
if char in _TAG_VALUE_SANITIZATION_TABLE
else char
)
for char in value
]
)


def get_code_location(stacklevel):
# type: (int) -> Optional[Dict[str, Any]]
Expand Down Expand Up @@ -269,7 +293,8 @@ def _encode_metrics(flushable_buckets):
for timestamp, buckets in flushable_buckets:
for bucket_key, metric in iteritems(buckets):
metric_type, metric_name, metric_unit, metric_tags = bucket_key
metric_name = _sanitize_key(metric_name)
metric_name = _sanitize_metric_key(metric_name)
metric_unit = _sanitize_unit(metric_unit)
_write(metric_name.encode("utf-8"))
_write(b"@")
_write(metric_unit.encode("utf-8"))
Expand All @@ -285,7 +310,7 @@ def _encode_metrics(flushable_buckets):
_write(b"|#")
first = True
for tag_key, tag_value in metric_tags:
tag_key = _sanitize_key(tag_key)
tag_key = _sanitize_tag_key(tag_key)
if not tag_key:
continue
if first:
Expand All @@ -294,7 +319,7 @@ def _encode_metrics(flushable_buckets):
_write(b",")
_write(tag_key.encode("utf-8"))
_write(b":")
_write(_sanitize_value(tag_value).encode("utf-8"))
_write(_sanitize_tag_value(tag_value).encode("utf-8"))

_write(b"|T")
_write(str(timestamp).encode("ascii"))
Expand All @@ -309,7 +334,9 @@ def _encode_locations(timestamp, code_locations):

for key, loc in code_locations:
metric_type, name, unit = key
mri = "{}:{}@{}".format(metric_type, _sanitize_key(name), unit)
mri = "{}:{}@{}".format(
metric_type, _sanitize_metric_key(name), _sanitize_unit(unit)
)

loc["type"] = "location"
mapping.setdefault(mri, []).append(loc)
Expand Down
113 changes: 78 additions & 35 deletions tests/test_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -677,56 +677,99 @@ def test_metric_summaries(

@minimum_python_37_with_gevent
@pytest.mark.forked
def test_tag_normalization(
sentry_init, capture_envelopes, maybe_monkeypatched_threading
@pytest.mark.parametrize(
"metric_name,metric_unit,expected_name",
[
("first-metric", "nano-second", "first-metric@nanosecond"),
("another_metric?", "nano second", "another_metric_@nanosecond"),
(
"metric",
"nanosecond",
"metric@nanosecond",
),
(
"my.amaze.metric I guess",
"nano|\nsecond",
"my.amaze.metric_I_guess@nanosecond",
),
# fmt: off
(u"métríc", u"nanöseconď", u"m_tr_c@nansecon"),
# fmt: on
],
)
def test_metric_name_normalization(
sentry_init,
capture_envelopes,
metric_name,
metric_unit,
expected_name,
maybe_monkeypatched_threading,
):
sentry_init(
release="[email protected]",
environment="not-fun-env",
_experiments={"enable_metrics": True, "metric_code_locations": False},
)
ts = time.time()
envelopes = capture_envelopes()

# fmt: off
metrics.distribution("a", 1.0, tags={"foo-bar": "%$foo"}, timestamp=ts)
metrics.distribution("b", 1.0, tags={"foo$$$bar": "blah{}"}, timestamp=ts)
metrics.distribution("c", 1.0, tags={u"foö-bar": u"snöwmän"}, timestamp=ts)
metrics.distribution("d", 1.0, tags={"route": "GET /foo"}, timestamp=ts)
# fmt: on
metrics.distribution(metric_name, 1.0, unit=metric_unit)

Hub.current.flush()

(envelope,) = envelopes

assert len(envelope.items) == 1
assert envelope.items[0].headers["type"] == "statsd"
m = parse_metrics(envelope.items[0].payload.get_bytes())

assert len(m) == 4
assert m[0][4] == {
"foo-bar": "$foo",
"release": "[email protected]",
"environment": "not-fun-env",
}
parsed_metrics = parse_metrics(envelope.items[0].payload.get_bytes())
assert len(parsed_metrics) == 1

assert m[1][4] == {
"foo_bar": "blah{}",
"release": "[email protected]",
"environment": "not-fun-env",
}
name = parsed_metrics[0][1]
assert name == expected_name

# fmt: off
assert m[2][4] == {
"fo_-bar": u"snöwmän",
"release": "[email protected]",
"environment": "not-fun-env",
}
assert m[3][4] == {
"release": "[email protected]",
"environment": "not-fun-env",
"route": "GET /foo",
}
# fmt: on

@minimum_python_37_with_gevent
@pytest.mark.forked
@pytest.mark.parametrize(
"metric_tag,expected_tag",
[
({"f-oo|bar": "%$foo/"}, {"f-oobar": "%$foo/"}),
({"foo$.$.$bar": "blah{}"}, {"foo..bar": "blah{}"}),
# fmt: off
({u"foö-bar": u"snöwmän"}, {u"fo-bar": u"snöwmän"},),
# fmt: on
({"route": "GET /foo"}, {"route": "GET /foo"}),
({"__bar__": "this | or , that"}, {"__bar__": "this \\u{7c} or \\u{2c} that"}),
({"foo/": "hello!\n\r\t\\"}, {"foo/": "hello!\\n\\r\\t\\\\"}),
],
)
def test_metric_tag_normalization(
sentry_init,
capture_envelopes,
metric_tag,
expected_tag,
maybe_monkeypatched_threading,
):
sentry_init(
_experiments={"enable_metrics": True, "metric_code_locations": False},
)
envelopes = capture_envelopes()

metrics.distribution("a", 1.0, tags=metric_tag)

Hub.current.flush()

(envelope,) = envelopes

assert len(envelope.items) == 1
assert envelope.items[0].headers["type"] == "statsd"

parsed_metrics = parse_metrics(envelope.items[0].payload.get_bytes())
assert len(parsed_metrics) == 1

tags = parsed_metrics[0][4]

expected_tag_key, expected_tag_value = expected_tag.popitem()
assert expected_tag_key in tags
assert tags[expected_tag_key] == expected_tag_value


@minimum_python_37_with_gevent
Expand Down

0 comments on commit fab65e6

Please sign in to comment.