-
Notifications
You must be signed in to change notification settings - Fork 515
/
tracing_utils.py
412 lines (325 loc) · 12.4 KB
/
tracing_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
import re
import contextlib
import json
import math
from numbers import Real
import sentry_sdk
from sentry_sdk.utils import (
capture_internal_exceptions,
Dsn,
logger,
safe_str,
to_base64,
to_string,
from_base64,
)
from sentry_sdk._compat import PY2
from sentry_sdk._types import MYPY
if PY2:
from collections import Mapping
else:
from collections.abc import Mapping
if MYPY:
import typing
from typing import Generator
from typing import Optional
from typing import Any
from typing import Dict
from typing import Union
# Parser for the `sentry-trace` header, whose full form is
# `<trace_id>-<span_id>-<sampled>`. Every component and each separating dash is
# optional, and leading/trailing spaces or tabs are tolerated. Capture groups,
# in order:
#   1. trace_id - 32 lowercase hex characters
#   2. span_id  - 16 lowercase hex characters
#   3. sampled  - a single `0` or `1`
SENTRY_TRACE_REGEX = re.compile(
    "^[ \t]*([0-9a-f]{32})?-?([0-9a-f]{16})?-?([01])?[ \t]*$"
)
# Regex fragment (a plain string: neither compiled nor anchored) describing a
# base64 value whose trailing `=` / `==` padding has been stripped off:
#   - zero or more complete groups of 4 characters from the base64 alphabet,
#   - optionally followed by 2 or 3 leftover alphabet characters (see
#     https://en.wikipedia.org/wiki/Base64#Decoding_Base64_without_padding for
#     why there is never exactly 1 leftover character).
base64_stripped = "({char}{{4}})*({char}{{2,3}})?".format(char="[a-zA-Z0-9+/]")
# Shape of one tracestate entry, `xxx=yyy`: at least one non-`=` character on
# either side of a single `=`.
tracestate_entry = "[^=]+=[^=]+"

# Anchored at the start of the string: one or more repetitions of the entry
# shape, followed by either a comma or the end of the string.
# NOTE: `[^=]` also matches commas, so the separators between entries are
# absorbed by the entry pattern itself; this is a loose shape check rather than
# a strict entry-by-entry validation.
TRACESTATE_ENTRIES_REGEX = re.compile("^(" + tracestate_entry + ")+(,|$)")
# Locates the `sentry=xxxx` entry inside a tracestate header. Only the presence
# of the entry is checked here; the value itself is not validated.
#
# The pattern has three parts:
#   (?:^|.+,)       the entry is either first, or directly preceded by other
#                   content ending in a comma (so `coolsentry=xxx` is rejected)
#   (sentry=[^,]*)  group 1: sentry's entry, without any trailing comma
#   (?:,.+|$)       either a comma plus further content, or end of string
SENTRY_TRACESTATE_ENTRY_REGEX = re.compile("(?:^|.+,)(sentry=[^,]*)(?:,.+|$)")
class EnvironHeaders(Mapping):  # type: ignore
    """
    Read-only mapping view over an environ-style mapping that exposes only the
    entries whose key carries a given prefix (`HTTP_` by default).

    Lookups take header-style names: the name is upper-cased, dashes become
    underscores and the prefix is prepended before `environ` is consulted.
    Iteration yields the matching keys with the prefix removed.
    """

    def __init__(
        self,
        environ,  # type: typing.Mapping[str, str]
        prefix="HTTP_",  # type: str
    ):
        # type: (...) -> None
        self.environ = environ
        self.prefix = prefix

    def __getitem__(self, key):
        # type: (str) -> Optional[Any]
        # `x-foo` -> `X_FOO` -> `<prefix>X_FOO`; a missing key raises KeyError
        # straight from the underlying mapping
        environ_key = self.prefix + key.replace("-", "_").upper()
        return self.environ[environ_key]

    def __len__(self):
        # type: () -> int
        # the length is defined by what iteration actually yields
        count = 0
        for _ in self:
            count += 1
        return count

    def __iter__(self):
        # type: () -> Generator[str, None, None]
        for raw_key in self.environ:
            # non-string keys cannot be header names
            if isinstance(raw_key, str):
                normalized = raw_key.replace("-", "_").upper()
                if normalized.startswith(self.prefix):
                    yield normalized[len(self.prefix) :]
def has_tracing_enabled(options):
    # type: (Dict[str, Any]) -> bool
    """
    Report whether tracing has been configured.

    Tracing counts as enabled when `options` holds a non-`None` value for
    `traces_sample_rate` or for `traces_sampler`; otherwise returns False.
    """
    tracing_keys = ("traces_sample_rate", "traces_sampler")
    return any(options.get(key) is not None for key in tracing_keys)
def is_valid_sample_rate(rate):
    # type: (Any) -> bool
    """
    Validate a sample rate, logging a warning and returning False when it is
    unusable.

    A valid rate is a boolean or a real number between 0 and 1, inclusive.
    """
    # Booleans and NaN are both instances of Real, which means that a) the Real
    # check accepts booleans as well, and b) NaN has to be rejected explicitly.
    is_usable_number = isinstance(rate, Real) and not math.isnan(rate)
    if not is_usable_number:
        logger.warning(
            "[Tracing] Given sample rate is invalid. Sample rate must be a boolean or a number between 0 and 1. Got {rate} of type {type}.".format(
                rate=rate, type=type(rate)
            )
        )
        return False

    # a boolean becomes 1.0 (True) or 0.0 (False) here
    rate = float(rate)
    if not 0 <= rate <= 1:
        logger.warning(
            "[Tracing] Given sample rate is invalid. Sample rate must be between 0 and 1. Got {rate}.".format(
                rate=rate
            )
        )
        return False

    return True
@contextlib.contextmanager
def record_sql_queries(
    hub,  # type: sentry_sdk.Hub
    cursor,  # type: Any
    query,  # type: Any
    params_list,  # type:  Any
    paramstyle,  # type: Optional[str]
    executemany,  # type: bool
):
    # type: (...) -> Generator[Span, None, None]
    """
    Context manager that records a SQL query as a breadcrumb and wraps its
    execution in a `db` span, which is yielded to the caller.

    Query parameters and the paramstyle are attached only when the client's
    `_experiments["record_sql_params"]` option is truthy; otherwise both are
    dropped. The query text is passed through `_format_sql` first.
    """
    # TODO: Bring back capturing of params by default
    params_enabled = hub.client and hub.client.options["_experiments"].get(
        "record_sql_params", False
    )

    if not params_enabled:
        params_list = None
        paramstyle = None
    else:
        # treat "no params" and `[None]` alike: nothing worth recording
        if not params_list or params_list == [None]:
            params_list = None

        if paramstyle == "pyformat":
            paramstyle = "format"

    query = _format_sql(cursor, query)

    # only values that are actually present end up on the breadcrumb/span
    span_data = {}
    for data_key, value in (
        ("db.params", params_list),
        ("db.paramstyle", paramstyle),
    ):
        if value is not None:
            span_data[data_key] = value
    if executemany:
        span_data["db.executemany"] = True

    # NOTE(review): presumably wrapped so that a failure while adding the
    # breadcrumb cannot break the query itself - confirm against
    # `capture_internal_exceptions`
    with capture_internal_exceptions():
        hub.add_breadcrumb(message=query, category="query", data=span_data)

    with hub.start_span(op="db", description=query) as span:
        for data_key, value in span_data.items():
            span.set_data(data_key, value)
        yield span
def maybe_create_breadcrumbs_from_span(hub, span):
    # type: (sentry_sdk.Hub, Span) -> None
    """
    Add a breadcrumb to `hub` describing `span`, for the span operations that
    have a breadcrumb representation (`redis`, `http` and `subprocess`).

    Spans with any other `op` are ignored.
    """
    op = span.op

    if op == "redis":
        crumb = dict(
            message=span.description,
            type="redis",
            category="redis",
            data=span._tags,
        )
    elif op == "http":
        crumb = dict(type="http", category="httplib", data=span._data)
    elif op == "subprocess":
        crumb = dict(
            type="subprocess",
            category="subprocess",
            message=span.description,
            data=span._data,
        )
    else:
        # no breadcrumb representation for this kind of span
        return

    hub.add_breadcrumb(**crumb)
def extract_sentrytrace_data(header):
    # type: (Optional[str]) -> typing.Mapping[str, Union[str, bool, None]]
    """
    Parse a `sentry-trace` header string into a dictionary with the keys
    `trace_id`, `parent_span_id` and `parent_sampled`.

    Any component that is absent from the header - or every component, if the
    header is empty or does not match the expected format - is `None`.
    """
    parsed = {
        "trace_id": None,
        "parent_span_id": None,
        "parent_sampled": None,
    }  # type: Dict[str, Union[str, bool, None]]

    if not header:
        return parsed

    # drop a `00-` prefix / `-00` suffix wrapper when both are present
    if header.startswith("00-") and header.endswith("-00"):
        header = header[3:-3]

    match = SENTRY_TRACE_REGEX.match(header)
    if match is None:
        return parsed

    raw_trace_id, raw_span_id, raw_sampled = match.groups()

    # re-render the hex ids zero-padded to their fixed widths
    if raw_trace_id:
        parsed["trace_id"] = "{:032x}".format(int(raw_trace_id, 16))
    if raw_span_id:
        parsed["parent_span_id"] = "{:016x}".format(int(raw_span_id, 16))
    # the flag is the string "0" or "1"; both are truthy, so compare explicitly
    if raw_sampled:
        parsed["parent_sampled"] = raw_sampled != "0"

    return parsed
def extract_tracestate_data(header):
    # type: (Optional[str]) -> typing.Mapping[str, Optional[str]]
    """
    Extracts the sentry tracestate value and any third-party data from the given
    tracestate header, returning a dictionary of data.

    The returned dictionary has two keys:

    - `sentry_tracestate`: the full `sentry=xxx` entry, or `None` if the header
      has no such entry or its value is not padding-stripped base64.
    - `third_party_tracestate`: whatever surrounds sentry's entry, re-joined
      with a comma, or `None` if nothing usable is left.
    """
    sentry_entry = third_party_entry = None
    before = after = ""

    if header:
        # find sentry's entry, if any
        sentry_match = SENTRY_TRACESTATE_ENTRY_REGEX.search(header)

        if sentry_match:
            sentry_entry = sentry_match.group(1)

            # Cut the header at the matched entry's own offsets. Splitting on
            # the entry's text instead breaks whenever that text occurs more
            # than once in the header (e.g. `coolsentry=abc,sentry=abc`):
            # the split then yields more than two pieces and the unpacking
            # raises ValueError.
            #
            # Commas adjacent to the cut are stripped so we don't end up with
            # `xxx=yyy,,zzz=qqq` (double commas) when the parts are rejoined.
            before = header[: sentry_match.start(1)].strip(",")
            after = header[sentry_match.end(1) :].strip(",")

            # Extract sentry's value from its entry and test to make sure it's
            # valid; if it isn't, discard the entire entry so that a new one
            # will be created. Only the leading `sentry=` is removed, so that a
            # value which itself contains `sentry=` is validated as-is rather
            # than being silently shortened first.
            sentry_value = sentry_entry[len("sentry=") :]
            if not re.search("^{b64}$".format(b64=base64_stripped), sentry_value):
                sentry_entry = None
        else:
            after = header

        # if either part is invalid or empty, remove it before gluing them together
        third_party_entry = (
            ",".join(filter(TRACESTATE_ENTRIES_REGEX.search, [before, after])) or None
        )

    return {
        "sentry_tracestate": sentry_entry,
        "third_party_tracestate": third_party_entry,
    }
def compute_tracestate_value(data):
    # type: (typing.Mapping[str, str]) -> str
    """
    Serialize `data` to JSON and encode it for use as a tracestate value.

    Note: the result is only the encoded value, NOT the full `sentry=...`
    tracestate entry. An empty string is returned if encoding yields nothing.
    """
    serialized = json.dumps(data, default=safe_str)
    encoded = to_base64(serialized)

    if not encoded:
        return ""

    # Base64 output is padded with `=` to a multiple of 4 characters. The
    # tracestate format already uses `=` between vendor name and value
    # (`sentry=xxx,dogsaregreat=yyy`), so the padding is removed to avoid
    # ambiguity.
    return encoded.rstrip("=")
def compute_tracestate_entry(span):
    # type: (Span) -> Optional[str]
    """
    Build the full `sentry=<value>` tracestate entry for the given span.

    The hub is the span's own hub, falling back to the current hub. Returns
    `None` when that hub has no client or the client's options carry no DSN.
    """
    hub = span.hub or sentry_sdk.Hub.current
    client = hub.client
    scope = hub.scope

    if not (client and client.options.get("dsn")):
        return None

    options = client.options
    user = scope._user

    data = {
        "trace_id": span.trace_id,
        "environment": options["environment"],
        "release": options.get("release"),
        "public_key": Dsn(options["dsn"]).public_key,
    }

    # of the user's fields, only `id` and `segment` are included, and each one
    # only when it has a truthy value
    if user:
        user_data = {}
        for field in ("id", "segment"):
            if user.get(field):
                user_data[field] = user[field]
        if user_data:
            data["user"] = user_data

    transaction = span.containing_transaction
    if transaction:
        data["transaction"] = transaction.name

    return "sentry=" + compute_tracestate_value(data)
def reinflate_tracestate(encoded_tracestate):
    # type: (str) -> typing.Optional[Mapping[str, str]]
    """
    Given a sentry tracestate value in its encoded form, translate it back into
    a dictionary of data.

    Returns `None` if the given value is empty, or if it cannot be decoded and
    parsed as JSON (in which case a warning is logged).
    """
    if not encoded_tracestate:
        return None

    # Base64-encoded strings always come out with a length which is a multiple
    # of 4. In order to achieve this, the end is padded with one or more `=`
    # signs. Because the tracestate standard calls for using `=` signs between
    # vendor name and value (`sentry=xxx,dogsaregreat=yyy`), to avoid confusion
    # we strip the `=` when the data is initially encoded. Python's decoding
    # function requires they be put back. Fortunately, it doesn't complain if
    # there are too many, so we just attach two `=` on spec (there will never
    # be more than 2, see
    # https://en.wikipedia.org/wiki/Base64#Decoding_Base64_without_padding).
    tracestate_json = from_base64(encoded_tracestate + "==")

    inflated_tracestate = None
    failure = None  # type: Any

    # Check the decode result explicitly rather than with `assert`: asserts are
    # removed under `python -O`, and a failed assert carries no message, which
    # left the warning below without a stated reason.
    if tracestate_json is None:
        failure = "value could not be base64-decoded"
    else:
        try:
            inflated_tracestate = json.loads(tracestate_json)
        except Exception as err:
            failure = err

    if failure is not None:
        logger.warning(
            (
                "Unable to attach tracestate data to envelope header: {err}"
                + "\nTracestate value is {encoded_tracestate}"
            ).format(err=failure, encoded_tracestate=encoded_tracestate),
        )

    return inflated_tracestate
def _format_sql(cursor, sql):
    # type: (Any, str) -> Optional[str]
    """
    Render `sql` as text, preferring the cursor's own `mogrify` method when it
    has one and falling back to `to_string(sql)` otherwise.
    """
    formatted = None

    # With psycopg2 the query may be built from Composed objects. psycopg2's
    # mogrify function formats such a query; per-parameter trimming is lost
    # but the formatting is accurate.
    try:
        if hasattr(cursor, "mogrify"):
            formatted = cursor.mogrify(sql)
            if isinstance(formatted, bytes):
                formatted = formatted.decode(cursor.connection.encoding)
    except Exception:
        # best effort only: any failure means we use the fallback below
        formatted = None

    if formatted:
        return formatted
    return to_string(sql)
def has_tracestate_enabled(span=None):
    # type: (Optional[Span]) -> bool
    """
    Report whether the `propagate_tracestate` experiment is switched on.

    The client is taken from the span's hub when a span with a hub is given,
    and from the current hub otherwise. Returns False when there is no client
    or the client has no options.
    """
    hub = (span and span.hub) or sentry_sdk.Hub.current
    client = hub.client
    if not client:
        return False

    options = client.options
    if not options:
        return False

    return bool(options["_experiments"].get("propagate_tracestate"))
# Circular imports
if MYPY:
from sentry_sdk.tracing import Span