-
Notifications
You must be signed in to change notification settings - Fork 647
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Implementing W3C TraceContext (fixes #116) #180
Changes from all commits
b01a50c
9d462c7
4d5b716
b2b8807
a9f939e
21b4681
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -12,28 +12,151 @@ | |||||
# See the License for the specific language governing permissions and | ||||||
# limitations under the License. | ||||||
# | ||||||
|
||||||
import re | ||||||
import typing | ||||||
|
||||||
import opentelemetry.trace as trace | ||||||
from opentelemetry.context.propagation import httptextformat | ||||||
|
||||||
_T = typing.TypeVar("_T") | ||||||
|
||||||
# Keys and values are strings of up to 256 printable US-ASCII characters. | ||||||
# Implementations should conform to the the `W3C Trace Context - Tracestate`_ | ||||||
# spec, which describes additional restrictions on valid field values. | ||||||
# | ||||||
# .. _W3C Trace Context - Tracestate: | ||||||
# https://www.w3.org/TR/trace-context/#tracestate-field | ||||||
|
||||||
|
||||||
_KEY_WITHOUT_VENDOR_FORMAT = r"[a-z][_0-9a-z\-\*\/]{0,255}" | ||||||
_KEY_WITH_VENDOR_FORMAT = ( | ||||||
r"[a-z][_0-9a-z\-\*\/]{0,240}@[a-z][_0-9a-z\-\*\/]{0,13}" | ||||||
) | ||||||
|
||||||
_KEY_FORMAT = _KEY_WITHOUT_VENDOR_FORMAT + "|" + _KEY_WITH_VENDOR_FORMAT | ||||||
_VALUE_FORMAT = ( | ||||||
r"[\x20-\x2b\x2d-\x3c\x3e-\x7e]{0,255}[\x21-\x2b\x2d-\x3c\x3e-\x7e]" | ||||||
) | ||||||
|
||||||
_DELIMITER_FORMAT = "[ \t]*,[ \t]*" | ||||||
_MEMBER_FORMAT = "({})(=)({})".format(_KEY_FORMAT, _VALUE_FORMAT) | ||||||
|
||||||
_DELIMITER_FORMAT_RE = re.compile(_DELIMITER_FORMAT) | ||||||
_MEMBER_FORMAT_RE = re.compile(_MEMBER_FORMAT) | ||||||
|
||||||
|
||||||
class TraceContextHTTPTextFormat(httptextformat.HTTPTextFormat): | ||||||
"""TODO: extracts and injects using w3c TraceContext's headers. | ||||||
"""Extracts and injects using w3c TraceContext's headers. | ||||||
""" | ||||||
|
||||||
_TRACEPARENT_HEADER_NAME = "traceparent" | ||||||
_TRACESTATE_HEADER_NAME = "tracestate" | ||||||
_TRACEPARENT_HEADER_FORMAT = ( | ||||||
"^[ \t]*([0-9a-f]{2})-([0-9a-f]{32})-([0-9a-f]{16})-([0-9a-f]{2})" | ||||||
+ "(-.*)?[ \t]*$" | ||||||
) | ||||||
_TRACEPARENT_HEADER_FORMAT_RE = re.compile(_TRACEPARENT_HEADER_FORMAT) | ||||||
|
||||||
@classmethod | ||||||
def extract( | ||||||
self, _get_from_carrier: httptextformat.Getter[_T], _carrier: _T | ||||||
cls, get_from_carrier: httptextformat.Getter[_T], carrier: _T | ||||||
) -> trace.SpanContext: | ||||||
return trace.INVALID_SPAN_CONTEXT | ||||||
"""Extracts a valid SpanContext from the carrier. | ||||||
""" | ||||||
header = get_from_carrier(carrier, cls._TRACEPARENT_HEADER_NAME) | ||||||
|
||||||
if not header: | ||||||
return trace.INVALID_SPAN_CONTEXT | ||||||
|
||||||
match = re.search(cls._TRACEPARENT_HEADER_FORMAT_RE, header[0]) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
EDIT: I see now you have used |
||||||
if not match: | ||||||
return trace.INVALID_SPAN_CONTEXT | ||||||
|
||||||
version = match.group(1) | ||||||
trace_id = match.group(2) | ||||||
span_id = match.group(3) | ||||||
trace_options = match.group(4) | ||||||
|
||||||
if trace_id == "0" * 32 or span_id == "0" * 16: | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think this check is very useful here. I'd only check after constructing There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That seems ok. The one point there is we have to reject all values if either is invalid, as well as this skips some level of processing. But should be fine. |
||||||
return trace.INVALID_SPAN_CONTEXT | ||||||
|
||||||
if version == "00": | ||||||
if match.group(5): | ||||||
return trace.INVALID_SPAN_CONTEXT | ||||||
if version == "ff": | ||||||
return trace.INVALID_SPAN_CONTEXT | ||||||
|
||||||
tracestate = trace.TraceState() | ||||||
for tracestate_header in get_from_carrier( | ||||||
carrier, cls._TRACESTATE_HEADER_NAME | ||||||
): | ||||||
# typing.Dict's update is not recognized by pylint: | ||||||
# https://github.com/PyCQA/pylint/issues/2420 | ||||||
tracestate.update( # pylint:disable=E1101 | ||||||
_parse_tracestate(tracestate_header) | ||||||
) | ||||||
|
||||||
span_context = trace.SpanContext( | ||||||
trace_id=int(trace_id, 16), | ||||||
span_id=int(span_id, 16), | ||||||
trace_options=trace.TraceOptions(trace_options), | ||||||
trace_state=tracestate, | ||||||
) | ||||||
|
||||||
return span_context | ||||||
|
||||||
@classmethod | ||||||
def inject( | ||||||
self, | ||||||
cls, | ||||||
context: trace.SpanContext, | ||||||
set_in_carrier: httptextformat.Setter[_T], | ||||||
carrier: _T, | ||||||
) -> None: | ||||||
pass | ||||||
if context == trace.INVALID_SPAN_CONTEXT: | ||||||
return | ||||||
traceparent_string = "00-{:032x}-{:016x}-{:02x}".format( | ||||||
context.trace_id, context.span_id, context.trace_options | ||||||
) | ||||||
set_in_carrier( | ||||||
carrier, cls._TRACEPARENT_HEADER_NAME, traceparent_string | ||||||
) | ||||||
if context.trace_state: | ||||||
tracestate_string = _format_tracestate(context.trace_state) | ||||||
set_in_carrier( | ||||||
carrier, cls._TRACESTATE_HEADER_NAME, tracestate_string | ||||||
) | ||||||
|
||||||
|
||||||
def _parse_tracestate(string: str) -> trace.TraceState: | ||||||
"""Parse a w3c tracestate header into a TraceState. | ||||||
|
||||||
Args: | ||||||
string: the value of the tracestate header. | ||||||
|
||||||
Returns: | ||||||
A valid TraceState that contains values extracted from | ||||||
the tracestate header. | ||||||
""" | ||||||
tracestate = trace.TraceState() | ||||||
for member in re.split(_DELIMITER_FORMAT_RE, string): | ||||||
match = _MEMBER_FORMAT_RE.match(member) | ||||||
if not match: | ||||||
raise ValueError("illegal key-value format %r" % (member)) | ||||||
key, _eq, value = match.groups() | ||||||
# typing.Dict's update is not recognized by pylint: | ||||||
# https://github.com/PyCQA/pylint/issues/2420 | ||||||
tracestate[key] = value # pylint:disable=E1137 | ||||||
return tracestate | ||||||
|
||||||
|
||||||
def _format_tracestate(tracestate: trace.TraceState) -> str: | ||||||
"""Parse a w3c tracestate header into a TraceState. | ||||||
|
||||||
Args: | ||||||
tracestate: the tracestate header to write | ||||||
|
||||||
Returns: | ||||||
A string that adheres to the w3c tracestate | ||||||
header format. | ||||||
""" | ||||||
return ",".join(key + "=" + value for key, value in tracestate.items()) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,215 @@ | ||
# Copyright 2019, OpenTelemetry Authors | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
import typing | ||
import unittest | ||
|
||
from opentelemetry import trace | ||
from opentelemetry.context.propagation import tracecontexthttptextformat | ||
|
||
FORMAT = tracecontexthttptextformat.TraceContextHTTPTextFormat() | ||
|
||
|
||
def get_as_list( | ||
dict_object: typing.Dict[str, str], key: str | ||
) -> typing.List[str]: | ||
value = dict_object.get(key) | ||
return [value] if value is not None else [] | ||
|
||
|
||
class TestTraceContextFormat(unittest.TestCase): | ||
TRACE_ID = int("12345678901234567890123456789012", 16) # type:int | ||
SPAN_ID = int("1234567890123456", 16) # type:int | ||
|
||
def test_no_traceparent_header(self): | ||
"""When tracecontext headers are not present, a new SpanContext | ||
should be created. | ||
|
||
RFC 4.2.2: | ||
|
||
If no traceparent header is received, the vendor creates a new trace-id and parent-id that represents the current request. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you wrap these comments? |
||
""" | ||
output = {} # type:typing.Dict[str, str] | ||
span_context = FORMAT.extract(get_as_list, output) | ||
self.assertTrue(isinstance(span_context, trace.SpanContext)) | ||
|
||
def test_from_headers_tracestate_entry_limit(self): | ||
"""If more than 33 entries are passed, allow them. | ||
|
||
We are explicitly choosing not to limit the list members | ||
as outlined in RFC 3.3.1.1 | ||
|
||
RFC 3.3.1.1 | ||
|
||
There can be a maximum of 32 list-members in a list. | ||
""" | ||
|
||
span_context = FORMAT.extract( | ||
get_as_list, | ||
{ | ||
"traceparent": "00-12345678901234567890123456789012-1234567890123456-00", | ||
"tracestate": ",".join( | ||
[ | ||
"a00=0,a01=1,a02=2,a03=3,a04=4,a05=5,a06=6,a07=7,a08=8,a09=9", | ||
"b00=0,b01=1,b02=2,b03=3,b04=4,b05=5,b06=6,b07=7,b08=8,b09=9", | ||
"c00=0,c01=1,c02=2,c03=3,c04=4,c05=5,c06=6,c07=7,c08=8,c09=9", | ||
"d00=0,d01=1,d02=2", | ||
] | ||
), | ||
}, | ||
) | ||
self.assertEqual(len(span_context.trace_state), 33) | ||
|
||
def test_from_headers_tracestate_duplicated_keys(self): | ||
"""If a duplicate tracestate header is present, the most recent entry | ||
is used. | ||
|
||
RFC 3.3.1.4 | ||
|
||
Only one entry per key is allowed because the entry represents that last position in the trace. | ||
Hence vendors must overwrite their entry upon reentry to their tracing system. | ||
|
||
For example, if a vendor name is Congo and a trace started in their system and then went through | ||
a system named Rojo and later returned to Congo, the tracestate value would not be: | ||
|
||
congo=congosFirstPosition,rojo=rojosFirstPosition,congo=congosSecondPosition | ||
|
||
Instead, the entry would be rewritten to only include the most recent position: | ||
|
||
congo=congosSecondPosition,rojo=rojosFirstPosition | ||
""" | ||
span_context = FORMAT.extract( | ||
get_as_list, | ||
{ | ||
"traceparent": "00-12345678901234567890123456789012-1234567890123456-00", | ||
"tracestate": "foo=1,bar=2,foo=3", | ||
}, | ||
) | ||
self.assertEqual(span_context.trace_state, {"foo": "3", "bar": "2"}) | ||
|
||
def test_headers_with_tracestate(self): | ||
"""When there is a traceparent and tracestate header, data from | ||
both should be addded to the SpanContext. | ||
""" | ||
traceparent_value = "00-{trace_id}-{span_id}-00".format( | ||
trace_id=format(self.TRACE_ID, "032x"), | ||
span_id=format(self.SPAN_ID, "016x"), | ||
) | ||
tracestate_value = "foo=1,bar=2,baz=3" | ||
span_context = FORMAT.extract( | ||
get_as_list, | ||
{"traceparent": traceparent_value, "tracestate": tracestate_value}, | ||
) | ||
self.assertEqual(span_context.trace_id, self.TRACE_ID) | ||
self.assertEqual(span_context.span_id, self.SPAN_ID) | ||
self.assertEqual( | ||
span_context.trace_state, {"foo": "1", "bar": "2", "baz": "3"} | ||
) | ||
|
||
output = {} # type:typing.Dict[str, str] | ||
FORMAT.inject(span_context, dict.__setitem__, output) | ||
self.assertEqual(output["traceparent"], traceparent_value) | ||
for pair in ["foo=1", "bar=2", "baz=3"]: | ||
self.assertIn(pair, output["tracestate"]) | ||
self.assertEqual(output["tracestate"].count(","), 2) | ||
|
||
def test_invalid_trace_id(self): | ||
"""If the trace id is invalid, we must ignore the full traceparent header. | ||
|
||
Also ignore any tracestate. | ||
|
||
RFC 3.2.2.3 | ||
|
||
If the trace-id value is invalid (for example if it contains non-allowed characters or all | ||
zeros), vendors MUST ignore the traceparent. | ||
|
||
RFC 3.3 | ||
|
||
If the vendor failed to parse traceparent, it MUST NOT attempt to parse tracestate. | ||
Note that the opposite is not true: failure to parse tracestate MUST NOT affect the parsing of traceparent. | ||
""" | ||
span_context = FORMAT.extract( | ||
get_as_list, | ||
{ | ||
"traceparent": "00-00000000000000000000000000000000-1234567890123456-00", | ||
"tracestate": "foo=1,bar=2,foo=3", | ||
}, | ||
) | ||
self.assertEqual(span_context, trace.INVALID_SPAN_CONTEXT) | ||
|
||
def test_invalid_parent_id(self): | ||
"""If the parent id is invalid, we must ignore the full traceparent header. | ||
|
||
Also ignore any tracestate. | ||
|
||
RFC 3.2.2.3 | ||
|
||
Vendors MUST ignore the traceparent when the parent-id is invalid (for example, | ||
if it contains non-lowercase hex characters). | ||
|
||
RFC 3.3 | ||
|
||
If the vendor failed to parse traceparent, it MUST NOT attempt to parse tracestate. | ||
Note that the opposite is not true: failure to parse tracestate MUST NOT affect the parsing of traceparent. | ||
""" | ||
span_context = FORMAT.extract( | ||
get_as_list, | ||
{ | ||
"traceparent": "00-00000000000000000000000000000000-0000000000000000-00", | ||
"tracestate": "foo=1,bar=2,foo=3", | ||
}, | ||
) | ||
self.assertEqual(span_context, trace.INVALID_SPAN_CONTEXT) | ||
|
||
def test_no_send_empty_tracestate(self): | ||
"""If the tracestate is empty, do not set the header. | ||
|
||
RFC 3.3.1.1 | ||
|
||
Empty and whitespace-only list members are allowed. Vendors MUST accept empty | ||
tracestate headers but SHOULD avoid sending them. | ||
""" | ||
output = {} # type:typing.Dict[str, str] | ||
FORMAT.inject( | ||
trace.SpanContext(self.TRACE_ID, self.SPAN_ID), | ||
dict.__setitem__, | ||
output, | ||
) | ||
self.assertTrue("traceparent" in output) | ||
self.assertFalse("tracestate" in output) | ||
|
||
def test_format_not_supported(self): | ||
"""If the traceparent does not adhere to the supported format, discard it and | ||
create a new tracecontext. | ||
|
||
RFC 4.3 | ||
|
||
If the version cannot be parsed, the vendor creates a new traceparent header and | ||
deletes tracestate. | ||
""" | ||
span_context = FORMAT.extract( | ||
get_as_list, | ||
{ | ||
"traceparent": "00-12345678901234567890123456789012-1234567890123456-00-residue", | ||
"tracestate": "foo=1,bar=2,foo=3", | ||
}, | ||
) | ||
self.assertEqual(span_context, trace.INVALID_SPAN_CONTEXT) | ||
|
||
def test_propagate_invalid_context(self): | ||
"""Do not propagate invalid trace context. | ||
""" | ||
output = {} # type:typing.Dict[str, str] | ||
FORMAT.inject(trace.INVALID_SPAN_CONTEXT, dict.__setitem__, output) | ||
self.assertFalse("traceparent" in output) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd prefer not using regular expressions for this, as the format should be simple enough to process with
"".split("-")
. Also, I think implementing https://www.w3.org/TR/trace-context/#versioning-of-traceparent would be easier that way. But that is no strong opinion.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
that would require checking the validity of the individual values, but I can take a look. regex is very efficient for these type of operations.
I was just curious: the spec looks like it provides no guarantees around the format of future traces. so the most we could do is split on first "-", check the version, and pass-through or ignore if the version is above 00. would that be the right forward-compatible behavior?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think regex is the most efficient way (scientifically) to handle string parsing - if implemented using the correct Ken Thompson DFA/NFA. No strong opinion though :)