Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implementing W3C TraceContext (fixes #116) #180

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@

_T = typing.TypeVar("_T")

Setter = typing.Callable[[typing.Type[_T], str, str], None]
Getter = typing.Callable[[typing.Type[_T], str], typing.List[str]]
Setter = typing.Callable[[_T, str, str], None]
Getter = typing.Callable[[_T, str], typing.List[str]]


class HTTPTextFormat(abc.ABC):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,28 +12,151 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#

import re
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd prefer not using regular expressions for this, as the format should be simple enough to process with "".split("-"). Also, I think implementing https://www.w3.org/TR/trace-context/#versioning-of-traceparent would be easier that way. But that is no strong opinion.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

that would require checking the validity of the individual values, but I can take a look. regex is very efficient for these type of operations.

I was just curious: the spec looks like it provides no guarantees around the format of future traces. so the most we could do is split on first "-", check the version, and pass-through or ignore if the version is above 00. would that be the right forward-compatible behavior?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think regex is the most efficient way (scientifically) to handle string parsing - if implemented using the correct Ken Thompson DFA/NFA. No strong opinion though :)

import typing

import opentelemetry.trace as trace
from opentelemetry.context.propagation import httptextformat

_T = typing.TypeVar("_T")

# Keys and values are strings of up to 256 printable US-ASCII characters.
# Implementations should conform to the the `W3C Trace Context - Tracestate`_
# spec, which describes additional restrictions on valid field values.
#
# .. _W3C Trace Context - Tracestate:
# https://www.w3.org/TR/trace-context/#tracestate-field


_KEY_WITHOUT_VENDOR_FORMAT = r"[a-z][_0-9a-z\-\*\/]{0,255}"
_KEY_WITH_VENDOR_FORMAT = (
r"[a-z][_0-9a-z\-\*\/]{0,240}@[a-z][_0-9a-z\-\*\/]{0,13}"
)

_KEY_FORMAT = _KEY_WITHOUT_VENDOR_FORMAT + "|" + _KEY_WITH_VENDOR_FORMAT
_VALUE_FORMAT = (
r"[\x20-\x2b\x2d-\x3c\x3e-\x7e]{0,255}[\x21-\x2b\x2d-\x3c\x3e-\x7e]"
)

_DELIMITER_FORMAT = "[ \t]*,[ \t]*"
_MEMBER_FORMAT = "({})(=)({})".format(_KEY_FORMAT, _VALUE_FORMAT)

_DELIMITER_FORMAT_RE = re.compile(_DELIMITER_FORMAT)
_MEMBER_FORMAT_RE = re.compile(_MEMBER_FORMAT)


class TraceContextHTTPTextFormat(httptextformat.HTTPTextFormat):
"""TODO: extracts and injects using w3c TraceContext's headers.
"""Extracts and injects using w3c TraceContext's headers.
"""

_TRACEPARENT_HEADER_NAME = "traceparent"
_TRACESTATE_HEADER_NAME = "tracestate"
_TRACEPARENT_HEADER_FORMAT = (
"^[ \t]*([0-9a-f]{2})-([0-9a-f]{32})-([0-9a-f]{16})-([0-9a-f]{2})"
+ "(-.*)?[ \t]*$"
)
_TRACEPARENT_HEADER_FORMAT_RE = re.compile(_TRACEPARENT_HEADER_FORMAT)

@classmethod
def extract(
self, _get_from_carrier: httptextformat.Getter[_T], _carrier: _T
cls, get_from_carrier: httptextformat.Getter[_T], carrier: _T
) -> trace.SpanContext:
return trace.INVALID_SPAN_CONTEXT
"""Extracts a valid SpanContext from the carrier.
"""
header = get_from_carrier(carrier, cls._TRACEPARENT_HEADER_NAME)

if not header:
return trace.INVALID_SPAN_CONTEXT

match = re.search(cls._TRACEPARENT_HEADER_FORMAT_RE, header[0])
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
match = re.search(cls._TRACEPARENT_HEADER_FORMAT_RE, header[0])
match = cls._TRACEPARENT_HEADER_FORMAT_RE.fullmatch(header[0])
  • If you have a regex object, I think it looks better to use the method instead of the the re module's function (I did not know that this even works)
  • search is bad because it may skip over any junk before the content we are searching for. match is better because it ignores only junk after the end but fullmatch is probably what we should use here.

EDIT: I see now you have used ^$ in the regex, but IMHO fullmatch is still more clear than search (and maybe more efficient).

if not match:
return trace.INVALID_SPAN_CONTEXT

version = match.group(1)
trace_id = match.group(2)
span_id = match.group(3)
trace_options = match.group(4)

if trace_id == "0" * 32 or span_id == "0" * 16:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this check is very useful here. I'd only check after constructing span_context at the end, that way we can do integer comparisons.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That seems ok. The one point there is we have to reject all values if either is invalid, as well as this skips some level of processing. But should be fine.

return trace.INVALID_SPAN_CONTEXT

if version == "00":
if match.group(5):
return trace.INVALID_SPAN_CONTEXT
if version == "ff":
return trace.INVALID_SPAN_CONTEXT

tracestate = trace.TraceState()
for tracestate_header in get_from_carrier(
carrier, cls._TRACESTATE_HEADER_NAME
):
# typing.Dict's update is not recognized by pylint:
# https://github.com/PyCQA/pylint/issues/2420
tracestate.update( # pylint:disable=E1101
_parse_tracestate(tracestate_header)
)

span_context = trace.SpanContext(
trace_id=int(trace_id, 16),
span_id=int(span_id, 16),
trace_options=trace.TraceOptions(trace_options),
trace_state=tracestate,
)

return span_context

@classmethod
def inject(
self,
cls,
context: trace.SpanContext,
set_in_carrier: httptextformat.Setter[_T],
carrier: _T,
) -> None:
pass
if context == trace.INVALID_SPAN_CONTEXT:
return
traceparent_string = "00-{:032x}-{:016x}-{:02x}".format(
context.trace_id, context.span_id, context.trace_options
)
set_in_carrier(
carrier, cls._TRACEPARENT_HEADER_NAME, traceparent_string
)
if context.trace_state:
tracestate_string = _format_tracestate(context.trace_state)
set_in_carrier(
carrier, cls._TRACESTATE_HEADER_NAME, tracestate_string
)


def _parse_tracestate(string: str) -> trace.TraceState:
"""Parse a w3c tracestate header into a TraceState.

Args:
string: the value of the tracestate header.

Returns:
A valid TraceState that contains values extracted from
the tracestate header.
"""
tracestate = trace.TraceState()
for member in re.split(_DELIMITER_FORMAT_RE, string):
match = _MEMBER_FORMAT_RE.match(member)
if not match:
raise ValueError("illegal key-value format %r" % (member))
key, _eq, value = match.groups()
# typing.Dict's update is not recognized by pylint:
# https://github.com/PyCQA/pylint/issues/2420
tracestate[key] = value # pylint:disable=E1137
return tracestate


def _format_tracestate(tracestate: trace.TraceState) -> str:
"""Parse a w3c tracestate header into a TraceState.

Args:
tracestate: the tracestate header to write

Returns:
A string that adheres to the w3c tracestate
header format.
"""
return ",".join(key + "=" + value for key, value in tracestate.items())
Empty file.
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
# Copyright 2019, OpenTelemetry Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import typing
import unittest

from opentelemetry import trace
from opentelemetry.context.propagation import tracecontexthttptextformat

FORMAT = tracecontexthttptextformat.TraceContextHTTPTextFormat()


def get_as_list(
dict_object: typing.Dict[str, str], key: str
) -> typing.List[str]:
value = dict_object.get(key)
return [value] if value is not None else []


class TestTraceContextFormat(unittest.TestCase):
TRACE_ID = int("12345678901234567890123456789012", 16) # type:int
SPAN_ID = int("1234567890123456", 16) # type:int

def test_no_traceparent_header(self):
"""When tracecontext headers are not present, a new SpanContext
should be created.

RFC 4.2.2:

If no traceparent header is received, the vendor creates a new trace-id and parent-id that represents the current request.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you wrap these comments?

"""
output = {} # type:typing.Dict[str, str]
span_context = FORMAT.extract(get_as_list, output)
self.assertTrue(isinstance(span_context, trace.SpanContext))

def test_from_headers_tracestate_entry_limit(self):
"""If more than 33 entries are passed, allow them.

We are explicitly choosing not to limit the list members
as outlined in RFC 3.3.1.1

RFC 3.3.1.1

There can be a maximum of 32 list-members in a list.
"""

span_context = FORMAT.extract(
get_as_list,
{
"traceparent": "00-12345678901234567890123456789012-1234567890123456-00",
"tracestate": ",".join(
[
"a00=0,a01=1,a02=2,a03=3,a04=4,a05=5,a06=6,a07=7,a08=8,a09=9",
"b00=0,b01=1,b02=2,b03=3,b04=4,b05=5,b06=6,b07=7,b08=8,b09=9",
"c00=0,c01=1,c02=2,c03=3,c04=4,c05=5,c06=6,c07=7,c08=8,c09=9",
"d00=0,d01=1,d02=2",
]
),
},
)
self.assertEqual(len(span_context.trace_state), 33)

def test_from_headers_tracestate_duplicated_keys(self):
"""If a duplicate tracestate header is present, the most recent entry
is used.

RFC 3.3.1.4

Only one entry per key is allowed because the entry represents that last position in the trace.
Hence vendors must overwrite their entry upon reentry to their tracing system.

For example, if a vendor name is Congo and a trace started in their system and then went through
a system named Rojo and later returned to Congo, the tracestate value would not be:

congo=congosFirstPosition,rojo=rojosFirstPosition,congo=congosSecondPosition

Instead, the entry would be rewritten to only include the most recent position:

congo=congosSecondPosition,rojo=rojosFirstPosition
"""
span_context = FORMAT.extract(
get_as_list,
{
"traceparent": "00-12345678901234567890123456789012-1234567890123456-00",
"tracestate": "foo=1,bar=2,foo=3",
},
)
self.assertEqual(span_context.trace_state, {"foo": "3", "bar": "2"})

def test_headers_with_tracestate(self):
"""When there is a traceparent and tracestate header, data from
both should be addded to the SpanContext.
"""
traceparent_value = "00-{trace_id}-{span_id}-00".format(
trace_id=format(self.TRACE_ID, "032x"),
span_id=format(self.SPAN_ID, "016x"),
)
tracestate_value = "foo=1,bar=2,baz=3"
span_context = FORMAT.extract(
get_as_list,
{"traceparent": traceparent_value, "tracestate": tracestate_value},
)
self.assertEqual(span_context.trace_id, self.TRACE_ID)
self.assertEqual(span_context.span_id, self.SPAN_ID)
self.assertEqual(
span_context.trace_state, {"foo": "1", "bar": "2", "baz": "3"}
)

output = {} # type:typing.Dict[str, str]
FORMAT.inject(span_context, dict.__setitem__, output)
self.assertEqual(output["traceparent"], traceparent_value)
for pair in ["foo=1", "bar=2", "baz=3"]:
self.assertIn(pair, output["tracestate"])
self.assertEqual(output["tracestate"].count(","), 2)

def test_invalid_trace_id(self):
"""If the trace id is invalid, we must ignore the full traceparent header.

Also ignore any tracestate.

RFC 3.2.2.3

If the trace-id value is invalid (for example if it contains non-allowed characters or all
zeros), vendors MUST ignore the traceparent.

RFC 3.3

If the vendor failed to parse traceparent, it MUST NOT attempt to parse tracestate.
Note that the opposite is not true: failure to parse tracestate MUST NOT affect the parsing of traceparent.
"""
span_context = FORMAT.extract(
get_as_list,
{
"traceparent": "00-00000000000000000000000000000000-1234567890123456-00",
"tracestate": "foo=1,bar=2,foo=3",
},
)
self.assertEqual(span_context, trace.INVALID_SPAN_CONTEXT)

def test_invalid_parent_id(self):
"""If the parent id is invalid, we must ignore the full traceparent header.

Also ignore any tracestate.

RFC 3.2.2.3

Vendors MUST ignore the traceparent when the parent-id is invalid (for example,
if it contains non-lowercase hex characters).

RFC 3.3

If the vendor failed to parse traceparent, it MUST NOT attempt to parse tracestate.
Note that the opposite is not true: failure to parse tracestate MUST NOT affect the parsing of traceparent.
"""
span_context = FORMAT.extract(
get_as_list,
{
"traceparent": "00-00000000000000000000000000000000-0000000000000000-00",
"tracestate": "foo=1,bar=2,foo=3",
},
)
self.assertEqual(span_context, trace.INVALID_SPAN_CONTEXT)

def test_no_send_empty_tracestate(self):
"""If the tracestate is empty, do not set the header.

RFC 3.3.1.1

Empty and whitespace-only list members are allowed. Vendors MUST accept empty
tracestate headers but SHOULD avoid sending them.
"""
output = {} # type:typing.Dict[str, str]
FORMAT.inject(
trace.SpanContext(self.TRACE_ID, self.SPAN_ID),
dict.__setitem__,
output,
)
self.assertTrue("traceparent" in output)
self.assertFalse("tracestate" in output)

def test_format_not_supported(self):
"""If the traceparent does not adhere to the supported format, discard it and
create a new tracecontext.

RFC 4.3

If the version cannot be parsed, the vendor creates a new traceparent header and
deletes tracestate.
"""
span_context = FORMAT.extract(
get_as_list,
{
"traceparent": "00-12345678901234567890123456789012-1234567890123456-00-residue",
"tracestate": "foo=1,bar=2,foo=3",
},
)
self.assertEqual(span_context, trace.INVALID_SPAN_CONTEXT)

def test_propagate_invalid_context(self):
"""Do not propagate invalid trace context.
"""
output = {} # type:typing.Dict[str, str]
FORMAT.inject(trace.INVALID_SPAN_CONTEXT, dict.__setitem__, output)
self.assertFalse("traceparent" in output)