Skip to content

Commit

Permalink
Implementing W3C TraceContext (fixes #116) (#180)
Browse files Browse the repository at this point in the history
* Implementing TraceContext (fixes #116)

This introduces a w3c TraceContext propagator, primarily inspired by opencensus.
  • Loading branch information
toumorokoshi authored Sep 27, 2019
1 parent d0946cd commit 92353c0
Show file tree
Hide file tree
Showing 5 changed files with 346 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@

_T = typing.TypeVar("_T")

Setter = typing.Callable[[typing.Type[_T], str, str], None]
Getter = typing.Callable[[typing.Type[_T], str], typing.List[str]]
Setter = typing.Callable[[_T, str, str], None]
Getter = typing.Callable[[_T, str], typing.List[str]]


class HTTPTextFormat(abc.ABC):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,28 +12,151 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#

import re
import typing

import opentelemetry.trace as trace
from opentelemetry.context.propagation import httptextformat

_T = typing.TypeVar("_T")

# Keys and values are strings of up to 256 printable US-ASCII characters.
# Implementations should conform to the the `W3C Trace Context - Tracestate`_
# spec, which describes additional restrictions on valid field values.
#
# .. _W3C Trace Context - Tracestate:
# https://www.w3.org/TR/trace-context/#tracestate-field


_KEY_WITHOUT_VENDOR_FORMAT = r"[a-z][_0-9a-z\-\*\/]{0,255}"
_KEY_WITH_VENDOR_FORMAT = (
r"[a-z][_0-9a-z\-\*\/]{0,240}@[a-z][_0-9a-z\-\*\/]{0,13}"
)

_KEY_FORMAT = _KEY_WITHOUT_VENDOR_FORMAT + "|" + _KEY_WITH_VENDOR_FORMAT
_VALUE_FORMAT = (
r"[\x20-\x2b\x2d-\x3c\x3e-\x7e]{0,255}[\x21-\x2b\x2d-\x3c\x3e-\x7e]"
)

_DELIMITER_FORMAT = "[ \t]*,[ \t]*"
_MEMBER_FORMAT = "({})(=)({})".format(_KEY_FORMAT, _VALUE_FORMAT)

_DELIMITER_FORMAT_RE = re.compile(_DELIMITER_FORMAT)
_MEMBER_FORMAT_RE = re.compile(_MEMBER_FORMAT)


class TraceContextHTTPTextFormat(httptextformat.HTTPTextFormat):
"""TODO: extracts and injects using w3c TraceContext's headers.
"""Extracts and injects using w3c TraceContext's headers.
"""

_TRACEPARENT_HEADER_NAME = "traceparent"
_TRACESTATE_HEADER_NAME = "tracestate"
_TRACEPARENT_HEADER_FORMAT = (
"^[ \t]*([0-9a-f]{2})-([0-9a-f]{32})-([0-9a-f]{16})-([0-9a-f]{2})"
+ "(-.*)?[ \t]*$"
)
_TRACEPARENT_HEADER_FORMAT_RE = re.compile(_TRACEPARENT_HEADER_FORMAT)

@classmethod
def extract(
self, _get_from_carrier: httptextformat.Getter[_T], _carrier: _T
cls, get_from_carrier: httptextformat.Getter[_T], carrier: _T
) -> trace.SpanContext:
return trace.INVALID_SPAN_CONTEXT
"""Extracts a valid SpanContext from the carrier.
"""
header = get_from_carrier(carrier, cls._TRACEPARENT_HEADER_NAME)

if not header:
return trace.INVALID_SPAN_CONTEXT

match = re.search(cls._TRACEPARENT_HEADER_FORMAT_RE, header[0])
if not match:
return trace.INVALID_SPAN_CONTEXT

version = match.group(1)
trace_id = match.group(2)
span_id = match.group(3)
trace_options = match.group(4)

if trace_id == "0" * 32 or span_id == "0" * 16:
return trace.INVALID_SPAN_CONTEXT

if version == "00":
if match.group(5):
return trace.INVALID_SPAN_CONTEXT
if version == "ff":
return trace.INVALID_SPAN_CONTEXT

tracestate = trace.TraceState()
for tracestate_header in get_from_carrier(
carrier, cls._TRACESTATE_HEADER_NAME
):
# typing.Dict's update is not recognized by pylint:
# https://github.com/PyCQA/pylint/issues/2420
tracestate.update( # pylint:disable=E1101
_parse_tracestate(tracestate_header)
)

span_context = trace.SpanContext(
trace_id=int(trace_id, 16),
span_id=int(span_id, 16),
trace_options=trace.TraceOptions(trace_options),
trace_state=tracestate,
)

return span_context

@classmethod
def inject(
self,
cls,
context: trace.SpanContext,
set_in_carrier: httptextformat.Setter[_T],
carrier: _T,
) -> None:
pass
if context == trace.INVALID_SPAN_CONTEXT:
return
traceparent_string = "00-{:032x}-{:016x}-{:02x}".format(
context.trace_id, context.span_id, context.trace_options
)
set_in_carrier(
carrier, cls._TRACEPARENT_HEADER_NAME, traceparent_string
)
if context.trace_state:
tracestate_string = _format_tracestate(context.trace_state)
set_in_carrier(
carrier, cls._TRACESTATE_HEADER_NAME, tracestate_string
)


def _parse_tracestate(string: str) -> trace.TraceState:
"""Parse a w3c tracestate header into a TraceState.
Args:
string: the value of the tracestate header.
Returns:
A valid TraceState that contains values extracted from
the tracestate header.
"""
tracestate = trace.TraceState()
for member in re.split(_DELIMITER_FORMAT_RE, string):
match = _MEMBER_FORMAT_RE.match(member)
if not match:
raise ValueError("illegal key-value format %r" % (member))
key, _eq, value = match.groups()
# typing.Dict's update is not recognized by pylint:
# https://github.com/PyCQA/pylint/issues/2420
tracestate[key] = value # pylint:disable=E1137
return tracestate


def _format_tracestate(tracestate: trace.TraceState) -> str:
"""Parse a w3c tracestate header into a TraceState.
Args:
tracestate: the tracestate header to write
Returns:
A string that adheres to the w3c tracestate
header format.
"""
return ",".join(key + "=" + value for key, value in tracestate.items())
Empty file.
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
# Copyright 2019, OpenTelemetry Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import typing
import unittest

from opentelemetry import trace
from opentelemetry.context.propagation import tracecontexthttptextformat

FORMAT = tracecontexthttptextformat.TraceContextHTTPTextFormat()


def get_as_list(
dict_object: typing.Dict[str, str], key: str
) -> typing.List[str]:
value = dict_object.get(key)
return [value] if value is not None else []


class TestTraceContextFormat(unittest.TestCase):
TRACE_ID = int("12345678901234567890123456789012", 16) # type:int
SPAN_ID = int("1234567890123456", 16) # type:int

def test_no_traceparent_header(self):
"""When tracecontext headers are not present, a new SpanContext
should be created.
RFC 4.2.2:
If no traceparent header is received, the vendor creates a new trace-id and parent-id that represents the current request.
"""
output = {} # type:typing.Dict[str, str]
span_context = FORMAT.extract(get_as_list, output)
self.assertTrue(isinstance(span_context, trace.SpanContext))

def test_from_headers_tracestate_entry_limit(self):
"""If more than 33 entries are passed, allow them.
We are explicitly choosing not to limit the list members
as outlined in RFC 3.3.1.1
RFC 3.3.1.1
There can be a maximum of 32 list-members in a list.
"""

span_context = FORMAT.extract(
get_as_list,
{
"traceparent": "00-12345678901234567890123456789012-1234567890123456-00",
"tracestate": ",".join(
[
"a00=0,a01=1,a02=2,a03=3,a04=4,a05=5,a06=6,a07=7,a08=8,a09=9",
"b00=0,b01=1,b02=2,b03=3,b04=4,b05=5,b06=6,b07=7,b08=8,b09=9",
"c00=0,c01=1,c02=2,c03=3,c04=4,c05=5,c06=6,c07=7,c08=8,c09=9",
"d00=0,d01=1,d02=2",
]
),
},
)
self.assertEqual(len(span_context.trace_state), 33)

def test_from_headers_tracestate_duplicated_keys(self):
"""If a duplicate tracestate header is present, the most recent entry
is used.
RFC 3.3.1.4
Only one entry per key is allowed because the entry represents that last position in the trace.
Hence vendors must overwrite their entry upon reentry to their tracing system.
For example, if a vendor name is Congo and a trace started in their system and then went through
a system named Rojo and later returned to Congo, the tracestate value would not be:
congo=congosFirstPosition,rojo=rojosFirstPosition,congo=congosSecondPosition
Instead, the entry would be rewritten to only include the most recent position:
congo=congosSecondPosition,rojo=rojosFirstPosition
"""
span_context = FORMAT.extract(
get_as_list,
{
"traceparent": "00-12345678901234567890123456789012-1234567890123456-00",
"tracestate": "foo=1,bar=2,foo=3",
},
)
self.assertEqual(span_context.trace_state, {"foo": "3", "bar": "2"})

def test_headers_with_tracestate(self):
"""When there is a traceparent and tracestate header, data from
both should be addded to the SpanContext.
"""
traceparent_value = "00-{trace_id}-{span_id}-00".format(
trace_id=format(self.TRACE_ID, "032x"),
span_id=format(self.SPAN_ID, "016x"),
)
tracestate_value = "foo=1,bar=2,baz=3"
span_context = FORMAT.extract(
get_as_list,
{"traceparent": traceparent_value, "tracestate": tracestate_value},
)
self.assertEqual(span_context.trace_id, self.TRACE_ID)
self.assertEqual(span_context.span_id, self.SPAN_ID)
self.assertEqual(
span_context.trace_state, {"foo": "1", "bar": "2", "baz": "3"}
)

output = {} # type:typing.Dict[str, str]
FORMAT.inject(span_context, dict.__setitem__, output)
self.assertEqual(output["traceparent"], traceparent_value)
for pair in ["foo=1", "bar=2", "baz=3"]:
self.assertIn(pair, output["tracestate"])
self.assertEqual(output["tracestate"].count(","), 2)

def test_invalid_trace_id(self):
"""If the trace id is invalid, we must ignore the full traceparent header.
Also ignore any tracestate.
RFC 3.2.2.3
If the trace-id value is invalid (for example if it contains non-allowed characters or all
zeros), vendors MUST ignore the traceparent.
RFC 3.3
If the vendor failed to parse traceparent, it MUST NOT attempt to parse tracestate.
Note that the opposite is not true: failure to parse tracestate MUST NOT affect the parsing of traceparent.
"""
span_context = FORMAT.extract(
get_as_list,
{
"traceparent": "00-00000000000000000000000000000000-1234567890123456-00",
"tracestate": "foo=1,bar=2,foo=3",
},
)
self.assertEqual(span_context, trace.INVALID_SPAN_CONTEXT)

def test_invalid_parent_id(self):
"""If the parent id is invalid, we must ignore the full traceparent header.
Also ignore any tracestate.
RFC 3.2.2.3
Vendors MUST ignore the traceparent when the parent-id is invalid (for example,
if it contains non-lowercase hex characters).
RFC 3.3
If the vendor failed to parse traceparent, it MUST NOT attempt to parse tracestate.
Note that the opposite is not true: failure to parse tracestate MUST NOT affect the parsing of traceparent.
"""
span_context = FORMAT.extract(
get_as_list,
{
"traceparent": "00-00000000000000000000000000000000-0000000000000000-00",
"tracestate": "foo=1,bar=2,foo=3",
},
)
self.assertEqual(span_context, trace.INVALID_SPAN_CONTEXT)

def test_no_send_empty_tracestate(self):
"""If the tracestate is empty, do not set the header.
RFC 3.3.1.1
Empty and whitespace-only list members are allowed. Vendors MUST accept empty
tracestate headers but SHOULD avoid sending them.
"""
output = {} # type:typing.Dict[str, str]
FORMAT.inject(
trace.SpanContext(self.TRACE_ID, self.SPAN_ID),
dict.__setitem__,
output,
)
self.assertTrue("traceparent" in output)
self.assertFalse("tracestate" in output)

def test_format_not_supported(self):
"""If the traceparent does not adhere to the supported format, discard it and
create a new tracecontext.
RFC 4.3
If the version cannot be parsed, the vendor creates a new traceparent header and
deletes tracestate.
"""
span_context = FORMAT.extract(
get_as_list,
{
"traceparent": "00-12345678901234567890123456789012-1234567890123456-00-residue",
"tracestate": "foo=1,bar=2,foo=3",
},
)
self.assertEqual(span_context, trace.INVALID_SPAN_CONTEXT)

def test_propagate_invalid_context(self):
"""Do not propagate invalid trace context.
"""
output = {} # type:typing.Dict[str, str]
FORMAT.inject(trace.INVALID_SPAN_CONTEXT, dict.__setitem__, output)
self.assertFalse("traceparent" in output)

0 comments on commit 92353c0

Please sign in to comment.