open-telemetry · toumorokoshi · Sep 27, 2019 · Sep 26, 2019 · Sep 27, 2019 · Sep 27, 2019
diff --git a/opentelemetry-api/src/opentelemetry/context/propagation/httptextformat.py b/opentelemetry-api/src/opentelemetry/context/propagation/httptextformat.py
@@ -19,8 +19,8 @@
 
 _T = typing.TypeVar("_T")
 
-Setter = typing.Callable[[typing.Type[_T], str, str], None]
-Getter = typing.Callable[[typing.Type[_T], str], typing.List[str]]
+Setter = typing.Callable[[_T, str, str], None]
+Getter = typing.Callable[[_T, str], typing.List[str]]
 
 
 class HTTPTextFormat(abc.ABC):

diff --git a/opentelemetry-api/src/opentelemetry/context/propagation/tracecontexthttptextformat.py b/opentelemetry-api/src/opentelemetry/context/propagation/tracecontexthttptextformat.py
@@ -12,28 +12,151 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-
+import re
 import typing
 
 import opentelemetry.trace as trace
 from opentelemetry.context.propagation import httptextformat
 
 _T = typing.TypeVar("_T")
 
+#    Keys and values are strings of up to 256 printable US-ASCII characters.
+#    Implementations should conform to the `W3C Trace Context - Tracestate`_
+#    spec, which describes additional restrictions on valid field values.
+#
+#    .. _W3C Trace Context - Tracestate:
+#        https://www.w3.org/TR/trace-context/#tracestate-field
+
+
+_KEY_WITHOUT_VENDOR_FORMAT = r"[a-z][_0-9a-z\-\*\/]{0,255}"
+_KEY_WITH_VENDOR_FORMAT = (
+    r"[a-z][_0-9a-z\-\*\/]{0,240}@[a-z][_0-9a-z\-\*\/]{0,13}"
+)
+
+_KEY_FORMAT = _KEY_WITHOUT_VENDOR_FORMAT + "|" + _KEY_WITH_VENDOR_FORMAT
+_VALUE_FORMAT = (
+    r"[\x20-\x2b\x2d-\x3c\x3e-\x7e]{0,255}[\x21-\x2b\x2d-\x3c\x3e-\x7e]"
+)
+
+_DELIMITER_FORMAT = "[ \t]*,[ \t]*"
+_MEMBER_FORMAT = "({})(=)({})".format(_KEY_FORMAT, _VALUE_FORMAT)
+
+_DELIMITER_FORMAT_RE = re.compile(_DELIMITER_FORMAT)
+_MEMBER_FORMAT_RE = re.compile(_MEMBER_FORMAT)
+
 
 class TraceContextHTTPTextFormat(httptextformat.HTTPTextFormat):
-    """TODO: extracts and injects using w3c TraceContext's headers.
+    """Extracts and injects using w3c TraceContext's headers.
     """
 
+    _TRACEPARENT_HEADER_NAME = "traceparent"
+    _TRACESTATE_HEADER_NAME = "tracestate"
+    _TRACEPARENT_HEADER_FORMAT = (
+        "^[ \t]*([0-9a-f]{2})-([0-9a-f]{32})-([0-9a-f]{16})-([0-9a-f]{2})"
+        + "(-.*)?[ \t]*$"
+    )
+    _TRACEPARENT_HEADER_FORMAT_RE = re.compile(_TRACEPARENT_HEADER_FORMAT)
+
+    @classmethod
     def extract(
-        self, _get_from_carrier: httptextformat.Getter[_T], _carrier: _T
+        cls, get_from_carrier: httptextformat.Getter[_T], carrier: _T
     ) -> trace.SpanContext:
-        return trace.INVALID_SPAN_CONTEXT
+        """Extracts a valid SpanContext from the carrier.
+        """
+        header = get_from_carrier(carrier, cls._TRACEPARENT_HEADER_NAME)
+
+        if not header:
+            return trace.INVALID_SPAN_CONTEXT
+
+        match = cls._TRACEPARENT_HEADER_FORMAT_RE.fullmatch(header[0])
+        if not match:
+            return trace.INVALID_SPAN_CONTEXT
+
+        version = match.group(1)
+        trace_id = match.group(2)
+        span_id = match.group(3)
+        trace_options = match.group(4)
+
+        if trace_id == "0" * 32 or span_id == "0" * 16:
+            return trace.INVALID_SPAN_CONTEXT
+
+        if version == "00":
+            if match.group(5):
+                return trace.INVALID_SPAN_CONTEXT
+        if version == "ff":
+            return trace.INVALID_SPAN_CONTEXT
 
+        tracestate = trace.TraceState()
+        for tracestate_header in get_from_carrier(
+            carrier, cls._TRACESTATE_HEADER_NAME
+        ):
+            # typing.Dict's update is not recognized by pylint:
+            # https://github.com/PyCQA/pylint/issues/2420
+            tracestate.update(  # pylint:disable=E1101
+                _parse_tracestate(tracestate_header)
+            )
+
+        span_context = trace.SpanContext(
+            trace_id=int(trace_id, 16),
+            span_id=int(span_id, 16),
+            trace_options=trace.TraceOptions(int(trace_options, 16)),
+            trace_state=tracestate,
+        )
+
+        return span_context
+
+    @classmethod
     def inject(
-        self,
+        cls,
         context: trace.SpanContext,
         set_in_carrier: httptextformat.Setter[_T],
         carrier: _T,
     ) -> None:
-        pass
+        if context == trace.INVALID_SPAN_CONTEXT:
+            return
+        traceparent_string = "00-{:032x}-{:016x}-{:02x}".format(
+            context.trace_id, context.span_id, context.trace_options
+        )
+        set_in_carrier(
+            carrier, cls._TRACEPARENT_HEADER_NAME, traceparent_string
+        )
+        if context.trace_state:
+            tracestate_string = _format_tracestate(context.trace_state)
+            set_in_carrier(
+                carrier, cls._TRACESTATE_HEADER_NAME, tracestate_string
+            )
+
+
+def _parse_tracestate(string: str) -> trace.TraceState:
+    """Parse a w3c tracestate header into a TraceState.
+
+    Args:
+        string: the value of the tracestate header.
+
+    Returns:
+        A valid TraceState that contains values extracted from
+        the tracestate header.
+    """
+    tracestate = trace.TraceState()
+    for member in re.split(_DELIMITER_FORMAT_RE, string):
+        match = _MEMBER_FORMAT_RE.match(member)
+        if not match:
+            raise ValueError("illegal key-value format %r" % (member))
+        key, _eq, value = match.groups()
+        # typing.Dict's item assignment is not recognized by pylint:
+        # https://github.com/PyCQA/pylint/issues/2420
+        tracestate[key] = value  # pylint:disable=E1137
+    return tracestate
+
+
+def _format_tracestate(tracestate: trace.TraceState) -> str:
+    """Format a TraceState as a w3c tracestate header value.
+
+    Args:
+        tracestate: the TraceState to serialize into the header
+
+    Returns:
+        A string that adheres to the w3c tracestate
+        header format.
+    """
+    return ",".join(key + "=" + value for key, value in tracestate.items())
diff --git a/opentelemetry-api/tests/context/__init__.py b/opentelemetry-api/tests/context/__init__.py
diff --git a/opentelemetry-api/tests/context/propagation/__init__.py b/opentelemetry-api/tests/context/propagation/__init__.py
diff --git a/opentelemetry-api/tests/context/propagation/test_tracecontexthttptextformat.py b/opentelemetry-api/tests/context/propagation/test_tracecontexthttptextformat.py
@@ -0,0 +1,215 @@
+# Copyright 2019, OpenTelemetry Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import typing
+import unittest
+
+from opentelemetry import trace
+from opentelemetry.context.propagation import tracecontexthttptextformat
+
+FORMAT = tracecontexthttptextformat.TraceContextHTTPTextFormat()
+
+
+def get_as_list(
+    dict_object: typing.Dict[str, str], key: str
+) -> typing.List[str]:
+    value = dict_object.get(key)
+    return [value] if value is not None else []
+
+
+class TestTraceContextFormat(unittest.TestCase):
+    TRACE_ID = int("12345678901234567890123456789012", 16)  # type:int
+    SPAN_ID = int("1234567890123456", 16)  # type:int
+
+    def test_no_traceparent_header(self):
+        """When tracecontext headers are not present, a new SpanContext
+        should be created.
+
+        RFC 4.2.2:
+
+        If no traceparent header is received, the vendor creates a new trace-id and parent-id that represents the current request.
+        """
+        output = {}  # type:typing.Dict[str, str]
+        span_context = FORMAT.extract(get_as_list, output)
+        self.assertTrue(isinstance(span_context, trace.SpanContext))
+
+    def test_from_headers_tracestate_entry_limit(self):
+        """If more than 32 entries are passed, allow them.
+
+        We are explicitly choosing not to limit the list members
+        as outlined in RFC 3.3.1.1
+
+        RFC 3.3.1.1
+
+        There can be a maximum of 32 list-members in a list.
+        """
+
+        span_context = FORMAT.extract(
+            get_as_list,
+            {
+                "traceparent": "00-12345678901234567890123456789012-1234567890123456-00",
+                "tracestate": ",".join(
+                    [
+                        "a00=0,a01=1,a02=2,a03=3,a04=4,a05=5,a06=6,a07=7,a08=8,a09=9",
+                        "b00=0,b01=1,b02=2,b03=3,b04=4,b05=5,b06=6,b07=7,b08=8,b09=9",
+                        "c00=0,c01=1,c02=2,c03=3,c04=4,c05=5,c06=6,c07=7,c08=8,c09=9",
+                        "d00=0,d01=1,d02=2",
+                    ]
+                ),
+            },
+        )
+        self.assertEqual(len(span_context.trace_state), 33)
+
+    def test_from_headers_tracestate_duplicated_keys(self):
+        """If a key is duplicated in the tracestate header, the most recent
+        entry is used.
+
+        RFC 3.3.1.4
+
+        Only one entry per key is allowed because the entry represents that last position in the trace.
+        Hence vendors must overwrite their entry upon reentry to their tracing system.
+
+        For example, if a vendor name is Congo and a trace started in their system and then went through
+        a system named Rojo and later returned to Congo, the tracestate value would not be:
+
+        congo=congosFirstPosition,rojo=rojosFirstPosition,congo=congosSecondPosition
+
+        Instead, the entry would be rewritten to only include the most recent position:
+
+        congo=congosSecondPosition,rojo=rojosFirstPosition
+        """
+        span_context = FORMAT.extract(
+            get_as_list,
+            {
+                "traceparent": "00-12345678901234567890123456789012-1234567890123456-00",
+                "tracestate": "foo=1,bar=2,foo=3",
+            },
+        )
+        self.assertEqual(span_context.trace_state, {"foo": "3", "bar": "2"})
+
+    def test_headers_with_tracestate(self):
+        """When there is a traceparent and tracestate header, data from
+        both should be added to the SpanContext.
+        """
+        traceparent_value = "00-{trace_id}-{span_id}-00".format(
+            trace_id=format(self.TRACE_ID, "032x"),
+            span_id=format(self.SPAN_ID, "016x"),
+        )
+        tracestate_value = "foo=1,bar=2,baz=3"
+        span_context = FORMAT.extract(
+            get_as_list,
+            {"traceparent": traceparent_value, "tracestate": tracestate_value},
+        )
+        self.assertEqual(span_context.trace_id, self.TRACE_ID)
+        self.assertEqual(span_context.span_id, self.SPAN_ID)
+        self.assertEqual(
+            span_context.trace_state, {"foo": "1", "bar": "2", "baz": "3"}
+        )
+
+        output = {}  # type:typing.Dict[str, str]
+        FORMAT.inject(span_context, dict.__setitem__, output)
+        self.assertEqual(output["traceparent"], traceparent_value)
+        for pair in ["foo=1", "bar=2", "baz=3"]:
+            self.assertIn(pair, output["tracestate"])
+        self.assertEqual(output["tracestate"].count(","), 2)
+
+    def test_invalid_trace_id(self):
+        """If the trace id is invalid, we must ignore the full traceparent header.
+
+        Also ignore any tracestate.
+
+        RFC 3.2.2.3
+
+        If the trace-id value is invalid (for example if it contains non-allowed characters or all
+        zeros), vendors MUST ignore the traceparent.
+
+        RFC 3.3
+
+        If the vendor failed to parse traceparent, it MUST NOT attempt to parse tracestate.
+        Note that the opposite is not true: failure to parse tracestate MUST NOT affect the parsing of traceparent.
+        """
+        span_context = FORMAT.extract(
+            get_as_list,
+            {
+                "traceparent": "00-00000000000000000000000000000000-1234567890123456-00",
+                "tracestate": "foo=1,bar=2,foo=3",
+            },
+        )
+        self.assertEqual(span_context, trace.INVALID_SPAN_CONTEXT)
+
+    def test_invalid_parent_id(self):
+        """If the parent id is invalid, we must ignore the full traceparent header.
+
+        Also ignore any tracestate.
+
+        RFC 3.2.2.3
+
+        Vendors MUST ignore the traceparent when the parent-id is invalid (for example,
+        if it contains non-lowercase hex characters).
+
+        RFC 3.3
+
+        If the vendor failed to parse traceparent, it MUST NOT attempt to parse tracestate.
+        Note that the opposite is not true: failure to parse tracestate MUST NOT affect the parsing of traceparent.
+        """
+        span_context = FORMAT.extract(
+            get_as_list,
+            {
+                "traceparent": "00-12345678901234567890123456789012-0000000000000000-00",
+                "tracestate": "foo=1,bar=2,foo=3",
+            },
+        )
+        self.assertEqual(span_context, trace.INVALID_SPAN_CONTEXT)
+
+    def test_no_send_empty_tracestate(self):
+        """If the tracestate is empty, do not set the header.
+
+        RFC 3.3.1.1
+
+        Empty and whitespace-only list members are allowed. Vendors MUST accept empty
+        tracestate headers but SHOULD avoid sending them.
+        """
+        output = {}  # type:typing.Dict[str, str]
+        FORMAT.inject(
+            trace.SpanContext(self.TRACE_ID, self.SPAN_ID),
+            dict.__setitem__,
+            output,
+        )
+        self.assertTrue("traceparent" in output)
+        self.assertFalse("tracestate" in output)
+
+    def test_format_not_supported(self):
+        """If the traceparent does not adhere to the supported format, discard it and
+        create a new tracecontext.
+
+        RFC 4.3
+
+        If the version cannot be parsed, the vendor creates a new traceparent header and
+        deletes tracestate.
+        """
+        span_context = FORMAT.extract(
+            get_as_list,
+            {
+                "traceparent": "00-12345678901234567890123456789012-1234567890123456-00-residue",
+                "tracestate": "foo=1,bar=2,foo=3",
+            },
+        )
+        self.assertEqual(span_context, trace.INVALID_SPAN_CONTEXT)
+
+    def test_propagate_invalid_context(self):
+        """Do not propagate invalid trace context.
+        """
+        output = {}  # type:typing.Dict[str, str]
+        FORMAT.inject(trace.INVALID_SPAN_CONTEXT, dict.__setitem__, output)
+        self.assertFalse("traceparent" in output)