open-telemetry · owais · Aug 19, 2021 · Aug 17, 2021 · lzchen · Aug 18, 2021
@@ -16,6 +16,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Add Trace ID validation to meet [TraceID spec](https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/overview.md#spancontext) ([#1992](https://github.com/open-telemetry/opentelemetry-python/pull/1992))
 - Fixed Python 3.10 incompatibility in `opentelemetry-opentracing-shim` tests
   ([#2018](https://github.com/open-telemetry/opentelemetry-python/pull/2018))
+- `opentelemetry-sdk` added support for `OTEL_SPAN_ATTRIBUTE_VALUE_LENGTH_LIMIT`
+  ([#2044](https://github.com/open-telemetry/opentelemetry-python/pull/2044))
+- `opentelemetry-sdk` Fixed bugs (#2041, #2042 & #2045) in Span Limits
+  ([#2044](https://github.com/open-telemetry/opentelemetry-python/pull/2044))
 
 ## [0.23.1](https://github.com/open-telemetry/opentelemetry-python/pull/1987) - 2021-07-26
 

@@ -17,36 +17,59 @@
 import threading
 from collections import OrderedDict
 from collections.abc import MutableMapping
-from typing import MutableSequence, Optional, Sequence
+from typing import Optional, Sequence, Union
 
 from opentelemetry.util import types
 
-_VALID_ATTR_VALUE_TYPES = (bool, str, int, float)
+# bytes are accepted as a user supplied value for attributes but
+# decoded to strings internally.
+_VALID_ATTR_VALUE_TYPES = (bool, str, bytes, int, float)
 
 
 _logger = logging.getLogger(__name__)
 
 
-def _is_valid_attribute_value(value: types.AttributeValue) -> bool:
-    """Checks if attribute value is valid.
+def _clean_attribute(
+    key: str, value: types.AttributeValue, max_len: Optional[int]
+) -> Optional[types.AttributeValue]:
+    """Checks if attribute value is valid and cleans it if required.
+
+    The function returns the cleaned value or None if the value is not valid.
 
     An attribute value is valid if it is either:
         - A primitive type: string, boolean, double precision floating
             point (IEEE 754-1985) or integer.
         - An array of primitive type values. The array MUST be homogeneous,
             i.e. it MUST NOT contain values of different types.
+
+    An attribute needs cleansing if:
+        - Its length is greater than the maximum allowed length.
+        - It needs to be encoded/decoded e.g, bytes to strings.
     """
 
+    if key is None or key == "":
+        _logger.warning("invalid key `%s` (empty or null)", key)
+        return None
+
     if isinstance(value, _VALID_ATTR_VALUE_TYPES):
-        return True
+        return _clean_attribute_value(value, max_len)
 
     if isinstance(value, Sequence):
-
         sequence_first_valid_type = None
+        cleaned_seq = []
+
         for element in value:
+            # None is considered valid in any sequence
+            if element is None:
+                cleaned_seq.append(element)
+
+            element = _clean_attribute_value(element, max_len)
+            # reject invalid elements
             if element is None:
                 continue
+
             element_type = type(element)
+            # Reject attribute value if sequence contains a value with an incompatible type.
             if element_type not in _VALID_ATTR_VALUE_TYPES:
                 _logger.warning(
                     "Invalid type %s in attribute value sequence. Expected one of "
@@ -57,56 +80,51 @@ def _is_valid_attribute_value(value: types.AttributeValue) -> bool:
                         for valid_type in _VALID_ATTR_VALUE_TYPES
                     ],
                 )
-                return False
+                return None
+
             # The type of the sequence must be homogeneous. The first non-None
             # element determines the type of the sequence
             if sequence_first_valid_type is None:
                 sequence_first_valid_type = element_type
-            elif not isinstance(element, sequence_first_valid_type):
+            # use equality instead of isinstance as isinstance(True, int) evaluates to True
+            elif element_type != sequence_first_valid_type:
                 _logger.warning(
                     "Mixed types %s and %s in attribute value sequence",
                     sequence_first_valid_type.__name__,
                     type(element).__name__,
                 )
-                return False
-        return True
+                return None
+
+            cleaned_seq.append(element)
+
+        # Freeze mutable sequences defensively
+        return tuple(cleaned_seq)
 
     _logger.warning(
         "Invalid type %s for attribute value. Expected one of %s or a "
         "sequence of those types",
         type(value).__name__,
         [valid_type.__name__ for valid_type in _VALID_ATTR_VALUE_TYPES],
     )
-    return False
-
+    return None
 
-def _filter_attributes(attributes: types.Attributes) -> None:
-    """Applies attribute validation rules and drops (key, value) pairs
-    that doesn't adhere to attributes specification.
 
-    https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/common/common.md#attributes.
-    """
-    if attributes:
-        for attr_key, attr_value in list(attributes.items()):
-            if not attr_key:
-                _logger.warning("invalid key `%s` (empty or null)", attr_key)
-                attributes.pop(attr_key)
-                continue
+def _clean_attribute_value(
+    value: types.AttributeValue, limit: Optional[int]
+) -> Union[types.AttributeValue, None]:
+    if value is None:
+        return None
 
-            if _is_valid_attribute_value(attr_value):
-                if isinstance(attr_value, MutableSequence):
-                    attributes[attr_key] = tuple(attr_value)
-                if isinstance(attr_value, bytes):
-                    try:
-                        attributes[attr_key] = attr_value.decode()
-                    except ValueError:
-                        attributes.pop(attr_key)
-                        _logger.warning("Byte attribute could not be decoded.")
-            else:
-                attributes.pop(attr_key)
+    if isinstance(value, bytes):
+        try:
+            value = value.decode()
+        except ValueError:
+            _logger.warning("Byte attribute could not be decoded.")
+            return None
 
-
-_DEFAULT_LIMIT = 128
+    if limit is not None and isinstance(value, str):
+        value = value[:limit]
+    return value
 
 
 class BoundedAttributes(MutableMapping):
@@ -118,9 +136,10 @@ class BoundedAttributes(MutableMapping):
 
     def __init__(
         self,
-        maxlen: Optional[int] = _DEFAULT_LIMIT,
+        maxlen: Optional[int] = None,
         attributes: types.Attributes = None,
         immutable: bool = True,
+        max_value_len: Optional[int] = None,
     ):
         if maxlen is not None:
             if not isinstance(maxlen, int) or maxlen < 0:
@@ -129,10 +148,10 @@ def __init__(
                 )
         self.maxlen = maxlen
         self.dropped = 0
+        self.max_value_len = max_value_len
         self._dict = OrderedDict()  # type: OrderedDict
         self._lock = threading.Lock()  # type: threading.Lock
         if attributes:
-            _filter_attributes(attributes)
             for key, value in attributes.items():
                 self[key] = value
         self._immutable = immutable
@@ -158,7 +177,10 @@ def __setitem__(self, key, value):
             elif self.maxlen is not None and len(self._dict) == self.maxlen:
                 del self._dict[next(iter(self._dict.keys()))]
                 self.dropped += 1
-            self._dict[key] = value
+
+            value = _clean_attribute(key, value, self.max_value_len)
+            if value is not None:
+                self._dict[key] = value
 
     def __delitem__(self, key):
         if getattr(self, "_immutable", False):

@@ -16,69 +16,48 @@
 
 import collections
 import unittest
+from typing import MutableSequence
 
-from opentelemetry.attributes import (
-    BoundedAttributes,
-    _filter_attributes,
-    _is_valid_attribute_value,
-)
+from opentelemetry.attributes import BoundedAttributes, _clean_attribute
 
 
 class TestAttributes(unittest.TestCase):
-    def test_is_valid_attribute_value(self):
-        self.assertFalse(_is_valid_attribute_value([1, 2, 3.4, "ss", 4]))
-        self.assertFalse(_is_valid_attribute_value([dict(), 1, 2, 3.4, 4]))
-        self.assertFalse(_is_valid_attribute_value(["sw", "lf", 3.4, "ss"]))
-        self.assertFalse(_is_valid_attribute_value([1, 2, 3.4, 5]))
-        self.assertFalse(_is_valid_attribute_value(dict()))
-        self.assertTrue(_is_valid_attribute_value(True))
-        self.assertTrue(_is_valid_attribute_value("hi"))
-        self.assertTrue(_is_valid_attribute_value(3.4))
-        self.assertTrue(_is_valid_attribute_value(15))
-        self.assertTrue(_is_valid_attribute_value([1, 2, 3, 5]))
-        self.assertTrue(_is_valid_attribute_value([1.2, 2.3, 3.4, 4.5]))
-        self.assertTrue(_is_valid_attribute_value([True, False]))
-        self.assertTrue(_is_valid_attribute_value(["ss", "dw", "fw"]))
-        self.assertTrue(_is_valid_attribute_value([]))
+    def assertValid(self, value, key="k"):
+        expected = value
+        if isinstance(value, MutableSequence):
+            expected = tuple(value)
+        self.assertEqual(_clean_attribute(key, value, None), expected)
+
+    def assertInvalid(self, value, key="k"):
+        self.assertIsNone(_clean_attribute(key, value, None))
+
+    def test_clean_attribute(self):
+        self.assertInvalid([1, 2, 3.4, "ss", 4])
+        self.assertInvalid([dict(), 1, 2, 3.4, 4])
+        self.assertInvalid(["sw", "lf", 3.4, "ss"])
+        self.assertInvalid([1, 2, 3.4, 5])
+        self.assertInvalid(dict())
+        self.assertInvalid([1, True])
+        self.assertValid(True)
+        self.assertValid("hi")
+        self.assertValid(3.4)
+        self.assertValid(15)
+        self.assertValid([1, 2, 3, 5])
+        self.assertValid([1.2, 2.3, 3.4, 4.5])
+        self.assertValid([True, False])
+        self.assertValid(["ss", "dw", "fw"])
+        self.assertValid([])
         # None in sequences are valid
-        self.assertTrue(_is_valid_attribute_value(["A", None, None]))
-        self.assertTrue(_is_valid_attribute_value(["A", None, None, "B"]))
-        self.assertTrue(_is_valid_attribute_value([None, None]))
-        self.assertFalse(_is_valid_attribute_value(["A", None, 1]))
-        self.assertFalse(_is_valid_attribute_value([None, "A", None, 1]))
-
-    def test_filter_attributes(self):
-        attrs_with_invalid_keys = {
-            "": "empty-key",
-            None: "None-value",
-            "attr-key": "attr-value",
-        }
-        _filter_attributes(attrs_with_invalid_keys)
-        self.assertTrue(len(attrs_with_invalid_keys), 1)
-        self.assertEqual(attrs_with_invalid_keys, {"attr-key": "attr-value"})
-
-        attrs_with_invalid_values = {
-            "nonhomogeneous": [1, 2, 3.4, "ss", 4],
-            "nonprimitive": dict(),
-            "mixed": [1, 2.4, "st", dict()],
-            "validkey1": "validvalue1",
-            "intkey": 5,
-            "floatkey": 3.14,
-            "boolkey": True,
-            "valid-byte-string": b"hello-otel",
-        }
-        _filter_attributes(attrs_with_invalid_values)
-        self.assertEqual(len(attrs_with_invalid_values), 5)
-        self.assertEqual(
-            attrs_with_invalid_values,
-            {
-                "validkey1": "validvalue1",
-                "intkey": 5,
-                "floatkey": 3.14,
-                "boolkey": True,
-                "valid-byte-string": "hello-otel",
-            },
-        )
+        self.assertValid(["A", None, None])
+        self.assertValid(["A", None, None, "B"])
+        self.assertValid([None, None])
+        self.assertInvalid(["A", None, 1])
+        self.assertInvalid([None, "A", None, 1])
+
+        # test keys
+        self.assertValid("value", "key")
+        self.assertInvalid("value", "")
+        self.assertInvalid("value", None)
 
 
 class TestBoundedAttributes(unittest.TestCase):

@@ -98,6 +98,22 @@
 Default: 512
 """
 
+OTEL_EVENT_ATTRIBUTE_COUNT_LIMIT = "OTEL_EVENT_ATTRIBUTE_COUNT_LIMIT"
+"""
+.. envvar:: OTEL_EVENT_ATTRIBUTE_COUNT_LIMIT
+
+The :envvar:`OTEL_EVENT_ATTRIBUTE_COUNT_LIMIT` represents the maximum allowed event attribute count.
+Default: 128
+"""
+
+OTEL_LINK_ATTRIBUTE_COUNT_LIMIT = "OTEL_LINK_ATTRIBUTE_COUNT_LIMIT"
+"""
+.. envvar:: OTEL_LINK_ATTRIBUTE_COUNT_LIMIT
+
+The :envvar:`OTEL_LINK_ATTRIBUTE_COUNT_LIMIT` represents the maximum allowed link attribute count.
+Default: 128
+"""
+
 OTEL_SPAN_ATTRIBUTE_COUNT_LIMIT = "OTEL_SPAN_ATTRIBUTE_COUNT_LIMIT"
 """
 .. envvar:: OTEL_SPAN_ATTRIBUTE_COUNT_LIMIT
@@ -106,6 +122,15 @@
 Default: 128
 """
 
+OTEL_SPAN_ATTRIBUTE_VALUE_LENGTH_LIMIT = (
+    "OTEL_SPAN_ATTRIBUTE_VALUE_LENGTH_LIMIT"
+)
+"""
+.. envvar:: OTEL_SPAN_ATTRIBUTE_VALUE_LENGTH_LIMIT
+
+The :envvar:`OTEL_SPAN_ATTRIBUTE_VALUE_LENGTH_LIMIT` represents the maximum allowed length attribute values can have.
+"""
+
 OTEL_SPAN_EVENT_COUNT_LIMIT = "OTEL_SPAN_EVENT_COUNT_LIMIT"
 """
 .. envvar:: OTEL_SPAN_EVENT_COUNT_LIMIT