From 5be7a88d7d5dd3315a2df2660052d916b8302aea Mon Sep 17 00:00:00 2001 From: Owais Lone Date: Tue, 11 May 2021 06:02:44 +0530 Subject: [PATCH] Added max attribute length span limit support --- CHANGELOG.md | 4 + .../src/opentelemetry/attributes/__init__.py | 93 +++++++--- .../tests/attributes/test_attributes.py | 161 +++++++++++++++--- .../sdk/environment_variables/__init__.py | 5 + .../opentelemetry/sdk/resources/__init__.py | 4 +- .../src/opentelemetry/sdk/trace/__init__.py | 65 ++++--- opentelemetry-sdk/tests/trace/test_trace.py | 59 ++++++- 7 files changed, 311 insertions(+), 80 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ab7c822691f..6585d8ac175 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased](https://github.com/open-telemetry/opentelemetry-python/compare/v1.2.0-0.21b0...HEAD) +### Added +- Added support for OTEL_SPAN_ATTRIBUTE_SIZE_LIMIT. + ([#1847](https://github.com/open-telemetry/opentelemetry-python/pull/1847)) + ### Added - Allow span limits to be set programatically via TracerProvider. ([#1877](https://github.com/open-telemetry/opentelemetry-python/pull/1877)) diff --git a/opentelemetry-api/src/opentelemetry/attributes/__init__.py b/opentelemetry-api/src/opentelemetry/attributes/__init__.py index 6875f56631f..eebd9f35326 100644 --- a/opentelemetry-api/src/opentelemetry/attributes/__init__.py +++ b/opentelemetry-api/src/opentelemetry/attributes/__init__.py @@ -15,7 +15,7 @@ import logging from types import MappingProxyType -from typing import MutableSequence, Sequence +from typing import MutableSequence, Optional, Sequence, Tuple from opentelemetry.util import types @@ -25,7 +25,9 @@ _logger = logging.getLogger(__name__) -def _is_valid_attribute_value(value: types.AttributeValue) -> bool: +def _clean_attribute_value( + value: types.AttributeValue, max_size: Optional[int] +) -> Tuple[bool, Optional[types.AttributeValue]]: """Checks if attribute value is valid. An attribute value is valid if it is either: @@ -33,15 +35,30 @@ def _is_valid_attribute_value(value: types.AttributeValue) -> bool: point (IEEE 754-1985) or integer. - An array of primitive type values. The array MUST be homogeneous, i.e. it MUST NOT contain values of different types. + + This function tries to clean attribute values according to the following rules: + - bytes are decoded to strings. + - When ``max_size`` argument is set, any string/bytes values longer than the value + are truncated to the specified max length. + - ``Sequence`` values other than strings such as lists are converted to immutable tuples. + + + If the attribute value is modified or converted to another type, the new value is returned + as the second return value. + If the attribute value is not modified, ``None`` is returned as the second return value. """ - if isinstance(value, Sequence): + # pylint: disable=too-many-branches + modified = False + if isinstance(value, Sequence) and not isinstance(value, (str, bytes)): if len(value) == 0: - return True + return True, None sequence_first_valid_type = None + new_value = [] for element in value: if element is None: + new_value.append(element) continue element_type = type(element) if element_type not in _VALID_ATTR_VALUE_TYPES: @@ -54,7 +71,7 @@ def _is_valid_attribute_value(value: types.AttributeValue) -> bool: for valid_type in _VALID_ATTR_VALUE_TYPES ], ) - return False + return False, None # The type of the sequence must be homogeneous. The first non-None # element determines the type of the sequence if sequence_first_valid_type is None: @@ -65,7 +82,23 @@ def _is_valid_attribute_value(value: types.AttributeValue) -> bool: sequence_first_valid_type.__name__, type(element).__name__, ) - return False + return False, None + if max_size is not None and isinstance(element, str): + element = element[:max_size] + modified = True + new_value.append(element) + # Freeze mutable sequences defensively + if isinstance(value, MutableSequence): + modified = True + value = tuple(new_value) + + elif isinstance(value, bytes): + try: + value = value.decode() + modified = True + except ValueError: + _logger.warning("Byte attribute could not be decoded.") + return False, None elif not isinstance(value, _VALID_ATTR_VALUE_TYPES): _logger.warning( @@ -74,34 +107,38 @@ def _is_valid_attribute_value(value: types.AttributeValue) -> bool: type(value).__name__, [valid_type.__name__ for valid_type in _VALID_ATTR_VALUE_TYPES], ) - return False - return True + return False, None + if max_size is not None and isinstance(value, str): + value = value[:max_size] + modified = True + return True, value if modified else None -def _filter_attributes(attributes: types.Attributes) -> None: - """Applies attribute validation rules and drops (key, value) pairs + +def _clean_attributes( + attributes: types.Attributes, max_size: Optional[int] +) -> None: + """Applies attribute validation rules and truncates/drops (key, value) pairs that doesn't adhere to attributes specification. https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/common/common.md#attributes. """ - if attributes: - for attr_key, attr_value in list(attributes.items()): - if not attr_key: - _logger.warning("invalid key `%s` (empty or null)", attr_key) - attributes.pop(attr_key) - continue - - if _is_valid_attribute_value(attr_value): - if isinstance(attr_value, MutableSequence): - attributes[attr_key] = tuple(attr_value) - if isinstance(attr_value, bytes): - try: - attributes[attr_key] = attr_value.decode() - except ValueError: - attributes.pop(attr_key) - _logger.warning("Byte attribute could not be decoded.") - else: - attributes.pop(attr_key) + if not attributes: + return + + for attr_key, attr_value in list(attributes.items()): + if not attr_key: + _logger.warning("invalid key `%s` (empty or null)", attr_key) + attributes.pop(attr_key) + continue + + valid, cleaned_value = _clean_attribute_value(attr_value, max_size) + if not valid: + attributes.pop(attr_key) + continue + + if cleaned_value is not None: + attributes[attr_key] = cleaned_value def _create_immutable_attributes( diff --git a/opentelemetry-api/tests/attributes/test_attributes.py b/opentelemetry-api/tests/attributes/test_attributes.py index 2a391f78af7..2a0c58e0a55 100644 --- a/opentelemetry-api/tests/attributes/test_attributes.py +++ b/opentelemetry-api/tests/attributes/test_attributes.py @@ -17,42 +17,151 @@ import unittest from opentelemetry.attributes import ( + _clean_attribute_value, + _clean_attributes, _create_immutable_attributes, - _filter_attributes, - _is_valid_attribute_value, ) class TestAttributes(unittest.TestCase): - def test_is_valid_attribute_value(self): - self.assertFalse(_is_valid_attribute_value([1, 2, 3.4, "ss", 4])) - self.assertFalse(_is_valid_attribute_value([dict(), 1, 2, 3.4, 4])) - self.assertFalse(_is_valid_attribute_value(["sw", "lf", 3.4, "ss"])) - self.assertFalse(_is_valid_attribute_value([1, 2, 3.4, 5])) - self.assertFalse(_is_valid_attribute_value(dict())) - self.assertTrue(_is_valid_attribute_value(True)) - self.assertTrue(_is_valid_attribute_value("hi")) - self.assertTrue(_is_valid_attribute_value(3.4)) - self.assertTrue(_is_valid_attribute_value(15)) - self.assertTrue(_is_valid_attribute_value([1, 2, 3, 5])) - self.assertTrue(_is_valid_attribute_value([1.2, 2.3, 3.4, 4.5])) - self.assertTrue(_is_valid_attribute_value([True, False])) - self.assertTrue(_is_valid_attribute_value(["ss", "dw", "fw"])) - self.assertTrue(_is_valid_attribute_value([])) - # None in sequences are valid - self.assertTrue(_is_valid_attribute_value(["A", None, None])) - self.assertTrue(_is_valid_attribute_value(["A", None, None, "B"])) - self.assertTrue(_is_valid_attribute_value([None, None])) - self.assertFalse(_is_valid_attribute_value(["A", None, 1])) - self.assertFalse(_is_valid_attribute_value([None, "A", None, 1])) + def assertCleanAttr(self, value, valid): + # pylint: disable=protected-access + is_valid, cleaned = _clean_attribute_value(value, None) + self.assertEqual(is_valid, valid) + self.assertEqual(cleaned, value if valid else None) - def test_filter_attributes(self): + def test_validate_attribute_value(self): + test_cases = [ + ( + [1, 2, 3.4, "ss", 4], + False, + ), + ( + [dict(), 1, 2, 3.4, 4], + False, + ), + ( + ["sw", "lf", 3.4, "ss"], + False, + ), + ( + [1, 2, 3.4, 5], + False, + ), + ( + dict(), + False, + ), + ( + True, + True, + ), + ( + "hi", + True, + ), + ( + 3.4, + True, + ), + ( + 15, + True, + ), + ( + (1, 2, 3, 5), + True, + ), + ( + (1.2, 2.3, 3.4, 4.5), + True, + ), + ( + (True, False), + True, + ), + ( + ("ss", "dw", "fw"), + True, + ), + ( + [], + True, + ), + # None in sequences are valid + ( + ("A", None, None), + True, + ), + ( + ("A", None, None, "B"), + True, + ), + ( + (None, None), + True, + ), + ( + ["A", None, 1], + False, + ), + ( + [None, "A", None, 1], + False, + ), + ] + + for value, want_valid in test_cases: + # pylint: disable=protected-access + got_valid, cleaned_value = _clean_attribute_value(value, None) + self.assertEqual(got_valid, want_valid) + self.assertIsNone(cleaned_value) + + def test_clean_attribute_value_truncate(self): + test_cases = [ + ("a" * 50, None, None), + ("a" * 50, "a" * 10, 10), + ("abc", "a", 1), + ("abc" * 50, "abcabcabca", 10), + ("abc" * 50, "abc" * 50, 1000), + ("abc" * 50, None, None), + ([1, 2, 3, 5], (1, 2, 3, 5), 10), + ( + [1.2, 2.3], + ( + 1.2, + 2.3, + ), + 20, + ), + ([True, False], (True, False), 10), + ([], None, 10), + (True, None, 10), + ( + 3.4, + None, + True, + ), + ( + 15, + None, + True, + ), + ] + + for value, expected, limit in test_cases: + # pylint: disable=protected-access + valid, cleaned = _clean_attribute_value(value, limit) + self.assertTrue(valid) + self.assertEqual(cleaned, expected) + + def test_clean_attributes(self): attrs_with_invalid_keys = { "": "empty-key", None: "None-value", "attr-key": "attr-value", } - _filter_attributes(attrs_with_invalid_keys) + _clean_attributes(attrs_with_invalid_keys, None) self.assertTrue(len(attrs_with_invalid_keys), 1) self.assertEqual(attrs_with_invalid_keys, {"attr-key": "attr-value"}) @@ -66,7 +175,7 @@ def test_filter_attributes(self): "boolkey": True, "valid-byte-string": b"hello-otel", } - _filter_attributes(attrs_with_invalid_values) + _clean_attributes(attrs_with_invalid_values, None) self.assertEqual(len(attrs_with_invalid_values), 5) self.assertEqual( attrs_with_invalid_values, diff --git a/opentelemetry-sdk/src/opentelemetry/sdk/environment_variables/__init__.py b/opentelemetry-sdk/src/opentelemetry/sdk/environment_variables/__init__.py index 0aa5aa515d7..70f3e3d9441 100644 --- a/opentelemetry-sdk/src/opentelemetry/sdk/environment_variables/__init__.py +++ b/opentelemetry-sdk/src/opentelemetry/sdk/environment_variables/__init__.py @@ -73,6 +73,11 @@ .. envvar:: OTEL_BSP_MAX_EXPORT_BATCH_SIZE """ +OTEL_SPAN_ATTRIBUTE_SIZE_LIMIT = "OTEL_SPAN_ATTRIBUTE_SIZE_LIMIT" +""" +.. envvar:: OTEL_SPAN_ATTRIBUTE_SIZE_LIMIT +""" + OTEL_SPAN_ATTRIBUTE_COUNT_LIMIT = "OTEL_SPAN_ATTRIBUTE_COUNT_LIMIT" """ .. envvar:: OTEL_SPAN_ATTRIBUTE_COUNT_LIMIT diff --git a/opentelemetry-sdk/src/opentelemetry/sdk/resources/__init__.py b/opentelemetry-sdk/src/opentelemetry/sdk/resources/__init__.py index 24e5321ce90..772becac775 100644 --- a/opentelemetry-sdk/src/opentelemetry/sdk/resources/__init__.py +++ b/opentelemetry-sdk/src/opentelemetry/sdk/resources/__init__.py @@ -64,7 +64,7 @@ import pkg_resources -from opentelemetry.attributes import _filter_attributes +from opentelemetry.attributes import _clean_attributes from opentelemetry.sdk.environment_variables import ( OTEL_RESOURCE_ATTRIBUTES, OTEL_SERVICE_NAME, @@ -144,7 +144,7 @@ class Resource: def __init__( self, attributes: Attributes, schema_url: typing.Optional[str] = None ): - _filter_attributes(attributes) + _clean_attributes(attributes, None) self._attributes = attributes.copy() if schema_url is None: schema_url = "" diff --git a/opentelemetry-sdk/src/opentelemetry/sdk/trace/__init__.py b/opentelemetry-sdk/src/opentelemetry/sdk/trace/__init__.py index 66b4b383905..dde6e6ef7a9 100644 --- a/opentelemetry-sdk/src/opentelemetry/sdk/trace/__init__.py +++ b/opentelemetry-sdk/src/opentelemetry/sdk/trace/__init__.py @@ -40,13 +40,14 @@ from opentelemetry import context as context_api from opentelemetry import trace as trace_api from opentelemetry.attributes import ( + _clean_attribute_value, + _clean_attributes, _create_immutable_attributes, - _filter_attributes, - _is_valid_attribute_value, ) from opentelemetry.sdk import util from opentelemetry.sdk.environment_variables import ( OTEL_SPAN_ATTRIBUTE_COUNT_LIMIT, + OTEL_SPAN_ATTRIBUTE_SIZE_LIMIT, OTEL_SPAN_EVENT_COUNT_LIMIT, OTEL_SPAN_LINK_COUNT_LIMIT, ) @@ -65,10 +66,11 @@ _DEFAULT_OTEL_SPAN_ATTRIBUTE_COUNT_LIMIT = 128 _DEFAULT_OTEL_SPAN_EVENT_COUNT_LIMIT = 128 _DEFAULT_OTEL_SPAN_LINK_COUNT_LIMIT = 128 - +_DEFAULT_OTEL_SPAN_ATTRIBUTE_SIZE_LIMIT = None _ENV_VALUE_UNSET = "unset" + # pylint: disable=protected-access _TRACE_SAMPLER = sampling._get_from_env_or_default() @@ -525,6 +527,9 @@ class SpanLimits: max_links: Maximum number of links that can be added to a Span. Environment variable: OTEL_SPAN_LINK_COUNT_LIMIT Default: {_DEFAULT_SPAN_LINK_COUNT_LIMIT} + max_attribute_size: Maximum length a string attribute can have. + Environment variable: OTEL_SPAN_LINK_COUNT_LIMIT + Default: {_DEFAULT_SPAN_LINK_COUNT_LIMIT} """ UNSET = -1 @@ -532,12 +537,14 @@ class SpanLimits: max_attributes: int max_events: int max_links: int + max_attribute_size: Optional[int] def __init__( self, max_attributes: Optional[int] = None, max_events: Optional[int] = None, max_links: Optional[int] = None, + max_attribute_size: Optional[int] = None, ): self.max_attributes = self._from_env_if_absent( max_attributes, @@ -555,9 +562,18 @@ def __init__( _DEFAULT_OTEL_SPAN_LINK_COUNT_LIMIT, ) + self.max_attribute_size = self._from_env_if_absent( + max_attribute_size, + OTEL_SPAN_ATTRIBUTE_SIZE_LIMIT, + _DEFAULT_OTEL_SPAN_ATTRIBUTE_SIZE_LIMIT, + ) + def __repr__(self): - return "max_attributes={}, max_events={}, max_links={}".format( - self.max_attributes, self.max_events, self.max_links + return "max_attributes={}, max_events={}, max_links={}, max_attribute_size={}".format( + self.max_attributes, + self.max_events, + self.max_links, + self.max_attribute_size, ) @classmethod @@ -661,7 +677,7 @@ def __init__( self._limits = limits self._lock = threading.Lock() - _filter_attributes(attributes) + _clean_attributes(attributes, self._limits.max_attribute_size) if not attributes: self._attributes = self._new_attributes() else: @@ -672,7 +688,9 @@ def __init__( self._events = self._new_events() if events: for event in events: - _filter_attributes(event.attributes) + _clean_attributes( + event.attributes, self._limits.max_attribute_size + ) # pylint: disable=protected-access event._attributes = _create_immutable_attributes( event.attributes @@ -684,6 +702,9 @@ def __init__( else: self._links = BoundedList.from_seq(self._limits.max_links, links) + for link in self._links: + _clean_attributes(link.attributes, self._limits.max_attribute_size) + def __repr__(self): return '{}(name="{}", context={})'.format( type(self).__name__, self._name, self._context @@ -710,25 +731,18 @@ def set_attributes( return for key, value in attributes.items(): - if not _is_valid_attribute_value(value): + valid, cleaned = _clean_attribute_value( + value, self._limits.max_attribute_size + ) + if not valid: continue + if cleaned is not None: + value = cleaned + if not key: logger.warning("invalid key `%s` (empty or null)", key) continue - - # Freeze mutable sequences defensively - if isinstance(value, MutableSequence): - value = tuple(value) - if isinstance(value, bytes): - try: - value = value.decode() - except ValueError: - logger.warning( - "Byte attribute could not be decoded for key `%s`.", - key, - ) - return self._attributes[key] = value def set_attribute(self, key: str, value: types.AttributeValue) -> None: @@ -744,7 +758,7 @@ def add_event( attributes: types.Attributes = None, timestamp: Optional[int] = None, ) -> None: - _filter_attributes(attributes) + _clean_attributes(attributes, self._limits.max_attribute_size) attributes = _create_immutable_attributes(attributes) self._add_event( Event( @@ -1027,6 +1041,13 @@ def __init__( if shutdown_on_exit: self._atexit_handler = atexit.register(self.shutdown) + if self._resource and self._limits.max_attribute_size: + resource_attributes = self._resource.attributes + _clean_attributes( + resource_attributes, self._limits.max_attribute_size + ) + self._resource = Resource(resource_attributes) + @property def resource(self) -> Resource: return self._resource diff --git a/opentelemetry-sdk/tests/trace/test_trace.py b/opentelemetry-sdk/tests/trace/test_trace.py index 0781042a337..5ec39936238 100644 --- a/opentelemetry-sdk/tests/trace/test_trace.py +++ b/opentelemetry-sdk/tests/trace/test_trace.py @@ -27,6 +27,7 @@ from opentelemetry.sdk import resources, trace from opentelemetry.sdk.environment_variables import ( OTEL_SPAN_ATTRIBUTE_COUNT_LIMIT, + OTEL_SPAN_ATTRIBUTE_SIZE_LIMIT, OTEL_SPAN_EVENT_COUNT_LIMIT, OTEL_SPAN_LINK_COUNT_LIMIT, OTEL_TRACES_SAMPLER, @@ -40,8 +41,10 @@ from opentelemetry.util._time import _time_ns -def new_tracer(span_limits=None) -> trace_api.Tracer: - return trace.TracerProvider(span_limits=span_limits).get_tracer(__name__) +def new_tracer(resource=None, span_limits=None) -> trace_api.Tracer: + return trace.TracerProvider( + resource=resource, span_limits=span_limits + ).get_tracer(__name__) class TestTracer(unittest.TestCase): @@ -673,6 +676,58 @@ def test_byte_type_attribute_value(self): isinstance(root.attributes["valid-byte-type-attribute"], str) ) + @mock.patch.dict( + "os.environ", + {OTEL_SPAN_ATTRIBUTE_SIZE_LIMIT: "5"}, + ) + def test_invalid_attribute_truncate(self): + attributes = { + "k1": "v" * 20, + "k2": ["v" * 20, "v" * 8, "v" * 2], + } + + id_generator = RandomIdGenerator() + other_context = trace_api.SpanContext( + trace_id=id_generator.generate_trace_id(), + span_id=id_generator.generate_span_id(), + is_remote=False, + ) + links = (trace_api.Link(other_context, attributes.copy()),) + + tracer = new_tracer(resource=Resource.create(attributes.copy())) + with tracer.start_as_current_span( + "root", links=links, attributes={"k3": "v" * 100} + ) as root: + root.set_attributes(attributes.copy()) + root.add_event("v1", attributes.copy()) + + self.assertEqual(len(root.attributes), 3) + self.assertEqual(len(root.attributes["k1"]), 5) + self.assertEqual(len(root.attributes["k2"][0]), 5) + self.assertEqual(len(root.attributes["k2"][1]), 5) + self.assertEqual(len(root.attributes["k2"][2]), 2) + self.assertEqual(len(root.attributes["k3"]), 5) + + resource_attributes = root.resource.attributes + self.assertEqual(len(resource_attributes["k1"]), 5) + self.assertEqual(len(resource_attributes["k2"][0]), 5) + self.assertEqual(len(resource_attributes["k2"][1]), 5) + self.assertEqual(len(resource_attributes["k2"][2]), 2) + + self.assertEqual(len(root.events), 1) + event = root.events[0] + self.assertEqual(len(event.attributes["k1"]), 5) + self.assertEqual(len(event.attributes["k2"][0]), 5) + self.assertEqual(len(event.attributes["k2"][1]), 5) + self.assertEqual(len(event.attributes["k2"][2]), 2) + + self.assertEqual(len(root.links), 1) + link = root.links[0] + self.assertEqual(len(link.attributes["k1"]), 5) + self.assertEqual(len(link.attributes["k2"][0]), 5) + self.assertEqual(len(link.attributes["k2"][1]), 5) + self.assertEqual(len(link.attributes["k2"][2]), 2) + def test_sampling_attributes(self): sampling_attributes = { "sampler-attr": "sample-val",