From 95a78b0df2f5e85ee49289fda43623df12c23b62 Mon Sep 17 00:00:00 2001 From: Owais Lone Date: Tue, 11 May 2021 06:02:44 +0530 Subject: [PATCH] Added max attribute length span limit support --- CHANGELOG.md | 2 + .../src/opentelemetry/attributes/__init__.py | 85 ++++++--- .../tests/attributes/test_attributes.py | 161 +++++++++++++++--- .../sdk/environment_variables/__init__.py | 5 + .../opentelemetry/sdk/resources/__init__.py | 4 +- .../src/opentelemetry/sdk/trace/__init__.py | 48 +++++- opentelemetry-sdk/tests/trace/test_trace.py | 58 ++++++- 7 files changed, 296 insertions(+), 67 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5477ae7940d..30cdfbd30eb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ([#1829](https://github.com/open-telemetry/opentelemetry-python/pull/1829)) - Lazily read/configure limits and allow limits to be unset. ([#1839](https://github.com/open-telemetry/opentelemetry-python/pull/1839)) +- Added support for read/configure limits and allow limits to be unset. + ([#1839](https://github.com/open-telemetry/opentelemetry-python/pull/1839)) ### Changed - Fixed OTLP gRPC exporter silently failing if scheme is not specified in endpoint. diff --git a/opentelemetry-api/src/opentelemetry/attributes/__init__.py b/opentelemetry-api/src/opentelemetry/attributes/__init__.py index 6875f56631f..41305089c4e 100644 --- a/opentelemetry-api/src/opentelemetry/attributes/__init__.py +++ b/opentelemetry-api/src/opentelemetry/attributes/__init__.py @@ -15,7 +15,7 @@ import logging from types import MappingProxyType -from typing import MutableSequence, Sequence +from typing import MutableSequence, Optional, Sequence, Tuple from opentelemetry.util import types @@ -25,7 +25,9 @@ _logger = logging.getLogger(__name__) -def _is_valid_attribute_value(value: types.AttributeValue) -> bool: +def _clean_attribute_value( + value: types.AttributeValue, max_length: Optional[int] +) -> Tuple[bool, Optional[types.AttributeValue]]: """Checks if attribute value is valid. An attribute value is valid if it is either: @@ -33,15 +35,23 @@ def _is_valid_attribute_value(value: types.AttributeValue) -> bool: point (IEEE 754-1985) or integer. - An array of primitive type values. The array MUST be homogeneous, i.e. it MUST NOT contain values of different types. + + When the ``max_length`` argument is set, any strings values longer than the value + are truncated and returned back as the second value. + If the attribute value is not modified, ``None`` is returned as the second return value. """ - if isinstance(value, Sequence): + # pylint: disable=too-many-branches + modified = False + if isinstance(value, Sequence) and not isinstance(value, (str, bytes)): if len(value) == 0: - return True + return True, None sequence_first_valid_type = None + new_value = [] for element in value: if element is None: + new_value.append(element) continue element_type = type(element) if element_type not in _VALID_ATTR_VALUE_TYPES: @@ -54,7 +64,7 @@ def _is_valid_attribute_value(value: types.AttributeValue) -> bool: for valid_type in _VALID_ATTR_VALUE_TYPES ], ) - return False + return False, None # The type of the sequence must be homogeneous. The first non-None # element determines the type of the sequence if sequence_first_valid_type is None: @@ -65,7 +75,22 @@ def _is_valid_attribute_value(value: types.AttributeValue) -> bool: sequence_first_valid_type.__name__, type(element).__name__, ) - return False + return False, None + if max_length is not None and isinstance(element, str): + element = element[:max_length] + modified = True + new_value.append(element) + if isinstance(value, MutableSequence): + modified = True + value = tuple(new_value) + + elif isinstance(value, bytes): + try: + value = value.decode() + modified = True + except ValueError: + _logger.warning("Byte attribute could not be decoded.") + return False, None elif not isinstance(value, _VALID_ATTR_VALUE_TYPES): _logger.warning( @@ -74,34 +99,38 @@ def _is_valid_attribute_value(value: types.AttributeValue) -> bool: type(value).__name__, [valid_type.__name__ for valid_type in _VALID_ATTR_VALUE_TYPES], ) - return False - return True + return False, None + + if max_length is not None and isinstance(value, str): + value = value[:max_length] + modified = True + return True, value if modified else None -def _filter_attributes(attributes: types.Attributes) -> None: - """Applies attribute validation rules and drops (key, value) pairs +def _clean_attributes( + attributes: types.Attributes, max_length: Optional[int] +) -> None: + """Applies attribute validation rules and truncates/drops (key, value) pairs that doesn't adhere to attributes specification. https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/common/common.md#attributes. """ - if attributes: - for attr_key, attr_value in list(attributes.items()): - if not attr_key: - _logger.warning("invalid key `%s` (empty or null)", attr_key) - attributes.pop(attr_key) - continue - - if _is_valid_attribute_value(attr_value): - if isinstance(attr_value, MutableSequence): - attributes[attr_key] = tuple(attr_value) - if isinstance(attr_value, bytes): - try: - attributes[attr_key] = attr_value.decode() - except ValueError: - attributes.pop(attr_key) - _logger.warning("Byte attribute could not be decoded.") - else: - attributes.pop(attr_key) + if not attributes: + return + + for attr_key, attr_value in list(attributes.items()): + if not attr_key: + _logger.warning("invalid key `%s` (empty or null)", attr_key) + attributes.pop(attr_key) + continue + + valid, cleaned_value = _clean_attribute_value(attr_value, max_length) + if not valid: + attributes.pop(attr_key) + continue + + if cleaned_value is not None: + attributes[attr_key] = cleaned_value def _create_immutable_attributes( diff --git a/opentelemetry-api/tests/attributes/test_attributes.py b/opentelemetry-api/tests/attributes/test_attributes.py index 2a391f78af7..2a0c58e0a55 100644 --- a/opentelemetry-api/tests/attributes/test_attributes.py +++ b/opentelemetry-api/tests/attributes/test_attributes.py @@ -17,42 +17,151 @@ import unittest from opentelemetry.attributes import ( + _clean_attribute_value, + _clean_attributes, _create_immutable_attributes, - _filter_attributes, - _is_valid_attribute_value, ) class TestAttributes(unittest.TestCase): - def test_is_valid_attribute_value(self): - self.assertFalse(_is_valid_attribute_value([1, 2, 3.4, "ss", 4])) - self.assertFalse(_is_valid_attribute_value([dict(), 1, 2, 3.4, 4])) - self.assertFalse(_is_valid_attribute_value(["sw", "lf", 3.4, "ss"])) - self.assertFalse(_is_valid_attribute_value([1, 2, 3.4, 5])) - self.assertFalse(_is_valid_attribute_value(dict())) - self.assertTrue(_is_valid_attribute_value(True)) - self.assertTrue(_is_valid_attribute_value("hi")) - self.assertTrue(_is_valid_attribute_value(3.4)) - self.assertTrue(_is_valid_attribute_value(15)) - self.assertTrue(_is_valid_attribute_value([1, 2, 3, 5])) - self.assertTrue(_is_valid_attribute_value([1.2, 2.3, 3.4, 4.5])) - self.assertTrue(_is_valid_attribute_value([True, False])) - self.assertTrue(_is_valid_attribute_value(["ss", "dw", "fw"])) - self.assertTrue(_is_valid_attribute_value([])) - # None in sequences are valid - self.assertTrue(_is_valid_attribute_value(["A", None, None])) - self.assertTrue(_is_valid_attribute_value(["A", None, None, "B"])) - self.assertTrue(_is_valid_attribute_value([None, None])) - self.assertFalse(_is_valid_attribute_value(["A", None, 1])) - self.assertFalse(_is_valid_attribute_value([None, "A", None, 1])) + def assertCleanAttr(self, value, valid): + # pylint: disable=protected-access + is_valid, cleaned = _clean_attribute_value(value, None) + self.assertEqual(is_valid, valid) + self.assertEqual(cleaned, value if valid else None) - def test_filter_attributes(self): + def test_validate_attribute_value(self): + test_cases = [ + ( + [1, 2, 3.4, "ss", 4], + False, + ), + ( + [dict(), 1, 2, 3.4, 4], + False, + ), + ( + ["sw", "lf", 3.4, "ss"], + False, + ), + ( + [1, 2, 3.4, 5], + False, + ), + ( + dict(), + False, + ), + ( + True, + True, + ), + ( + "hi", + True, + ), + ( + 3.4, + True, + ), + ( + 15, + True, + ), + ( + (1, 2, 3, 5), + True, + ), + ( + (1.2, 2.3, 3.4, 4.5), + True, + ), + ( + (True, False), + True, + ), + ( + ("ss", "dw", "fw"), + True, + ), + ( + [], + True, + ), + # None in sequences are valid + ( + ("A", None, None), + True, + ), + ( + ("A", None, None, "B"), + True, + ), + ( + (None, None), + True, + ), + ( + ["A", None, 1], + False, + ), + ( + [None, "A", None, 1], + False, + ), + ] + + for value, want_valid in test_cases: + # pylint: disable=protected-access + got_valid, cleaned_value = _clean_attribute_value(value, None) + self.assertEqual(got_valid, want_valid) + self.assertIsNone(cleaned_value) + + def test_clean_attribute_value_truncate(self): + test_cases = [ + ("a" * 50, None, None), + ("a" * 50, "a" * 10, 10), + ("abc", "a", 1), + ("abc" * 50, "abcabcabca", 10), + ("abc" * 50, "abc" * 50, 1000), + ("abc" * 50, None, None), + ([1, 2, 3, 5], (1, 2, 3, 5), 10), + ( + [1.2, 2.3], + ( + 1.2, + 2.3, + ), + 20, + ), + ([True, False], (True, False), 10), + ([], None, 10), + (True, None, 10), + ( + 3.4, + None, + True, + ), + ( + 15, + None, + True, + ), + ] + + for value, expected, limit in test_cases: + # pylint: disable=protected-access + valid, cleaned = _clean_attribute_value(value, limit) + self.assertTrue(valid) + self.assertEqual(cleaned, expected) + + def test_clean_attributes(self): attrs_with_invalid_keys = { "": "empty-key", None: "None-value", "attr-key": "attr-value", } - _filter_attributes(attrs_with_invalid_keys) + _clean_attributes(attrs_with_invalid_keys, None) self.assertTrue(len(attrs_with_invalid_keys), 1) self.assertEqual(attrs_with_invalid_keys, {"attr-key": "attr-value"}) @@ -66,7 +175,7 @@ def test_filter_attributes(self): "boolkey": True, "valid-byte-string": b"hello-otel", } - _filter_attributes(attrs_with_invalid_values) + _clean_attributes(attrs_with_invalid_values, None) self.assertEqual(len(attrs_with_invalid_values), 5) self.assertEqual( attrs_with_invalid_values, diff --git a/opentelemetry-sdk/src/opentelemetry/sdk/environment_variables/__init__.py b/opentelemetry-sdk/src/opentelemetry/sdk/environment_variables/__init__.py index 0aa5aa515d7..70f3e3d9441 100644 --- a/opentelemetry-sdk/src/opentelemetry/sdk/environment_variables/__init__.py +++ b/opentelemetry-sdk/src/opentelemetry/sdk/environment_variables/__init__.py @@ -73,6 +73,11 @@ .. envvar:: OTEL_BSP_MAX_EXPORT_BATCH_SIZE """ +OTEL_SPAN_ATTRIBUTE_SIZE_LIMIT = "OTEL_SPAN_ATTRIBUTE_SIZE_LIMIT" +""" +.. envvar:: OTEL_SPAN_ATTRIBUTE_SIZE_LIMIT +""" + OTEL_SPAN_ATTRIBUTE_COUNT_LIMIT = "OTEL_SPAN_ATTRIBUTE_COUNT_LIMIT" """ .. envvar:: OTEL_SPAN_ATTRIBUTE_COUNT_LIMIT diff --git a/opentelemetry-sdk/src/opentelemetry/sdk/resources/__init__.py b/opentelemetry-sdk/src/opentelemetry/sdk/resources/__init__.py index 8755b2d1f90..212cb24bfd3 100644 --- a/opentelemetry-sdk/src/opentelemetry/sdk/resources/__init__.py +++ b/opentelemetry-sdk/src/opentelemetry/sdk/resources/__init__.py @@ -64,7 +64,7 @@ import pkg_resources -from opentelemetry.attributes import _filter_attributes +from opentelemetry.attributes import _clean_attributes from opentelemetry.sdk.environment_variables import ( OTEL_RESOURCE_ATTRIBUTES, OTEL_SERVICE_NAME, @@ -142,7 +142,7 @@ class Resource: """A Resource is an immutable representation of the entity producing telemetry as Attributes.""" def __init__(self, attributes: Attributes): - _filter_attributes(attributes) + _clean_attributes(attributes, None) self._attributes = attributes.copy() @staticmethod diff --git a/opentelemetry-sdk/src/opentelemetry/sdk/trace/__init__.py b/opentelemetry-sdk/src/opentelemetry/sdk/trace/__init__.py index c202f5cbea1..81384630765 100644 --- a/opentelemetry-sdk/src/opentelemetry/sdk/trace/__init__.py +++ b/opentelemetry-sdk/src/opentelemetry/sdk/trace/__init__.py @@ -40,13 +40,14 @@ from opentelemetry import context as context_api from opentelemetry import trace as trace_api from opentelemetry.attributes import ( + _clean_attribute_value, + _clean_attributes, _create_immutable_attributes, - _filter_attributes, - _is_valid_attribute_value, ) from opentelemetry.sdk import util from opentelemetry.sdk.environment_variables import ( OTEL_SPAN_ATTRIBUTE_COUNT_LIMIT, + OTEL_SPAN_ATTRIBUTE_SIZE_LIMIT, OTEL_SPAN_EVENT_COUNT_LIMIT, OTEL_SPAN_LINK_COUNT_LIMIT, ) @@ -65,6 +66,7 @@ _DEFAULT_SPAN_EVENTS_LIMIT = 128 _DEFAULT_SPAN_LINKS_LIMIT = 128 _DEFAULT_SPAN_ATTRIBUTES_LIMIT = 128 +_DEFAULT_SPAN_ATTRIBUTE_SIZE_LIMIT = None # pylint: disable=protected-access _TRACE_SAMPLER = sampling._get_from_env_or_default() @@ -512,6 +514,7 @@ class _Limits: max_events: Maximum number of events that can be added to a Span. max_links: Maximum number of links that can be added to a Span. max_attributes: Maximum number of attributes that can be added to a Span. + max_attribute_value: Maximum length a string attribute can have. """ UNSET = -1 @@ -519,12 +522,14 @@ class _Limits: max_attributes: int max_events: int max_links: int + max_attribute_size: Optional[int] def __init__( self, max_attributes: Optional[int] = None, max_events: Optional[int] = None, max_links: Optional[int] = None, + max_attribute_size: Optional[int] = None, ): self.max_attributes = self._from_env_if_absent( max_attributes, @@ -538,9 +543,18 @@ def __init__( max_links, OTEL_SPAN_LINK_COUNT_LIMIT, _DEFAULT_SPAN_LINKS_LIMIT ) + self.max_attribute_size = self._from_env_if_absent( + max_attribute_size, + OTEL_SPAN_ATTRIBUTE_SIZE_LIMIT, + _DEFAULT_SPAN_ATTRIBUTE_SIZE_LIMIT, + ) + def __repr__(self): - return "max_attributes={}, max_events={}, max_links={}".format( - self.max_attributes, self.max_events, self.max_links + return "max_attributes={}, max_events={}, max_links={}, max_attribute_size={}".format( + self.max_attributes, + self.max_events, + self.max_links, + self.max_attribute_size, ) @classmethod @@ -639,7 +653,7 @@ def __init__( self._span_processor = span_processor self._lock = threading.Lock() - _filter_attributes(attributes) + _clean_attributes(attributes, self._limits.max_attribute_size) if not attributes: self._attributes = self._new_attributes() else: @@ -650,7 +664,9 @@ def __init__( self._events = self._new_events() if events: for event in events: - _filter_attributes(event.attributes) + _clean_attributes( + event.attributes, self._limits.max_attribute_size + ) # pylint: disable=protected-access event._attributes = _create_immutable_attributes( event.attributes @@ -662,6 +678,9 @@ def __init__( else: self._links = BoundedList.from_seq(self._limits.max_links, links) + for link in self._links: + _clean_attributes(link.attributes, self._limits.max_attribute_size) + def __repr__(self): return '{}(name="{}", context={})'.format( type(self).__name__, self._name, self._context @@ -688,9 +707,15 @@ def set_attributes( return for key, value in attributes.items(): - if not _is_valid_attribute_value(value): + valid, cleaned = _clean_attribute_value( + value, self._limits.max_attribute_size + ) + if not valid: continue + if cleaned is not None: + value = cleaned + if not key: logger.warning("invalid key `%s` (empty or null)", key) continue @@ -722,7 +747,7 @@ def add_event( attributes: types.Attributes = None, timestamp: Optional[int] = None, ) -> None: - _filter_attributes(attributes) + _clean_attributes(attributes, self._limits.max_attribute_size) attributes = _create_immutable_attributes(attributes) self._add_event( Event( @@ -1007,6 +1032,13 @@ def __init__( if shutdown_on_exit: self._atexit_handler = atexit.register(self.shutdown) + if self._resource and self._limits.max_attribute_size: + resource_attributes = self._resource.attributes + _clean_attributes( + resource_attributes, self._limits.max_attribute_size + ) + self._resource = Resource(resource_attributes) + @property def resource(self) -> Resource: return self._resource diff --git a/opentelemetry-sdk/tests/trace/test_trace.py b/opentelemetry-sdk/tests/trace/test_trace.py index c9c523c3fab..cb21197b14d 100644 --- a/opentelemetry-sdk/tests/trace/test_trace.py +++ b/opentelemetry-sdk/tests/trace/test_trace.py @@ -27,6 +27,7 @@ from opentelemetry.sdk import resources, trace from opentelemetry.sdk.environment_variables import ( OTEL_SPAN_ATTRIBUTE_COUNT_LIMIT, + OTEL_SPAN_ATTRIBUTE_SIZE_LIMIT, OTEL_SPAN_EVENT_COUNT_LIMIT, OTEL_SPAN_LINK_COUNT_LIMIT, OTEL_TRACES_SAMPLER, @@ -40,8 +41,8 @@ from opentelemetry.util._time import _time_ns -def new_tracer() -> trace_api.Tracer: - return trace.TracerProvider().get_tracer(__name__) +def new_tracer(resource=None) -> trace_api.Tracer: + return trace.TracerProvider(resource=resource).get_tracer(__name__) class TestTracer(unittest.TestCase): @@ -673,6 +674,58 @@ def test_byte_type_attribute_value(self): isinstance(root.attributes["valid-byte-type-attribute"], str) ) + @mock.patch.dict( + "os.environ", + {OTEL_SPAN_ATTRIBUTE_SIZE_LIMIT: "5"}, + ) + def test_invalid_attribute_truncate(self): + attributes = { + "k1": "v" * 20, + "k2": ["v" * 20, "v" * 8, "v" * 2], + } + + id_generator = RandomIdGenerator() + other_context = trace_api.SpanContext( + trace_id=id_generator.generate_trace_id(), + span_id=id_generator.generate_span_id(), + is_remote=False, + ) + links = (trace_api.Link(other_context, attributes.copy()),) + + tracer = new_tracer(resource=Resource.create(attributes.copy())) + with tracer.start_as_current_span( + "root", links=links, attributes={"k3": "v" * 100} + ) as root: + root.set_attributes(attributes.copy()) + root.add_event("v1", attributes.copy()) + + self.assertEqual(len(root.attributes), 3) + self.assertEqual(len(root.attributes["k1"]), 5) + self.assertEqual(len(root.attributes["k2"][0]), 5) + self.assertEqual(len(root.attributes["k2"][1]), 5) + self.assertEqual(len(root.attributes["k2"][2]), 2) + self.assertEqual(len(root.attributes["k3"]), 5) + + resource_attributes = root.resource.attributes + self.assertEqual(len(resource_attributes["k1"]), 5) + self.assertEqual(len(resource_attributes["k2"][0]), 5) + self.assertEqual(len(resource_attributes["k2"][1]), 5) + self.assertEqual(len(resource_attributes["k2"][2]), 2) + + self.assertEqual(len(root.events), 1) + event = root.events[0] + self.assertEqual(len(event.attributes["k1"]), 5) + self.assertEqual(len(event.attributes["k2"][0]), 5) + self.assertEqual(len(event.attributes["k2"][1]), 5) + self.assertEqual(len(event.attributes["k2"][2]), 2) + + self.assertEqual(len(root.links), 1) + link = root.links[0] + self.assertEqual(len(link.attributes["k1"]), 5) + self.assertEqual(len(link.attributes["k2"][0]), 5) + self.assertEqual(len(link.attributes["k2"][1]), 5) + self.assertEqual(len(link.attributes["k2"][2]), 2) + def test_sampling_attributes(self): sampling_attributes = { "sampler-attr": "sample-val", @@ -1363,7 +1416,6 @@ def test_span_limits_env(self): ) def test_span_no_limits_env(self): num_links = int(trace._DEFAULT_SPAN_LINKS_LIMIT) + randint(1, 100) - tracer = new_tracer() id_generator = RandomIdGenerator() some_links = [