From 00f90fd18f997f8ca6f534d63a86841732b05456 Mon Sep 17 00:00:00 2001 From: Owais Lone Date: Wed, 12 May 2021 17:03:40 +0530 Subject: [PATCH] Added max attribute length span limit support --- CHANGELOG.md | 2 + .../sdk/environment_variables/__init__.py | 5 + .../src/opentelemetry/sdk/trace/__init__.py | 112 +++++++--- opentelemetry-sdk/tests/trace/test_trace.py | 208 +++++++++++++++--- 4 files changed, 270 insertions(+), 57 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d3d395f0b41..242e98d128b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ([#1829](https://github.com/open-telemetry/opentelemetry-python/pull/1829)) - Lazily read/configure limits and allow limits to be unset. ([#1839](https://github.com/open-telemetry/opentelemetry-python/pull/1839)) +- Added support for read/configure limits and allow limits to be unset. + ([#1839](https://github.com/open-telemetry/opentelemetry-python/pull/1839)) ### Changed - Fixed OTLP gRPC exporter silently failing if scheme is not specified in endpoint. diff --git a/opentelemetry-sdk/src/opentelemetry/sdk/environment_variables/__init__.py b/opentelemetry-sdk/src/opentelemetry/sdk/environment_variables/__init__.py index 0aa5aa515d7..70f3e3d9441 100644 --- a/opentelemetry-sdk/src/opentelemetry/sdk/environment_variables/__init__.py +++ b/opentelemetry-sdk/src/opentelemetry/sdk/environment_variables/__init__.py @@ -73,6 +73,11 @@ .. envvar:: OTEL_BSP_MAX_EXPORT_BATCH_SIZE """ +OTEL_SPAN_ATTRIBUTE_SIZE_LIMIT = "OTEL_SPAN_ATTRIBUTE_SIZE_LIMIT" +""" +.. envvar:: OTEL_SPAN_ATTRIBUTE_SIZE_LIMIT +""" + OTEL_SPAN_ATTRIBUTE_COUNT_LIMIT = "OTEL_SPAN_ATTRIBUTE_COUNT_LIMIT" """ .. envvar:: OTEL_SPAN_ATTRIBUTE_COUNT_LIMIT diff --git a/opentelemetry-sdk/src/opentelemetry/sdk/trace/__init__.py b/opentelemetry-sdk/src/opentelemetry/sdk/trace/__init__.py index bba77915a24..be6bb0d6176 100644 --- a/opentelemetry-sdk/src/opentelemetry/sdk/trace/__init__.py +++ b/opentelemetry-sdk/src/opentelemetry/sdk/trace/__init__.py @@ -42,6 +42,7 @@ from opentelemetry.sdk import util from opentelemetry.sdk.environment_variables import ( OTEL_SPAN_ATTRIBUTE_COUNT_LIMIT, + OTEL_SPAN_ATTRIBUTE_SIZE_LIMIT, OTEL_SPAN_EVENT_COUNT_LIMIT, OTEL_SPAN_LINK_COUNT_LIMIT, ) @@ -61,6 +62,7 @@ _DEFAULT_SPAN_EVENTS_LIMIT = 128 _DEFAULT_SPAN_LINKS_LIMIT = 128 _DEFAULT_SPAN_ATTRIBUTES_LIMIT = 128 +_DEFAULT_SPAN_ATTRIBUTE_SIZE_LIMIT = None _VALID_ATTR_VALUE_TYPES = (bool, str, int, float) # pylint: disable=protected-access @@ -313,21 +315,29 @@ def attributes(self) -> types.Attributes: return self._attributes -def _is_valid_attribute_value(value: types.AttributeValue) -> bool: - """Checks if attribute value is valid. +def _clean_attribute_value( + value: types.AttributeValue, max_length: Optional[int] +) -> Tuple[bool, Optional[types.AttributeValue]]: + """Checks if attribute value is valid and cleans it up if required. An attribute value is valid if it is one of the valid types. If the value is a sequence, it is only valid if all items in the sequence: - are of the same valid type or None - are not a sequence + + When the ``max_length`` argument is set, any strings values longer than the value + are truncated and returned back as the second value. + + If the attribute value is not modified, the ``None`` is returned as the second return value. """ - if isinstance(value, Sequence): + modified = False + if isinstance(value, Sequence) and not isinstance(value, str): if len(value) == 0: - return True + return True, None sequence_first_valid_type = None - for element in value: + for idx, element in enumerate(value): if element is None: continue element_type = type(element) @@ -341,7 +351,7 @@ def _is_valid_attribute_value(value: types.AttributeValue) -> bool: for valid_type in _VALID_ATTR_VALUE_TYPES ], ) - return False + return False, None # The type of the sequence must be homogeneous. The first non-None # element determines the type of the sequence if sequence_first_valid_type is None: @@ -352,7 +362,11 @@ def _is_valid_attribute_value(value: types.AttributeValue) -> bool: sequence_first_valid_type.__name__, type(element).__name__, ) - return False + return False, None + if max_length is not None and isinstance(element, str): + element = element[:max_length] + value[idx] = element + modified = True elif not isinstance(value, _VALID_ATTR_VALUE_TYPES): logger.warning( @@ -361,18 +375,31 @@ def _is_valid_attribute_value(value: types.AttributeValue) -> bool: type(value).__name__, [valid_type.__name__ for valid_type in _VALID_ATTR_VALUE_TYPES], ) - return False - return True - - -def _filter_attribute_values(attributes: types.Attributes): - if attributes: - for attr_key, attr_value in list(attributes.items()): - if _is_valid_attribute_value(attr_value): - if isinstance(attr_value, MutableSequence): - attributes[attr_key] = tuple(attr_value) - else: - attributes.pop(attr_key) + return False, None + if max_length is not None and isinstance(value, str): + value = value[:max_length] + modified = True + + return True, value if modified else None + + +def _filter_attribute_values( + attributes: types.Attributes, max_length: Optional[int] +): + if not attributes: + return + + for attr_key, attr_value in list(attributes.items()): + valid, cleaned_value = _clean_attribute_value(attr_value, max_length) + if valid: + if isinstance(attr_value, MutableSequence): + if cleaned_value is not None: + attr_value = cleaned_value + attributes[attr_key] = tuple(attr_value) + if cleaned_value: + attributes[attr_key] = cleaned_value + else: + attributes.pop(attr_key) def _create_immutable_attributes(attributes): @@ -575,25 +602,32 @@ class _Limits: max_events: Maximum number of events that can be added to a Span. max_links: Maximum number of links that can be added to a Span. max_attributes: Maximum number of attributes that can be added to a Span. + max_attribute_value: Maximum length a string attribute an have. """ max_attributes: int max_events: int max_links: int + max_attribute_sizze: Optional[int] def __init__( self, max_events: Optional[int] = None, max_links: Optional[int] = None, max_attributes: Optional[int] = None, + max_attribute_size: Optional[int] = None, ): self.max_attributes = max_attributes self.max_events = max_events self.max_links = max_links + self.max_attribute_size = max_attribute_size def __repr__(self): - return "max_attributes={}, max_events={}, max_links={}".format( - self.max_attributes, self.max_events, self.max_links + return "max_attributes={}, max_events={}, max_links={}, max_attribute_size={}".format( + self.max_attributes, + self.max_events, + self.max_links, + self.max_attribute_size, ) @classmethod @@ -608,6 +642,10 @@ def _create(cls): max_links=cls._from_env( OTEL_SPAN_LINK_COUNT_LIMIT, _DEFAULT_SPAN_LINKS_LIMIT ), + max_attribute_size=cls._from_env( + OTEL_SPAN_ATTRIBUTE_SIZE_LIMIT, + _DEFAULT_SPAN_ATTRIBUTE_SIZE_LIMIT, + ), ) @classmethod @@ -698,7 +736,7 @@ def __init__( self._limits = limits self._lock = threading.Lock() - _filter_attribute_values(attributes) + _filter_attribute_values(attributes, self._limits.max_attribute_size) if not attributes: self._attributes = self._new_attributes() else: @@ -709,7 +747,9 @@ def __init__( self._events = self._new_events() if events: for event in events: - _filter_attribute_values(event.attributes) + _filter_attribute_values( + event.attributes, self._limits.max_attribute_size + ) # pylint: disable=protected-access event._attributes = _create_immutable_attributes( event.attributes @@ -721,6 +761,11 @@ def __init__( else: self._links = BoundedList.from_seq(self._limits.max_links, links) + for link in self._links: + _filter_attribute_values( + link.attributes, self._limits.max_attribute_size + ) + def __repr__(self): return '{}(name="{}", context={})'.format( type(self).__name__, self._name, self._context @@ -747,9 +792,15 @@ def set_attributes( return for key, value in attributes.items(): - if not _is_valid_attribute_value(value): + valid, cleaned = _clean_attribute_value( + value, self._limits.max_attribute_size + ) + if not valid: continue + if cleaned is not None: + value = cleaned + if not key: logger.warning("invalid key `%s` (empty or null)", key) continue @@ -781,7 +832,7 @@ def add_event( attributes: types.Attributes = None, timestamp: Optional[int] = None, ) -> None: - _filter_attribute_values(attributes) + _filter_attribute_values(attributes, self._limits.max_attribute_size) attributes = _create_immutable_attributes(attributes) self._add_event( Event( @@ -919,13 +970,12 @@ def __init__( ], id_generator: IdGenerator, instrumentation_info: InstrumentationInfo, - span_limits: SpanLimits, ) -> None: self.sampler = sampler self.resource = resource self.span_processor = span_processor self.id_generator = id_generator - self._span_limits = span_limits + self._limits = None self.instrumentation_info = instrumentation_info self._limits = None @@ -1053,7 +1103,6 @@ def __init__( SynchronousMultiSpanProcessor, ConcurrentMultiSpanProcessor ] = None, id_generator: IdGenerator = None, - span_limits=None, ): self._active_span_processor = ( active_span_processor or SynchronousMultiSpanProcessor() @@ -1069,6 +1118,13 @@ def __init__( if shutdown_on_exit: self._atexit_handler = atexit.register(self.shutdown) + if self._resource and self._limits.max_attribute_size: + resource_attributes = self._resource.attributes + _filter_attribute_values( + resource_attributes, self._limits.max_attribute_size + ) + self._resource = Resource(resource_attributes) + @property def resource(self) -> Resource: return self._resource diff --git a/opentelemetry-sdk/tests/trace/test_trace.py b/opentelemetry-sdk/tests/trace/test_trace.py index fcc3b3c0108..d99f46adb78 100644 --- a/opentelemetry-sdk/tests/trace/test_trace.py +++ b/opentelemetry-sdk/tests/trace/test_trace.py @@ -29,6 +29,7 @@ from opentelemetry.sdk import resources, trace from opentelemetry.sdk.environment_variables import ( OTEL_SPAN_ATTRIBUTE_COUNT_LIMIT, + OTEL_SPAN_ATTRIBUTE_SIZE_LIMIT, OTEL_SPAN_EVENT_COUNT_LIMIT, OTEL_SPAN_LINK_COUNT_LIMIT, OTEL_TRACES_SAMPLER, @@ -42,8 +43,8 @@ from opentelemetry.util._time import _time_ns -def new_tracer(span_limits=None) -> trace_api.Tracer: - return trace.TracerProvider(span_limits=span_limits).get_tracer(__name__) +def new_tracer(resource=None) -> trace_api.Tracer: + return trace.TracerProvider(resource=resource).get_tracer(__name__) class TestTracer(unittest.TestCase): @@ -538,6 +539,12 @@ class TestSpan(unittest.TestCase): def setUp(self): self.tracer = new_tracer() + def assertCleanAttr(self, value, valid): + # pylint: disable=protected-access + is_valid, cleaned = trace._clean_attribute_value(value, None) + self.assertEqual(is_valid, valid) + self.assertEqual(cleaned, value if valid else None) + def test_basic_span(self): span = trace._Span("name", mock.Mock(spec=trace_api.SpanContext)) self.assertEqual(span.name, "name") @@ -646,34 +653,177 @@ def test_byte_type_attribute_value(self): isinstance(root.attributes["valid-byte-type-attribute"], str) ) - def test_check_attribute_helper(self): - # pylint: disable=protected-access - self.assertFalse(trace._is_valid_attribute_value([1, 2, 3.4, "ss", 4])) - self.assertFalse( - trace._is_valid_attribute_value([dict(), 1, 2, 3.4, 4]) - ) - self.assertFalse( - trace._is_valid_attribute_value(["sw", "lf", 3.4, "ss"]) - ) - self.assertFalse(trace._is_valid_attribute_value([1, 2, 3.4, 5])) - self.assertTrue(trace._is_valid_attribute_value([1, 2, 3, 5])) - self.assertTrue(trace._is_valid_attribute_value([1.2, 2.3, 3.4, 4.5])) - self.assertTrue(trace._is_valid_attribute_value([True, False])) - self.assertTrue(trace._is_valid_attribute_value(["ss", "dw", "fw"])) - self.assertTrue(trace._is_valid_attribute_value([])) - self.assertFalse(trace._is_valid_attribute_value(dict())) - self.assertTrue(trace._is_valid_attribute_value(True)) - self.assertTrue(trace._is_valid_attribute_value("hi")) - self.assertTrue(trace._is_valid_attribute_value(3.4)) - self.assertTrue(trace._is_valid_attribute_value(15)) - # None in sequences are valid - self.assertTrue(trace._is_valid_attribute_value(["A", None, None])) - self.assertTrue( - trace._is_valid_attribute_value(["A", None, None, "B"]) + @mock.patch.dict( + "os.environ", + {OTEL_SPAN_ATTRIBUTE_SIZE_LIMIT: "5"}, + ) + def test_invalid_attribute_truncate(self): + attributes = { + "k1": "v" * 20, + "k2": ["v" * 20, "v" * 8, "v" * 2], + } + + id_generator = RandomIdGenerator() + other_context = trace_api.SpanContext( + trace_id=id_generator.generate_trace_id(), + span_id=id_generator.generate_span_id(), + is_remote=False, ) - self.assertTrue(trace._is_valid_attribute_value([None, None])) - self.assertFalse(trace._is_valid_attribute_value(["A", None, 1])) - self.assertFalse(trace._is_valid_attribute_value([None, "A", None, 1])) + links = (trace_api.Link(other_context, attributes.copy()),) + + tracer = new_tracer(resource=Resource.create(attributes.copy())) + with tracer.start_as_current_span( + "root", links=links, attributes={"k3": "v" * 100} + ) as root: + root.set_attributes(attributes.copy()) + root.add_event("v1", attributes.copy()) + + self.assertEqual(len(root.attributes), 3) + self.assertEqual(len(root.attributes["k1"]), 5) + self.assertEqual(len(root.attributes["k2"][0]), 5) + self.assertEqual(len(root.attributes["k2"][1]), 5) + self.assertEqual(len(root.attributes["k2"][2]), 2) + self.assertEqual(len(root.attributes["k3"]), 5) + + resource_attributes = root.resource.attributes + self.assertEqual(len(resource_attributes["k1"]), 5) + self.assertEqual(len(resource_attributes["k2"][0]), 5) + self.assertEqual(len(resource_attributes["k2"][1]), 5) + self.assertEqual(len(resource_attributes["k2"][2]), 2) + + self.assertEqual(len(root.events), 1) + event = root.events[0] + self.assertEqual(len(event.attributes["k1"]), 5) + self.assertEqual(len(event.attributes["k2"][0]), 5) + self.assertEqual(len(event.attributes["k2"][1]), 5) + self.assertEqual(len(event.attributes["k2"][2]), 2) + + self.assertEqual(len(root.links), 1) + link = root.links[0] + self.assertEqual(len(link.attributes["k1"]), 5) + self.assertEqual(len(link.attributes["k2"][0]), 5) + self.assertEqual(len(link.attributes["k2"][1]), 5) + self.assertEqual(len(link.attributes["k2"][2]), 2) + + def test_check_attribute_helper_validate(self): + test_cases = [ + ( + [1, 2, 3.4, "ss", 4], + False, + ), + ( + [dict(), 1, 2, 3.4, 4], + False, + ), + ( + ["sw", "lf", 3.4, "ss"], + False, + ), + ( + [1, 2, 3.4, 5], + False, + ), + ( + [1, 2, 3, 5], + True, + ), + ( + [1.2, 2.3, 3.4, 4.5], + True, + ), + ( + [True, False], + True, + ), + ( + ["ss", "dw", "fw"], + True, + ), + ( + [], + True, + ), + ( + dict(), + False, + ), + ( + True, + True, + ), + ( + "hi", + True, + ), + ( + 3.4, + True, + ), + ( + 15, + True, + ), + # None in sequences are valid + ( + ["A", None, None], + True, + ), + ( + ["A", None, None, "B"], + True, + ), + ( + [None, None], + True, + ), + ( + ["A", None, 1], + False, + ), + ( + [None, "A", None, 1], + False, + ), + ] + + for value, want_valid in test_cases: + # pylint: disable=protected-access + got_valid, cleaned_value = trace._clean_attribute_value( + value, None + ) + self.assertEqual(got_valid, want_valid) + self.assertIsNone(cleaned_value) + + def test_check_attribute_helper_truncate(self): + test_cases = [ + ("a" * 50, None, None), + ("a" * 50, "a" * 10, 10), + ("abc", "a", 1), + ("abc" * 50, "abcabcabca", 10), + ("abc" * 50, "abc" * 50, 1000), + ("abc" * 50, None, None), + ([1, 2, 3, 5], None, 10), + ([1.2, 2.3], None, 20), + ([True, False], None, 10), + ([], None, 10), + (True, None, 10), + ( + 3.4, + None, + True, + ), + ( + 15, + None, + True, + ), + ] + + for value, expected, limit in test_cases: + # pylint: disable=protected-access + valid, cleaned = trace._clean_attribute_value(value, limit) + self.assertTrue(valid) + self.assertEqual(cleaned, expected) def test_sampling_attributes(self): sampling_attributes = {