Skip to content

Commit

Permalink
Added max attribute length span limit support
Browse files Browse the repository at this point in the history
  • Loading branch information
owais committed Jun 1, 2021
1 parent ae01f70 commit 5be7a88
Show file tree
Hide file tree
Showing 7 changed files with 311 additions and 80 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased](https://github.com/open-telemetry/opentelemetry-python/compare/v1.2.0-0.21b0...HEAD)

### Added
- Added support for OTEL_SPAN_ATTRIBUTE_SIZE_LIMIT.
([#1847](https://github.com/open-telemetry/opentelemetry-python/pull/1847))

### Added
- Allow span limits to be set programatically via TracerProvider.
([#1877](https://github.com/open-telemetry/opentelemetry-python/pull/1877))
Expand Down
93 changes: 65 additions & 28 deletions opentelemetry-api/src/opentelemetry/attributes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

import logging
from types import MappingProxyType
from typing import MutableSequence, Sequence
from typing import MutableSequence, Optional, Sequence, Tuple

from opentelemetry.util import types

Expand All @@ -25,23 +25,40 @@
_logger = logging.getLogger(__name__)


def _is_valid_attribute_value(value: types.AttributeValue) -> bool:
def _clean_attribute_value(
value: types.AttributeValue, max_size: Optional[int]
) -> Tuple[bool, Optional[types.AttributeValue]]:
"""Checks if attribute value is valid.
An attribute value is valid if it is either:
- A primitive type: string, boolean, double precision floating
point (IEEE 754-1985) or integer.
- An array of primitive type values. The array MUST be homogeneous,
i.e. it MUST NOT contain values of different types.
This function tries to clean attribute values according to the following rules:
- bytes are decoded to strings.
- When ``max_size`` argument is set, any string/bytes values longer than the value
are truncated to the specified max length.
- ``Sequence`` values other than strings such as lists are converted to immutable tuples.
If the attribute value is modified or converted to another type, the new value is returned
as the second return value.
If the attribute value is not modified, ``None`` is returned as the second return value.
"""

if isinstance(value, Sequence):
# pylint: disable=too-many-branches
modified = False
if isinstance(value, Sequence) and not isinstance(value, (str, bytes)):
if len(value) == 0:
return True
return True, None

sequence_first_valid_type = None
new_value = []
for element in value:
if element is None:
new_value.append(element)
continue
element_type = type(element)
if element_type not in _VALID_ATTR_VALUE_TYPES:
Expand All @@ -54,7 +71,7 @@ def _is_valid_attribute_value(value: types.AttributeValue) -> bool:
for valid_type in _VALID_ATTR_VALUE_TYPES
],
)
return False
return False, None
# The type of the sequence must be homogeneous. The first non-None
# element determines the type of the sequence
if sequence_first_valid_type is None:
Expand All @@ -65,7 +82,23 @@ def _is_valid_attribute_value(value: types.AttributeValue) -> bool:
sequence_first_valid_type.__name__,
type(element).__name__,
)
return False
return False, None
if max_size is not None and isinstance(element, str):
element = element[:max_size]
modified = True
new_value.append(element)
# Freeze mutable sequences defensively
if isinstance(value, MutableSequence):
modified = True
value = tuple(new_value)

elif isinstance(value, bytes):
try:
value = value.decode()
modified = True
except ValueError:
_logger.warning("Byte attribute could not be decoded.")
return False, None

elif not isinstance(value, _VALID_ATTR_VALUE_TYPES):
_logger.warning(
Expand All @@ -74,34 +107,38 @@ def _is_valid_attribute_value(value: types.AttributeValue) -> bool:
type(value).__name__,
[valid_type.__name__ for valid_type in _VALID_ATTR_VALUE_TYPES],
)
return False
return True
return False, None

if max_size is not None and isinstance(value, str):
value = value[:max_size]
modified = True
return True, value if modified else None

def _filter_attributes(attributes: types.Attributes) -> None:
"""Applies attribute validation rules and drops (key, value) pairs

def _clean_attributes(
attributes: types.Attributes, max_size: Optional[int]
) -> None:
"""Applies attribute validation rules and truncates/drops (key, value) pairs
that doesn't adhere to attributes specification.
https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/common/common.md#attributes.
"""
if attributes:
for attr_key, attr_value in list(attributes.items()):
if not attr_key:
_logger.warning("invalid key `%s` (empty or null)", attr_key)
attributes.pop(attr_key)
continue

if _is_valid_attribute_value(attr_value):
if isinstance(attr_value, MutableSequence):
attributes[attr_key] = tuple(attr_value)
if isinstance(attr_value, bytes):
try:
attributes[attr_key] = attr_value.decode()
except ValueError:
attributes.pop(attr_key)
_logger.warning("Byte attribute could not be decoded.")
else:
attributes.pop(attr_key)
if not attributes:
return

for attr_key, attr_value in list(attributes.items()):
if not attr_key:
_logger.warning("invalid key `%s` (empty or null)", attr_key)
attributes.pop(attr_key)
continue

valid, cleaned_value = _clean_attribute_value(attr_value, max_size)
if not valid:
attributes.pop(attr_key)
continue

if cleaned_value is not None:
attributes[attr_key] = cleaned_value


def _create_immutable_attributes(
Expand Down
161 changes: 135 additions & 26 deletions opentelemetry-api/tests/attributes/test_attributes.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,42 +17,151 @@
import unittest

from opentelemetry.attributes import (
_clean_attribute_value,
_clean_attributes,
_create_immutable_attributes,
_filter_attributes,
_is_valid_attribute_value,
)


class TestAttributes(unittest.TestCase):
def test_is_valid_attribute_value(self):
self.assertFalse(_is_valid_attribute_value([1, 2, 3.4, "ss", 4]))
self.assertFalse(_is_valid_attribute_value([dict(), 1, 2, 3.4, 4]))
self.assertFalse(_is_valid_attribute_value(["sw", "lf", 3.4, "ss"]))
self.assertFalse(_is_valid_attribute_value([1, 2, 3.4, 5]))
self.assertFalse(_is_valid_attribute_value(dict()))
self.assertTrue(_is_valid_attribute_value(True))
self.assertTrue(_is_valid_attribute_value("hi"))
self.assertTrue(_is_valid_attribute_value(3.4))
self.assertTrue(_is_valid_attribute_value(15))
self.assertTrue(_is_valid_attribute_value([1, 2, 3, 5]))
self.assertTrue(_is_valid_attribute_value([1.2, 2.3, 3.4, 4.5]))
self.assertTrue(_is_valid_attribute_value([True, False]))
self.assertTrue(_is_valid_attribute_value(["ss", "dw", "fw"]))
self.assertTrue(_is_valid_attribute_value([]))
# None in sequences are valid
self.assertTrue(_is_valid_attribute_value(["A", None, None]))
self.assertTrue(_is_valid_attribute_value(["A", None, None, "B"]))
self.assertTrue(_is_valid_attribute_value([None, None]))
self.assertFalse(_is_valid_attribute_value(["A", None, 1]))
self.assertFalse(_is_valid_attribute_value([None, "A", None, 1]))
def assertCleanAttr(self, value, valid):
# pylint: disable=protected-access
is_valid, cleaned = _clean_attribute_value(value, None)
self.assertEqual(is_valid, valid)
self.assertEqual(cleaned, value if valid else None)

def test_filter_attributes(self):
def test_validate_attribute_value(self):
test_cases = [
(
[1, 2, 3.4, "ss", 4],
False,
),
(
[dict(), 1, 2, 3.4, 4],
False,
),
(
["sw", "lf", 3.4, "ss"],
False,
),
(
[1, 2, 3.4, 5],
False,
),
(
dict(),
False,
),
(
True,
True,
),
(
"hi",
True,
),
(
3.4,
True,
),
(
15,
True,
),
(
(1, 2, 3, 5),
True,
),
(
(1.2, 2.3, 3.4, 4.5),
True,
),
(
(True, False),
True,
),
(
("ss", "dw", "fw"),
True,
),
(
[],
True,
),
# None in sequences are valid
(
("A", None, None),
True,
),
(
("A", None, None, "B"),
True,
),
(
(None, None),
True,
),
(
["A", None, 1],
False,
),
(
[None, "A", None, 1],
False,
),
]

for value, want_valid in test_cases:
# pylint: disable=protected-access
got_valid, cleaned_value = _clean_attribute_value(value, None)
self.assertEqual(got_valid, want_valid)
self.assertIsNone(cleaned_value)

def test_clean_attribute_value_truncate(self):
test_cases = [
("a" * 50, None, None),
("a" * 50, "a" * 10, 10),
("abc", "a", 1),
("abc" * 50, "abcabcabca", 10),
("abc" * 50, "abc" * 50, 1000),
("abc" * 50, None, None),
([1, 2, 3, 5], (1, 2, 3, 5), 10),
(
[1.2, 2.3],
(
1.2,
2.3,
),
20,
),
([True, False], (True, False), 10),
([], None, 10),
(True, None, 10),
(
3.4,
None,
True,
),
(
15,
None,
True,
),
]

for value, expected, limit in test_cases:
# pylint: disable=protected-access
valid, cleaned = _clean_attribute_value(value, limit)
self.assertTrue(valid)
self.assertEqual(cleaned, expected)

def test_clean_attributes(self):
attrs_with_invalid_keys = {
"": "empty-key",
None: "None-value",
"attr-key": "attr-value",
}
_filter_attributes(attrs_with_invalid_keys)
_clean_attributes(attrs_with_invalid_keys, None)
self.assertTrue(len(attrs_with_invalid_keys), 1)
self.assertEqual(attrs_with_invalid_keys, {"attr-key": "attr-value"})

Expand All @@ -66,7 +175,7 @@ def test_filter_attributes(self):
"boolkey": True,
"valid-byte-string": b"hello-otel",
}
_filter_attributes(attrs_with_invalid_values)
_clean_attributes(attrs_with_invalid_values, None)
self.assertEqual(len(attrs_with_invalid_values), 5)
self.assertEqual(
attrs_with_invalid_values,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,11 @@
.. envvar:: OTEL_BSP_MAX_EXPORT_BATCH_SIZE
"""

OTEL_SPAN_ATTRIBUTE_SIZE_LIMIT = "OTEL_SPAN_ATTRIBUTE_SIZE_LIMIT"
"""
.. envvar:: OTEL_SPAN_ATTRIBUTE_SIZE_LIMIT
"""

OTEL_SPAN_ATTRIBUTE_COUNT_LIMIT = "OTEL_SPAN_ATTRIBUTE_COUNT_LIMIT"
"""
.. envvar:: OTEL_SPAN_ATTRIBUTE_COUNT_LIMIT
Expand Down
4 changes: 2 additions & 2 deletions opentelemetry-sdk/src/opentelemetry/sdk/resources/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@

import pkg_resources

from opentelemetry.attributes import _filter_attributes
from opentelemetry.attributes import _clean_attributes
from opentelemetry.sdk.environment_variables import (
OTEL_RESOURCE_ATTRIBUTES,
OTEL_SERVICE_NAME,
Expand Down Expand Up @@ -144,7 +144,7 @@ class Resource:
def __init__(
self, attributes: Attributes, schema_url: typing.Optional[str] = None
):
_filter_attributes(attributes)
_clean_attributes(attributes, None)
self._attributes = attributes.copy()
if schema_url is None:
schema_url = ""
Expand Down
Loading

0 comments on commit 5be7a88

Please sign in to comment.