Skip to content

Commit

Permalink
Remove duplicates in breadcrumbs
Browse files Browse the repository at this point in the history
Introduced a new method `_format_breadcrumbs` to handle the formatting of breadcrumbs in `SentryReporter`. This includes removing duplicates and ordering by timestamp. The logic for this has been extracted from the `_before_send` method.

Also, updated the `distinct_by` function in `sentry_tools.py` to use a getter function for comparison instead of a key. This provides more flexibility when determining uniqueness.

The `order_by_utc_time` function now accepts an optional key argument for sorting.

Corresponding tests have been added and updated to reflect these changes.
  • Loading branch information
drew2a committed May 27, 2024
1 parent 88e7778 commit 171b4aa
Show file tree
Hide file tree
Showing 3 changed files with 93 additions and 32 deletions.
25 changes: 18 additions & 7 deletions src/tribler/core/sentry_reporter/sentry_reporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

from tribler.core import version
from tribler.core.sentry_reporter.sentry_tools import (
get_first_item,
distinct_by, get_first_item,
get_value, order_by_utc_time
)

Expand Down Expand Up @@ -340,6 +340,21 @@ def get_test_sentry_url() -> Optional[str]:
def is_in_test_mode():
return bool(SentryReporter.get_test_sentry_url())

def _format_breadcrumbs(self, event: Optional[Dict]):
""" Format breadcrumbs in the event:
- Remove duplicates
- Order by timestamp
"""
try:
if breadcrumbs := event.get(BREADCRUMBS):
breadcrumbs_values = breadcrumbs[VALUES]
breadcrumbs_values = distinct_by(breadcrumbs_values, key='timestamp')
breadcrumbs_values = order_by_utc_time(breadcrumbs_values, key='timestamp')

Check warning on line 352 in src/tribler/core/sentry_reporter/sentry_reporter.py

View check run for this annotation

Codecov / codecov/patch

src/tribler/core/sentry_reporter/sentry_reporter.py#L351-L352

Added lines #L351 - L352 were not covered by tests

event[BREADCRUMBS][VALUES] = breadcrumbs_values

Check warning on line 354 in src/tribler/core/sentry_reporter/sentry_reporter.py

View check run for this annotation

Codecov / codecov/patch

src/tribler/core/sentry_reporter/sentry_reporter.py#L354

Added line #L354 was not covered by tests
except Exception as e:
self._logger.exception(e)

def _before_send(self, event: Optional[Dict], hint: Optional[Dict]) -> Optional[Dict]:
"""The method that is called before each send. Both allowed and
disallowed.
Expand Down Expand Up @@ -386,12 +401,8 @@ def _before_send(self, event: Optional[Dict], hint: Optional[Dict]) -> Optional[
if self.scrubber:
event = self.scrubber.scrub_event(event)

# order breadcrumbs by timestamp in ascending order
if breadcrumbs := event.get(BREADCRUMBS):
try:
event[BREADCRUMBS][VALUES] = order_by_utc_time(breadcrumbs[VALUES])
except Exception as e:
self._logger.exception(e)
self._format_breadcrumbs(event)

return event

# pylint: disable=unused-argument
Expand Down
34 changes: 18 additions & 16 deletions src/tribler/core/sentry_reporter/sentry_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@
"""
import re
from dataclasses import dataclass
from datetime import datetime
from typing import Dict, List, Optional
from typing import Any, Callable, Dict, List, Optional, Tuple

from faker import Faker

Expand Down Expand Up @@ -57,37 +56,39 @@ def modify_value(d, key, function):
return d


def distinct_by(list_of_dict, key):
def distinct_by(list_of_dict: Optional[List[Dict]],
getter: Callable[[Dict], Tuple[Any, Any]] = lambda b: (b["timestamp"], b["message"])):
"""This function removes all duplicates from a list of dictionaries. A duplicate
here is a dictionary that have the same value of the given key.
If no key field is presented in the item, then the item will not be considered
If no key field is presented in the dictionary, then the dictionary will not be considered
as a duplicate.
Args:
list_of_dict: list of dictionaries
key: a field key that will be used for items comparison
getter: function that returns a key for the comparison
Returns:
Array of distinct items
"""

if not list_of_dict or not key:
if not list_of_dict:
return list_of_dict

values_viewed = set()
seen = set()
result = []

for item in list_of_dict:
value = get_value(item, key, None)
if value is None:
result.append(item)
for d in list_of_dict:
try:
value = getter(d)
except (KeyError, TypeError):
result.append(d)
continue

if value not in values_viewed:
result.append(item)
if value not in seen:
result.append(d)

values_viewed.add(value)
seen.add(value)

return result

Expand Down Expand Up @@ -124,16 +125,17 @@ def obfuscate_string(s: str, part_of_speech: str = 'noun') -> str:
return faker.word(part_of_speech=part_of_speech)


def order_by_utc_time(breadcrumbs: Optional[List[Dict]]):
def order_by_utc_time(breadcrumbs: Optional[List[Dict]], key: str = 'timestamp'):
""" Order breadcrumbs by timestamp in ascending order.
Args:
breadcrumbs: List of breadcrumbs
key: Field name that will be used for sorting
Returns:
Ordered list of breadcrumbs
"""
if not breadcrumbs:
return breadcrumbs

return list(sorted(breadcrumbs, key=lambda breadcrumb: breadcrumb["timestamp"]))
return list(sorted(breadcrumbs, key=lambda breadcrumb: breadcrumb[key]))
66 changes: 57 additions & 9 deletions src/tribler/core/sentry_reporter/tests/test_sentry_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,17 +64,65 @@ def test_safe_get():
assert get_value({'key': 'value'}, 'key1', {}) == {}


def test_distinct():
assert distinct_by(None, None) is None
assert distinct_by([], None) == []
assert distinct_by([{'key': 'b'}, {'key': 'b'}, {'key': 'c'}, {'': ''}], 'key') == [
{'key': 'b'},
{'key': 'c'},
{'': ''},
def test_distinct_none():
# Test distinct_by with None
assert distinct_by(None) is None


def test_distinct_default():
# Test distinct_by with default getter
values = [
{'message': 'message 1', 'timestamp': 'timestamp 1'},
{'message': 'message 1', 'timestamp': 'timestamp 1'},
{'message': 'message 2', 'timestamp': 'timestamp 2'}
]

expected = [
{'message': 'message 1', 'timestamp': 'timestamp 1'},
{'message': 'message 2', 'timestamp': 'timestamp 2'}
]
assert distinct_by(values) == expected


def test_distinct_key_error():
# Test distinct_by with missing key, it should keep the items
values = [
{'message': 'message 1', },
{'message': 'message 1', },
{'timestamp': 'timestamp 1', },
{'timestamp': 'timestamp 1', },
]

expected = [
{'message': 'message 1', },
{'message': 'message 1', },
{'timestamp': 'timestamp 1', },
{'timestamp': 'timestamp 1', },
]
assert distinct_by(values) == expected


def test_distinct_not_default():
# Test distinct_by with custom getter
values = [
{'message': 'message 1', 'timestamp': 'timestamp 1'},
{'message': 'message 2', 'timestamp': 'timestamp 2'},
{'message': 'message 3', 'timestamp': 'timestamp 1'},
{'message': 'message 4', 'timestamp': 'timestamp 2'},
]

expected = [
{'message': 'message 1', 'timestamp': 'timestamp 1'},
{'message': 'message 2', 'timestamp': 'timestamp 2'},
]
assert distinct_by(values, lambda b: b['timestamp']) == expected


# test nested
assert distinct_by([{'a': {}}], 'b') == [{'a': {}}]
def test_distinct_none_in_list():
# Test distinct_by with None in list
values = [None]
expected = [None]
assert distinct_by(values) == expected


FORMATTED_VERSIONS = [
Expand Down

0 comments on commit 171b4aa

Please sign in to comment.