Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove duplicates in breadcrumbs #8039

Merged
merged 1 commit into from
May 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 18 additions & 7 deletions src/tribler/core/sentry_reporter/sentry_reporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

from tribler.core import version
from tribler.core.sentry_reporter.sentry_tools import (
get_first_item,
distinct_by, get_first_item,
get_value, order_by_utc_time
)

Expand Down Expand Up @@ -340,6 +340,21 @@ def get_test_sentry_url() -> Optional[str]:
def is_in_test_mode():
return bool(SentryReporter.get_test_sentry_url())

def _format_breadcrumbs(self, event: Optional[Dict]):
""" Format breadcrumbs in the event:
- Remove duplicates
- Order by timestamp
"""
try:
if breadcrumbs := event.get(BREADCRUMBS):
breadcrumbs_values = breadcrumbs[VALUES]
breadcrumbs_values = distinct_by(breadcrumbs_values, getter=lambda b: (b["timestamp"], b["message"]))
breadcrumbs_values = order_by_utc_time(breadcrumbs_values)

event[BREADCRUMBS][VALUES] = breadcrumbs_values
except Exception as e:
self._logger.exception(e)

def _before_send(self, event: Optional[Dict], hint: Optional[Dict]) -> Optional[Dict]:
"""The method that is called before each send. Both allowed and
disallowed.
Expand Down Expand Up @@ -386,12 +401,8 @@ def _before_send(self, event: Optional[Dict], hint: Optional[Dict]) -> Optional[
if self.scrubber:
event = self.scrubber.scrub_event(event)

# order breadcrumbs by timestamp in ascending order
if breadcrumbs := event.get(BREADCRUMBS):
try:
event[BREADCRUMBS][VALUES] = order_by_utc_time(breadcrumbs[VALUES])
except Exception as e:
self._logger.exception(e)
self._format_breadcrumbs(event)

return event

# pylint: disable=unused-argument
Expand Down
45 changes: 19 additions & 26 deletions src/tribler/core/sentry_reporter/sentry_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@
"""
import re
from dataclasses import dataclass
from datetime import datetime
from typing import Dict, List, Optional
from typing import Any, Callable, Dict, List, Optional, TypeVar

from faker import Faker

Expand Down Expand Up @@ -57,39 +56,32 @@ def modify_value(d, key, function):
return d


def distinct_by(list_of_dict, key):
T = TypeVar('T')


def distinct_by(items: Optional[List[T]], getter: Callable[[T], Any]) -> Optional[List[T]]:
"""This function removes all duplicates from a list of dictionaries. A duplicate
here is a dictionary that have the same value of the given key.

If no key field is presented in the item, then the item will not be considered
as a duplicate.
If no key field is presented in the dictionary, then the exception will be raised.

Args:
list_of_dict: list of dictionaries
key: a field key that will be used for items comparison
items: list of dictionaries
getter: function that returns a key for the comparison

Returns:
Array of distinct items
"""

if not list_of_dict or not key:
return list_of_dict

values_viewed = set()
result = []

for item in list_of_dict:
value = get_value(item, key, None)
if value is None:
result.append(item)
continue

if value not in values_viewed:
result.append(item)

values_viewed.add(value)
if not items:
return items

return result
distinct = {}
for item in items:
key = getter(item)
if key not in distinct:
distinct[key] = item
return list(distinct.values())


def format_version(version: Optional[str]) -> Optional[str]:
Expand Down Expand Up @@ -124,16 +116,17 @@ def obfuscate_string(s: str, part_of_speech: str = 'noun') -> str:
return faker.word(part_of_speech=part_of_speech)


def order_by_utc_time(breadcrumbs: Optional[List[Dict]]):
def order_by_utc_time(breadcrumbs: Optional[List[Dict]], key: str = 'timestamp'):
""" Order breadcrumbs by timestamp in ascending order.

Args:
breadcrumbs: List of breadcrumbs
key: Field name that will be used for sorting

Returns:
Ordered list of breadcrumbs
"""
if not breadcrumbs:
return breadcrumbs

return list(sorted(breadcrumbs, key=lambda breadcrumb: breadcrumb["timestamp"]))
return list(sorted(breadcrumbs, key=lambda breadcrumb: breadcrumb[key]))
57 changes: 55 additions & 2 deletions src/tribler/core/sentry_reporter/tests/test_sentry_reporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@
import pytest

from tribler.core.sentry_reporter.sentry_reporter import (
BROWSER, CONTEXTS, NAME, OS_ENVIRON,
BREADCRUMBS, BROWSER, CONTEXTS, NAME, OS_ENVIRON,
REPORTER, STACKTRACE, SentryReporter,
SentryStrategy,
TAGS, TRIBLER, VERSION, this_sentry_strategy,
TAGS, TRIBLER, VALUES, VERSION, this_sentry_strategy,
)
from tribler.core.sentry_reporter.sentry_scrubber import SentryScrubber
from tribler.core.utilities.patch_import import patch_import
Expand Down Expand Up @@ -281,3 +281,56 @@ def test_sentry_strategy(sentry_reporter):

assert sentry_reporter.thread_strategy.get() is None
assert sentry_reporter.global_strategy == SentryStrategy.SEND_ALLOWED_WITH_CONFIRMATION


def test_format_breadcrumbs(sentry_reporter):
# Test that breadcrumbs are sorted by timestamp and duplicates are removed
event = {
BREADCRUMBS: {
VALUES: [
{'message': 'message 2', 'timestamp': 'timestamp 3'},
{'message': 'message 1', 'timestamp': 'timestamp 1'}, # wrong order
{'message': 'message 1', 'timestamp': 'timestamp 1'}, # duplicate
{'message': 'message 3', 'timestamp': 'timestamp 4'}, # wrong order
]
}
}
expected = {
BREADCRUMBS: {
VALUES: [
{'message': 'message 1', 'timestamp': 'timestamp 1'},
{'message': 'message 2', 'timestamp': 'timestamp 3'},
{'message': 'message 3', 'timestamp': 'timestamp 4'},
]
}
}

sentry_reporter._format_breadcrumbs(event)

assert event == expected


def test_format_breadcrumbs_wrong_fields(sentry_reporter):
# Test that in the case of wrong event format the breadcrumbs formatting will be skipped
event = {
BREADCRUMBS: {
VALUES: [
{'message': 'message 1', 'timestamp': 'timestamp 1'},
{'message': 'message 1', 'timestamp': 'timestamp 1'},
{'message': 'message 2'}, # an exception will be raised, therefore format will be skipped
]
}
}
expected = {
BREADCRUMBS: {
VALUES: [
{'message': 'message 1', 'timestamp': 'timestamp 1'},
{'message': 'message 1', 'timestamp': 'timestamp 1'},
{'message': 'message 2'},
]
}
}

sentry_reporter._format_breadcrumbs(event)

assert event == expected
40 changes: 32 additions & 8 deletions src/tribler/core/sentry_reporter/tests/test_sentry_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,17 +64,41 @@ def test_safe_get():
assert get_value({'key': 'value'}, 'key1', {}) == {}


def test_distinct_none():
# Test distinct_by with None
assert distinct_by(None, lambda b: (b["timestamp"], b["message"])) is None


def test_distinct():
assert distinct_by(None, None) is None
assert distinct_by([], None) == []
assert distinct_by([{'key': 'b'}, {'key': 'b'}, {'key': 'c'}, {'': ''}], 'key') == [
{'key': 'b'},
{'key': 'c'},
{'': ''},
# Test distinct_by with default getter
values = [
{'message': 'message 1', 'timestamp': 'timestamp 1', 'id': '1'},
{'message': 'message 1', 'timestamp': 'timestamp 1', 'id': '2'},
{'message': 'message 2', 'timestamp': 'timestamp 2', 'id': '3'}
]

# test nested
assert distinct_by([{'a': {}}], 'b') == [{'a': {}}]
expected = [
{'message': 'message 1', 'timestamp': 'timestamp 1', 'id': '1'},
{'message': 'message 2', 'timestamp': 'timestamp 2', 'id': '3'}
]
assert distinct_by(values, lambda b: (b["timestamp"], b["message"])) == expected


def test_distinct_key_error():
# Test distinct_by with missing key in getter
values = [
{'message': 'message 1', },
]
with pytest.raises(KeyError):
distinct_by(values, lambda b: (b["timestamp"], b["message"]))



def test_distinct_none_in_list():
# Test distinct_by with None in list
values = [None]
with pytest.raises(TypeError):
distinct_by(values, lambda b: (b["timestamp"], b["message"]))


FORMATTED_VERSIONS = [
Expand Down
Loading