From 47977889301d73d9abafd1d29321e02f6c53dd88 Mon Sep 17 00:00:00 2001 From: Andrei Andreev Date: Wed, 29 May 2024 12:49:40 +0200 Subject: [PATCH] Remove duplicates in breadcrumbs --- .../core/sentry_reporter/sentry_reporter.py | 25 +++++--- .../core/sentry_reporter/sentry_tools.py | 45 +++++++-------- .../tests/test_sentry_reporter.py | 57 ++++++++++++++++++- .../tests/test_sentry_tools.py | 40 ++++++++++--- 4 files changed, 124 insertions(+), 43 deletions(-) diff --git a/src/tribler/core/sentry_reporter/sentry_reporter.py b/src/tribler/core/sentry_reporter/sentry_reporter.py index da63176f63..e37bbfb86d 100644 --- a/src/tribler/core/sentry_reporter/sentry_reporter.py +++ b/src/tribler/core/sentry_reporter/sentry_reporter.py @@ -16,7 +16,7 @@ from tribler.core import version from tribler.core.sentry_reporter.sentry_tools import ( - get_first_item, + distinct_by, get_first_item, get_value, order_by_utc_time ) @@ -340,6 +340,21 @@ def get_test_sentry_url() -> Optional[str]: def is_in_test_mode(): return bool(SentryReporter.get_test_sentry_url()) + def _format_breadcrumbs(self, event: Optional[Dict]): + """ Format breadcrumbs in the event: + - Remove duplicates + - Order by timestamp + """ + try: + if breadcrumbs := event.get(BREADCRUMBS): + breadcrumbs_values = breadcrumbs[VALUES] + breadcrumbs_values = distinct_by(breadcrumbs_values, getter=lambda b: (b["timestamp"], b["message"])) + breadcrumbs_values = order_by_utc_time(breadcrumbs_values) + + event[BREADCRUMBS][VALUES] = breadcrumbs_values + except Exception as e: + self._logger.exception(e) + def _before_send(self, event: Optional[Dict], hint: Optional[Dict]) -> Optional[Dict]: """The method that is called before each send. Both allowed and disallowed. @@ -386,12 +401,8 @@ def _before_send(self, event: Optional[Dict], hint: Optional[Dict]) -> Optional[ if self.scrubber: event = self.scrubber.scrub_event(event) - # order breadcrumbs by timestamp in ascending order - if breadcrumbs := event.get(BREADCRUMBS): - try: - event[BREADCRUMBS][VALUES] = order_by_utc_time(breadcrumbs[VALUES]) - except Exception as e: - self._logger.exception(e) + self._format_breadcrumbs(event) + return event # pylint: disable=unused-argument diff --git a/src/tribler/core/sentry_reporter/sentry_tools.py b/src/tribler/core/sentry_reporter/sentry_tools.py index d3346f41b0..f89e990c5d 100644 --- a/src/tribler/core/sentry_reporter/sentry_tools.py +++ b/src/tribler/core/sentry_reporter/sentry_tools.py @@ -3,8 +3,7 @@ """ import re from dataclasses import dataclass -from datetime import datetime -from typing import Dict, List, Optional +from typing import Any, Callable, Dict, List, Optional, TypeVar from faker import Faker @@ -57,39 +56,32 @@ def modify_value(d, key, function): return d -def distinct_by(list_of_dict, key): +T = TypeVar('T') + + +def distinct_by(items: Optional[List[T]], getter: Callable[[T], Any]) -> Optional[List[T]]: """This function removes all duplicates from a list of dictionaries. A duplicate here is a dictionary that have the same value of the given key. - If no key field is presented in the item, then the item will not be considered - as a duplicate. + If no key field is presented in the dictionary, then the exception will be raised. Args: - list_of_dict: list of dictionaries - key: a field key that will be used for items comparison + items: list of dictionaries + getter: function that returns a key for the comparison Returns: Array of distinct items """ - if not list_of_dict or not key: - return list_of_dict - - values_viewed = set() - result = [] - - for item in list_of_dict: - value = get_value(item, key, None) - if value is None: - result.append(item) - continue - - if value not in values_viewed: - result.append(item) - - values_viewed.add(value) + if not items: + return items - return result + distinct = {} + for item in items: + key = getter(item) + if key not in distinct: + distinct[key] = item + return list(distinct.values()) def format_version(version: Optional[str]) -> Optional[str]: @@ -124,11 +116,12 @@ def obfuscate_string(s: str, part_of_speech: str = 'noun') -> str: return faker.word(part_of_speech=part_of_speech) -def order_by_utc_time(breadcrumbs: Optional[List[Dict]]): +def order_by_utc_time(breadcrumbs: Optional[List[Dict]], key: str = 'timestamp'): """ Order breadcrumbs by timestamp in ascending order. Args: breadcrumbs: List of breadcrumbs + key: Field name that will be used for sorting Returns: Ordered list of breadcrumbs @@ -136,4 +129,4 @@ def order_by_utc_time(breadcrumbs: Optional[List[Dict]]): if not breadcrumbs: return breadcrumbs - return list(sorted(breadcrumbs, key=lambda breadcrumb: breadcrumb["timestamp"])) + return list(sorted(breadcrumbs, key=lambda breadcrumb: breadcrumb[key])) diff --git a/src/tribler/core/sentry_reporter/tests/test_sentry_reporter.py b/src/tribler/core/sentry_reporter/tests/test_sentry_reporter.py index 322320df18..79a7589f6b 100644 --- a/src/tribler/core/sentry_reporter/tests/test_sentry_reporter.py +++ b/src/tribler/core/sentry_reporter/tests/test_sentry_reporter.py @@ -4,10 +4,10 @@ import pytest from tribler.core.sentry_reporter.sentry_reporter import ( - BROWSER, CONTEXTS, NAME, OS_ENVIRON, + BREADCRUMBS, BROWSER, CONTEXTS, NAME, OS_ENVIRON, REPORTER, STACKTRACE, SentryReporter, SentryStrategy, - TAGS, TRIBLER, VERSION, this_sentry_strategy, + TAGS, TRIBLER, VALUES, VERSION, this_sentry_strategy, ) from tribler.core.sentry_reporter.sentry_scrubber import SentryScrubber from tribler.core.utilities.patch_import import patch_import @@ -281,3 +281,56 @@ def test_sentry_strategy(sentry_reporter): assert sentry_reporter.thread_strategy.get() is None assert sentry_reporter.global_strategy == SentryStrategy.SEND_ALLOWED_WITH_CONFIRMATION + + +def test_format_breadcrumbs(sentry_reporter): + # Test that breadcrumbs are sorted by timestamp and duplicates are removed + event = { + BREADCRUMBS: { + VALUES: [ + {'message': 'message 2', 'timestamp': 'timestamp 3'}, + {'message': 'message 1', 'timestamp': 'timestamp 1'}, # wrong order + {'message': 'message 1', 'timestamp': 'timestamp 1'}, # duplicate + {'message': 'message 3', 'timestamp': 'timestamp 4'}, # wrong order + ] + } + } + expected = { + BREADCRUMBS: { + VALUES: [ + {'message': 'message 1', 'timestamp': 'timestamp 1'}, + {'message': 'message 2', 'timestamp': 'timestamp 3'}, + {'message': 'message 3', 'timestamp': 'timestamp 4'}, + ] + } + } + + sentry_reporter._format_breadcrumbs(event) + + assert event == expected + + +def test_format_breadcrumbs_wrong_fields(sentry_reporter): + # Test that in the case of wrong event format the breadcrumbs formatting will be skipped + event = { + BREADCRUMBS: { + VALUES: [ + {'message': 'message 1', 'timestamp': 'timestamp 1'}, + {'message': 'message 1', 'timestamp': 'timestamp 1'}, + {'message': 'message 2'}, # an exception will be raised, therefore format will be skipped + ] + } + } + expected = { + BREADCRUMBS: { + VALUES: [ + {'message': 'message 1', 'timestamp': 'timestamp 1'}, + {'message': 'message 1', 'timestamp': 'timestamp 1'}, + {'message': 'message 2'}, + ] + } + } + + sentry_reporter._format_breadcrumbs(event) + + assert event == expected diff --git a/src/tribler/core/sentry_reporter/tests/test_sentry_tools.py b/src/tribler/core/sentry_reporter/tests/test_sentry_tools.py index 2a13ed363b..fbf0ff8c64 100644 --- a/src/tribler/core/sentry_reporter/tests/test_sentry_tools.py +++ b/src/tribler/core/sentry_reporter/tests/test_sentry_tools.py @@ -64,17 +64,41 @@ def test_safe_get(): assert get_value({'key': 'value'}, 'key1', {}) == {} +def test_distinct_none(): + # Test distinct_by with None + assert distinct_by(None, lambda b: (b["timestamp"], b["message"])) is None + + def test_distinct(): - assert distinct_by(None, None) is None - assert distinct_by([], None) == [] - assert distinct_by([{'key': 'b'}, {'key': 'b'}, {'key': 'c'}, {'': ''}], 'key') == [ - {'key': 'b'}, - {'key': 'c'}, - {'': ''}, + # Test distinct_by with default getter + values = [ + {'message': 'message 1', 'timestamp': 'timestamp 1', 'id': '1'}, + {'message': 'message 1', 'timestamp': 'timestamp 1', 'id': '2'}, + {'message': 'message 2', 'timestamp': 'timestamp 2', 'id': '3'} ] - # test nested - assert distinct_by([{'a': {}}], 'b') == [{'a': {}}] + expected = [ + {'message': 'message 1', 'timestamp': 'timestamp 1', 'id': '1'}, + {'message': 'message 2', 'timestamp': 'timestamp 2', 'id': '3'} + ] + assert distinct_by(values, lambda b: (b["timestamp"], b["message"])) == expected + + +def test_distinct_key_error(): + # Test distinct_by with missing key in getter + values = [ + {'message': 'message 1', }, + ] + with pytest.raises(KeyError): + distinct_by(values, lambda b: (b["timestamp"], b["message"])) + + + +def test_distinct_none_in_list(): + # Test distinct_by with None in list + values = [None] + with pytest.raises(TypeError): + distinct_by(values, lambda b: (b["timestamp"], b["message"])) FORMATTED_VERSIONS = [