From 171b4aa36a1702d71c5cd22643742cb7269f8c70 Mon Sep 17 00:00:00 2001 From: Andrei Andreev Date: Mon, 27 May 2024 14:08:24 +0200 Subject: [PATCH] Remove duplicates in breadcrumbs Introduced a new method `_format_breadcrumbs` to handle the formatting of breadcrumbs in `SentryReporter`. This includes removing duplicates and ordering by timestamp. The logic for this has been extracted from the `_before_send` method. Also, updated the `distinct_by` function in `sentry_tools.py` to use a getter function for comparison instead of a key. This provides more flexibility when determining uniqueness. The `order_by_utc_time` function now accepts an optional key argument for sorting. Corresponding tests have been added and updated to reflect these changes. --- .../core/sentry_reporter/sentry_reporter.py | 25 +++++-- .../core/sentry_reporter/sentry_tools.py | 34 +++++----- .../tests/test_sentry_tools.py | 66 ++++++++++++++++--- 3 files changed, 93 insertions(+), 32 deletions(-) diff --git a/src/tribler/core/sentry_reporter/sentry_reporter.py b/src/tribler/core/sentry_reporter/sentry_reporter.py index da63176f63..071dd32423 100644 --- a/src/tribler/core/sentry_reporter/sentry_reporter.py +++ b/src/tribler/core/sentry_reporter/sentry_reporter.py @@ -16,7 +16,7 @@ from tribler.core import version from tribler.core.sentry_reporter.sentry_tools import ( - get_first_item, + distinct_by, get_first_item, get_value, order_by_utc_time ) @@ -340,6 +340,21 @@ def get_test_sentry_url() -> Optional[str]: def is_in_test_mode(): return bool(SentryReporter.get_test_sentry_url()) + def _format_breadcrumbs(self, event: Optional[Dict]): + """ Format breadcrumbs in the event: + - Remove duplicates + - Order by timestamp + """ + try: + if breadcrumbs := event.get(BREADCRUMBS): + breadcrumbs_values = breadcrumbs[VALUES] + breadcrumbs_values = distinct_by(breadcrumbs_values, key='timestamp') + breadcrumbs_values = order_by_utc_time(breadcrumbs_values, key='timestamp') + + event[BREADCRUMBS][VALUES] = breadcrumbs_values + except Exception as e: + self._logger.exception(e) + def _before_send(self, event: Optional[Dict], hint: Optional[Dict]) -> Optional[Dict]: """The method that is called before each send. Both allowed and disallowed. @@ -386,12 +401,8 @@ def _before_send(self, event: Optional[Dict], hint: Optional[Dict]) -> Optional[ if self.scrubber: event = self.scrubber.scrub_event(event) - # order breadcrumbs by timestamp in ascending order - if breadcrumbs := event.get(BREADCRUMBS): - try: - event[BREADCRUMBS][VALUES] = order_by_utc_time(breadcrumbs[VALUES]) - except Exception as e: - self._logger.exception(e) + self._format_breadcrumbs(event) + return event # pylint: disable=unused-argument diff --git a/src/tribler/core/sentry_reporter/sentry_tools.py b/src/tribler/core/sentry_reporter/sentry_tools.py index d3346f41b0..bb02055e52 100644 --- a/src/tribler/core/sentry_reporter/sentry_tools.py +++ b/src/tribler/core/sentry_reporter/sentry_tools.py @@ -3,8 +3,7 @@ """ import re from dataclasses import dataclass -from datetime import datetime -from typing import Dict, List, Optional +from typing import Any, Callable, Dict, List, Optional, Tuple from faker import Faker @@ -57,37 +56,39 @@ def modify_value(d, key, function): return d -def distinct_by(list_of_dict, key): +def distinct_by(list_of_dict: Optional[List[Dict]], + getter: Callable[[Dict], Tuple[Any, Any]] = lambda b: (b["timestamp"], b["message"])): """This function removes all duplicates from a list of dictionaries. A duplicate here is a dictionary that have the same value of the given key. - If no key field is presented in the item, then the item will not be considered + If no key field is presented in the dictionary, then the dictionary will not be considered as a duplicate. Args: list_of_dict: list of dictionaries - key: a field key that will be used for items comparison + getter: function that returns a key for the comparison Returns: Array of distinct items """ - if not list_of_dict or not key: + if not list_of_dict: return list_of_dict - values_viewed = set() + seen = set() result = [] - for item in list_of_dict: - value = get_value(item, key, None) - if value is None: - result.append(item) + for d in list_of_dict: + try: + value = getter(d) + except (KeyError, TypeError): + result.append(d) continue - if value not in values_viewed: - result.append(item) + if value not in seen: + result.append(d) - values_viewed.add(value) + seen.add(value) return result @@ -124,11 +125,12 @@ def obfuscate_string(s: str, part_of_speech: str = 'noun') -> str: return faker.word(part_of_speech=part_of_speech) -def order_by_utc_time(breadcrumbs: Optional[List[Dict]]): +def order_by_utc_time(breadcrumbs: Optional[List[Dict]], key: str = 'timestamp'): """ Order breadcrumbs by timestamp in ascending order. Args: breadcrumbs: List of breadcrumbs + key: Field name that will be used for sorting Returns: Ordered list of breadcrumbs @@ -136,4 +138,4 @@ def order_by_utc_time(breadcrumbs: Optional[List[Dict]]): if not breadcrumbs: return breadcrumbs - return list(sorted(breadcrumbs, key=lambda breadcrumb: breadcrumb["timestamp"])) + return list(sorted(breadcrumbs, key=lambda breadcrumb: breadcrumb[key])) diff --git a/src/tribler/core/sentry_reporter/tests/test_sentry_tools.py b/src/tribler/core/sentry_reporter/tests/test_sentry_tools.py index 2a13ed363b..81fc40c37b 100644 --- a/src/tribler/core/sentry_reporter/tests/test_sentry_tools.py +++ b/src/tribler/core/sentry_reporter/tests/test_sentry_tools.py @@ -64,17 +64,65 @@ def test_safe_get(): assert get_value({'key': 'value'}, 'key1', {}) == {} -def test_distinct(): - assert distinct_by(None, None) is None - assert distinct_by([], None) == [] - assert distinct_by([{'key': 'b'}, {'key': 'b'}, {'key': 'c'}, {'': ''}], 'key') == [ - {'key': 'b'}, - {'key': 'c'}, - {'': ''}, +def test_distinct_none(): + # Test distinct_by with None + assert distinct_by(None) is None + + +def test_distinct_default(): + # Test distinct_by with default getter + values = [ + {'message': 'message 1', 'timestamp': 'timestamp 1'}, + {'message': 'message 1', 'timestamp': 'timestamp 1'}, + {'message': 'message 2', 'timestamp': 'timestamp 2'} + ] + + expected = [ + {'message': 'message 1', 'timestamp': 'timestamp 1'}, + {'message': 'message 2', 'timestamp': 'timestamp 2'} ] + assert distinct_by(values) == expected + + +def test_distinct_key_error(): + # Test distinct_by with missing key, it should keep the items + values = [ + {'message': 'message 1', }, + {'message': 'message 1', }, + {'timestamp': 'timestamp 1', }, + {'timestamp': 'timestamp 1', }, + ] + + expected = [ + {'message': 'message 1', }, + {'message': 'message 1', }, + {'timestamp': 'timestamp 1', }, + {'timestamp': 'timestamp 1', }, + ] + assert distinct_by(values) == expected + + +def test_distinct_not_default(): + # Test distinct_by with custom getter + values = [ + {'message': 'message 1', 'timestamp': 'timestamp 1'}, + {'message': 'message 2', 'timestamp': 'timestamp 2'}, + {'message': 'message 3', 'timestamp': 'timestamp 1'}, + {'message': 'message 4', 'timestamp': 'timestamp 2'}, + ] + + expected = [ + {'message': 'message 1', 'timestamp': 'timestamp 1'}, + {'message': 'message 2', 'timestamp': 'timestamp 2'}, + ] + assert distinct_by(values, lambda b: b['timestamp']) == expected + - # test nested - assert distinct_by([{'a': {}}], 'b') == [{'a': {}}] +def test_distinct_none_in_list(): + # Test distinct_by with None in list + values = [None] + expected = [None] + assert distinct_by(values) == expected FORMATTED_VERSIONS = [