From a87f13fac039a6dfc0bb5ba7a65073a5ce226fed Mon Sep 17 00:00:00 2001 From: Alexander Kozlovsky Date: Wed, 10 Aug 2022 07:37:16 +0200 Subject: [PATCH 1/9] Show error class name instead of the error code in the log message --- src/tribler/gui/event_request_manager.py | 8 +++++--- src/tribler/gui/utilities.py | 13 ++++++++++++- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/src/tribler/gui/event_request_manager.py b/src/tribler/gui/event_request_manager.py index 78bf4e347d3..855d16c3107 100644 --- a/src/tribler/gui/event_request_manager.py +++ b/src/tribler/gui/event_request_manager.py @@ -10,7 +10,7 @@ from tribler.core.utilities.notifier import Notifier from tribler.gui import gui_sentry_reporter from tribler.gui.exceptions import CoreConnectTimeoutError, CoreConnectionError -from tribler.gui.utilities import connect +from tribler.gui.utilities import connect, make_network_errors_dict received_events = [] @@ -46,6 +46,7 @@ def __init__(self, api_port, api_key, error_handler): self.shutting_down = False self.error_handler = error_handler self._logger = logging.getLogger(self.__class__.__name__) + self.network_errors = make_network_errors_dict() self.connect_timer.setSingleShot(True) connect(self.connect_timer.timeout, self.connect) @@ -95,11 +96,12 @@ def on_tribler_shutdown_state(self,state: str): def on_report_config_error(self, error): self.config_error_signal.emit(error) - def on_error(self, error, reschedule_on_err): + def on_error(self, error: int, reschedule_on_err: bool): if error == QNetworkReply.ConnectionRefusedError: self._logger.debug("Tribler Core refused connection, retrying...") else: - raise CoreConnectionError(f"Error {error} while trying to connect to Tribler Core") + error_name = self.network_errors.get(error, error) + raise CoreConnectionError(f"Error {error_name} while trying to connect to Tribler Core") if self.remaining_connection_attempts <= 0: raise CoreConnectTimeoutError("Could not connect with the Tribler Core within " diff --git a/src/tribler/gui/utilities.py b/src/tribler/gui/utilities.py index 8442ee137f0..1565937d917 100644 --- a/src/tribler/gui/utilities.py +++ b/src/tribler/gui/utilities.py @@ -8,7 +8,7 @@ import types from datetime import datetime, timedelta from pathlib import Path -from typing import Callable +from typing import Callable, Dict from urllib.parse import quote_plus from uuid import uuid4 @@ -21,6 +21,7 @@ pyqtSignal, ) from PyQt5.QtGui import QPixmap, QRegion +from PyQt5.QtNetwork import QNetworkReply from PyQt5.QtWidgets import QApplication, QMessageBox import tribler.gui @@ -490,3 +491,13 @@ def show_message_box(text: str = '', title: str = 'Error', icon: QMessageBox.Ico message_box.setWindowTitle(title) message_box.setText(text) message_box.exec_() + + +def make_network_errors_dict() -> Dict[int, str]: + network_errors = {} + for name in dir(QNetworkReply): + if name.endswith('Error'): + value = getattr(QNetworkReply, name) + if isinstance(value, int): + network_errors[value] = name + return network_errors From 1ba7a8c992a1bbc21f845f2eb4b5cd5238f27714 Mon Sep 17 00:00:00 2001 From: Alexander Kozlovsky Date: Wed, 10 Aug 2022 07:41:53 +0200 Subject: [PATCH 2/9] Improved log messages --- src/tribler/gui/core_manager.py | 8 ++++++-- src/tribler/gui/event_request_manager.py | 13 ++++++++++--- src/tribler/gui/tribler_window.py | 4 +++- 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/src/tribler/gui/core_manager.py b/src/tribler/gui/core_manager.py index e21ea2131a3..c74cd348866 100644 --- a/src/tribler/gui/core_manager.py +++ b/src/tribler/gui/core_manager.py @@ -51,8 +51,11 @@ def __init__(self, root_state_dir: Path, api_port: int, api_key: str, app_manage connect(self.events_manager.core_connected, self.on_core_connected) def on_core_connected(self, _): - if not self.core_finished: - self.core_connected = True + if self.core_finished: + self._logger.warning('Core connected after the core process is already finished') + return + + self.core_connected = True def start(self, core_args=None, core_env=None, upgrade_manager=None, run_core=True): """ @@ -170,6 +173,7 @@ def send_shutdown_request(initial=False): self.app_manager.quit_application() def on_core_finished(self, exit_code, exit_status): + self._logger.info("Core process finished") self.core_running = False self.core_finished = True if self.shutting_down: diff --git a/src/tribler/gui/event_request_manager.py b/src/tribler/gui/event_request_manager.py index 855d16c3107..1c28a07c9e3 100644 --- a/src/tribler/gui/event_request_manager.py +++ b/src/tribler/gui/event_request_manager.py @@ -43,6 +43,7 @@ def __init__(self, api_port, api_key, error_handler): self.connect_timer = QTimer() self.current_event_string = "" self.reply = None + self.receiving_data = False self.shutting_down = False self.error_handler = error_handler self._logger = logging.getLogger(self.__class__.__name__) @@ -97,13 +98,15 @@ def on_report_config_error(self, error): self.config_error_signal.emit(error) def on_error(self, error: int, reschedule_on_err: bool): + should_retry = self.remaining_connection_attempts > 0 + if error == QNetworkReply.ConnectionRefusedError: - self._logger.debug("Tribler Core refused connection, retrying...") + self._logger.info("Tribler Core refused connection" + (', will retry...' if should_retry else '')) else: error_name = self.network_errors.get(error, error) raise CoreConnectionError(f"Error {error_name} while trying to connect to Tribler Core") - if self.remaining_connection_attempts <= 0: + if should_retry: raise CoreConnectTimeoutError("Could not connect with the Tribler Core within " f"{RECONNECT_INTERVAL_MS*CORE_CONNECTION_ATTEMPTS_LIMIT//1000} seconds") @@ -114,6 +117,10 @@ def on_error(self, error: int, reschedule_on_err: bool): self.connect_timer.start(RECONNECT_INTERVAL_MS) def on_read_data(self): + if not self.receiving_data: + self.receiving_data = True + self._logger.info('Starts receiving data from Core') + if self.receivers(self.finished) == 0: connect(self.finished, lambda reply: self.on_finished()) self.connect_timer.stop() @@ -148,7 +155,7 @@ def on_finished(self): self.connect_timer.start(RECONNECT_INTERVAL_MS) def connect(self, reschedule_on_err=True): - self._logger.debug("Will connect to events endpoint") + self._logger.info(f"Connecting to events endpoint ({'with' if reschedule_on_err else 'without'} retrying)") if self.reply is not None: self.reply.deleteLater() self.reply = self.get(self.request) diff --git a/src/tribler/gui/tribler_window.py b/src/tribler/gui/tribler_window.py index 9a20c443843..363673e5d71 100644 --- a/src/tribler/gui/tribler_window.py +++ b/src/tribler/gui/tribler_window.py @@ -527,9 +527,11 @@ def tray_show_message(self, title, message): def on_core_connected(self, version): if self.tribler_started: - logging.warning("Received duplicate Tribler Core started event") + self._logger.warning("Received duplicate Tribler Core connected event") return + self._logger.info("Core connected") + self.tribler_started = True self.tribler_version = version From d6d7dc9e807c2ae026157d6b74289127747c7eed Mon Sep 17 00:00:00 2001 From: Alexander Kozlovsky Date: Wed, 10 Aug 2022 08:14:27 +0200 Subject: [PATCH 3/9] Retry on any error, not only on ConnectionRefusedError --- src/tribler/gui/event_request_manager.py | 35 +++++++++++++----------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/src/tribler/gui/event_request_manager.py b/src/tribler/gui/event_request_manager.py index 1c28a07c9e3..38d76d2c473 100644 --- a/src/tribler/gui/event_request_manager.py +++ b/src/tribler/gui/event_request_manager.py @@ -9,7 +9,7 @@ from tribler.core.components.reporter.reported_error import ReportedError from tribler.core.utilities.notifier import Notifier from tribler.gui import gui_sentry_reporter -from tribler.gui.exceptions import CoreConnectTimeoutError, CoreConnectionError +from tribler.gui.exceptions import CoreConnectTimeoutError from tribler.gui.utilities import connect, make_network_errors_dict received_events = [] @@ -98,23 +98,26 @@ def on_report_config_error(self, error): self.config_error_signal.emit(error) def on_error(self, error: int, reschedule_on_err: bool): - should_retry = self.remaining_connection_attempts > 0 - - if error == QNetworkReply.ConnectionRefusedError: - self._logger.info("Tribler Core refused connection" + (', will retry...' if should_retry else '')) - else: - error_name = self.network_errors.get(error, error) - raise CoreConnectionError(f"Error {error_name} while trying to connect to Tribler Core") - - if should_retry: - raise CoreConnectTimeoutError("Could not connect with the Tribler Core within " - f"{RECONNECT_INTERVAL_MS*CORE_CONNECTION_ATTEMPTS_LIMIT//1000} seconds") - - self.remaining_connection_attempts -= 1 + # If the REST API server is not started yet and the port is not opened, the error will be received. + # The specific error can be different on different systems: + # - QNetworkReply.ConnectionRefusedError (code 1); + # - QNetworkReply.HostNotFoundError (code 3); + # - QNetworkReply.TimeoutError (code 4); + # - QNetworkReply.UnknownNetworkError (code 99). + # Tribler GUI should retry on any of these errors. + + should_retry = reschedule_on_err and self.remaining_connection_attempts > 0 + error_name = self.network_errors.get(error, error) + self._logger.info(f"Error {error_name} while trying to connect to Tribler Core" + + (', will retry...' if should_retry else '')) if reschedule_on_err: - # Reschedule an attempt - self.connect_timer.start(RECONNECT_INTERVAL_MS) + if should_retry: + self.remaining_connection_attempts -= 1 + self.connect_timer.start(RECONNECT_INTERVAL_MS) # Reschedule an attempt + else: + raise CoreConnectTimeoutError("Could not connect with the Tribler Core within " + f"{RECONNECT_INTERVAL_MS*CORE_CONNECTION_ATTEMPTS_LIMIT//1000} seconds") def on_read_data(self): if not self.receiving_data: From 558638baf4cc6b5a048adfd364664ad8dfda0db6 Mon Sep 17 00:00:00 2001 From: Alexander Kozlovsky Date: Wed, 10 Aug 2022 08:23:12 +0200 Subject: [PATCH 4/9] Fix the retry timeout logic, the previous logic did not take into account that getting the error response is not immediate and can take up to 5 seconds --- src/tribler/gui/event_request_manager.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/src/tribler/gui/event_request_manager.py b/src/tribler/gui/event_request_manager.py index 38d76d2c473..207d76caef0 100644 --- a/src/tribler/gui/event_request_manager.py +++ b/src/tribler/gui/event_request_manager.py @@ -14,8 +14,8 @@ received_events = [] -CORE_CONNECTION_ATTEMPTS_LIMIT = 120 -RECONNECT_INTERVAL_MS = 500 +CORE_CONNECTION_TIMEOUT = 60 +RECONNECT_INTERVAL_MS = 100 class EventRequestManager(QNetworkAccessManager): @@ -39,7 +39,7 @@ def __init__(self, api_port, api_key, error_handler): url = QUrl("http://localhost:%d/events" % api_port) self.request = QNetworkRequest(url) self.request.setRawHeader(b'X-Api-Key', api_key.encode('ascii')) - self.remaining_connection_attempts = CORE_CONNECTION_ATTEMPTS_LIMIT + self.start_time = time.time() self.connect_timer = QTimer() self.current_event_string = "" self.reply = None @@ -106,18 +106,23 @@ def on_error(self, error: int, reschedule_on_err: bool): # - QNetworkReply.UnknownNetworkError (code 99). # Tribler GUI should retry on any of these errors. - should_retry = reschedule_on_err and self.remaining_connection_attempts > 0 + # Depending on the system, while the server is not started, the error can be returned with some delay + # (like, five seconds). But don't try to specify a timeout using request.setTransferTimeout(REQUEST_TIMEOUT_MS). + # First, it is unnecessary, as the reply is sent almost immediately after the REST API is started, + # so the GUI will not wait five seconds for that. Also, with TransferTimeout specified, AIOHTTP starts + # raising ConnectionResetError "Cannot write to closing transport". + + should_retry = reschedule_on_err and time.time() < self.start_time + CORE_CONNECTION_TIMEOUT error_name = self.network_errors.get(error, error) self._logger.info(f"Error {error_name} while trying to connect to Tribler Core" + (', will retry...' if should_retry else '')) if reschedule_on_err: if should_retry: - self.remaining_connection_attempts -= 1 self.connect_timer.start(RECONNECT_INTERVAL_MS) # Reschedule an attempt else: - raise CoreConnectTimeoutError("Could not connect with the Tribler Core within " - f"{RECONNECT_INTERVAL_MS*CORE_CONNECTION_ATTEMPTS_LIMIT//1000} seconds") + raise CoreConnectTimeoutError( + f"Could not connect with the Tribler Core within {CORE_CONNECTION_TIMEOUT} seconds") def on_read_data(self): if not self.receiving_data: @@ -154,7 +159,7 @@ def on_finished(self): if self.shutting_down: return self._logger.warning("Events connection dropped, attempting to reconnect") - self.remaining_connection_attempts = CORE_CONNECTION_ATTEMPTS_LIMIT + self.start_time = time.time() self.connect_timer.start(RECONNECT_INTERVAL_MS) def connect(self, reschedule_on_err=True): From a7bf7c974700428cc59980787a7a0220ab45953a Mon Sep 17 00:00:00 2001 From: Alexander Kozlovsky Date: Wed, 10 Aug 2022 08:35:05 +0200 Subject: [PATCH 5/9] Add typing --- src/tribler/gui/event_request_manager.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/tribler/gui/event_request_manager.py b/src/tribler/gui/event_request_manager.py index 207d76caef0..87e8255c77a 100644 --- a/src/tribler/gui/event_request_manager.py +++ b/src/tribler/gui/event_request_manager.py @@ -1,6 +1,7 @@ import json import logging import time +from typing import Optional from PyQt5.QtCore import QTimer, QUrl, pyqtSignal from PyQt5.QtNetwork import QNetworkAccessManager, QNetworkReply, QNetworkRequest @@ -42,7 +43,7 @@ def __init__(self, api_port, api_key, error_handler): self.start_time = time.time() self.connect_timer = QTimer() self.current_event_string = "" - self.reply = None + self.reply: Optional[QNetworkReply] = None self.receiving_data = False self.shutting_down = False self.error_handler = error_handler From 9a05cc62cc903a735e44ef4146500084f15d228f Mon Sep 17 00:00:00 2001 From: Alexander Kozlovsky Date: Wed, 17 Aug 2022 01:21:23 +0200 Subject: [PATCH 6/9] Extract CoreManager.kill_core_process() --- src/tribler/gui/core_manager.py | 10 ++++++++++ src/tribler/gui/tribler_window.py | 7 +------ 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/src/tribler/gui/core_manager.py b/src/tribler/gui/core_manager.py index c74cd348866..ef3c8843bf3 100644 --- a/src/tribler/gui/core_manager.py +++ b/src/tribler/gui/core_manager.py @@ -172,6 +172,16 @@ def send_shutdown_request(initial=False): self._logger.info('Core finished, quitting GUI application') self.app_manager.quit_application() + def kill_core_process(self): + if not self.core_process: + self._logger.warning("Cannot kill the Core process as it is not initialized") + + pid = self.core_process.pid() + try: + os.kill(pid, 9) + except OSError: + pass + def on_core_finished(self, exit_code, exit_status): self._logger.info("Core process finished") self.core_running = False diff --git a/src/tribler/gui/tribler_window.py b/src/tribler/gui/tribler_window.py index 363673e5d71..1b75fa517c8 100644 --- a/src/tribler/gui/tribler_window.py +++ b/src/tribler/gui/tribler_window.py @@ -1207,12 +1207,7 @@ def dropEvent(self, e): e.accept() def clicked_force_shutdown(self): - pid = self.core_manager.core_process.pid() - try: - os.kill(pid, 9) - except OSError: - pass - + self.core_manager.kill_core_process() self.app_manager.quit_application() def clicked_skip_conversion(self): From 80f4068d0b6667a83c5c18250a0077714e80c827 Mon Sep 17 00:00:00 2001 From: Alexander Kozlovsky Date: Wed, 17 Aug 2022 01:21:23 +0200 Subject: [PATCH 7/9] kill_core_process() -> kill_core_process_and_remove_the_lock_file() --- src/tribler/gui/core_manager.py | 6 +++++- src/tribler/gui/tribler_window.py | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/tribler/gui/core_manager.py b/src/tribler/gui/core_manager.py index ef3c8843bf3..ed946248249 100644 --- a/src/tribler/gui/core_manager.py +++ b/src/tribler/gui/core_manager.py @@ -7,6 +7,7 @@ from PyQt5.QtCore import QObject, QProcess, QProcessEnvironment, pyqtSignal from PyQt5.QtNetwork import QNetworkRequest +from tribler.core.utilities.process_checker import ProcessChecker from tribler.gui.app_manager import AppManager from tribler.gui.event_request_manager import EventRequestManager from tribler.gui.exceptions import CoreCrashedError @@ -172,7 +173,7 @@ def send_shutdown_request(initial=False): self._logger.info('Core finished, quitting GUI application') self.app_manager.quit_application() - def kill_core_process(self): + def kill_core_process_and_remove_the_lock_file(self): if not self.core_process: self._logger.warning("Cannot kill the Core process as it is not initialized") @@ -182,6 +183,9 @@ def kill_core_process(self): except OSError: pass + process_checker = ProcessChecker(self.root_state_dir) + process_checker.remove_lock() + def on_core_finished(self, exit_code, exit_status): self._logger.info("Core process finished") self.core_running = False diff --git a/src/tribler/gui/tribler_window.py b/src/tribler/gui/tribler_window.py index 1b75fa517c8..ff414aa0fc5 100644 --- a/src/tribler/gui/tribler_window.py +++ b/src/tribler/gui/tribler_window.py @@ -1207,7 +1207,7 @@ def dropEvent(self, e): e.accept() def clicked_force_shutdown(self): - self.core_manager.kill_core_process() + self.core_manager.kill_core_process_and_remove_the_lock_file() self.app_manager.quit_application() def clicked_skip_conversion(self): From 4b156d2bc7f4b856da1efa2ba6eb5f50867ca679 Mon Sep 17 00:00:00 2001 From: Alexander Kozlovsky Date: Wed, 10 Aug 2022 09:23:48 +0200 Subject: [PATCH 8/9] Use QProcess.kill() instead of os.kill(pid, 9) --- src/tribler/gui/core_manager.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/tribler/gui/core_manager.py b/src/tribler/gui/core_manager.py index ed946248249..9e0063b1420 100644 --- a/src/tribler/gui/core_manager.py +++ b/src/tribler/gui/core_manager.py @@ -177,11 +177,10 @@ def kill_core_process_and_remove_the_lock_file(self): if not self.core_process: self._logger.warning("Cannot kill the Core process as it is not initialized") - pid = self.core_process.pid() - try: - os.kill(pid, 9) - except OSError: - pass + self.core_process.kill() + finished = self.core_process.waitForFinished() + if not finished: + self._logger.error('Cannot kill the core process') process_checker = ProcessChecker(self.root_state_dir) process_checker.remove_lock() From 799c5bbeef33ba98f15cab906d34f2b129ea41f6 Mon Sep 17 00:00:00 2001 From: Alexander Kozlovsky Date: Wed, 10 Aug 2022 09:25:45 +0200 Subject: [PATCH 9/9] Proper shutdown in case Tribler GUI cannot connect to Core --- src/tribler/gui/core_manager.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/tribler/gui/core_manager.py b/src/tribler/gui/core_manager.py index 9e0063b1420..2d861e0764f 100644 --- a/src/tribler/gui/core_manager.py +++ b/src/tribler/gui/core_manager.py @@ -56,6 +56,10 @@ def on_core_connected(self, _): self._logger.warning('Core connected after the core process is already finished') return + if self.shutting_down: + self._logger.warning('Core connected after the shutting down is already started') + return + self.core_connected = True def start(self, core_args=None, core_env=None, upgrade_manager=None, run_core=True): @@ -151,8 +155,13 @@ def stop(self, quit_app_on_core_finished=True): self.shutting_down = True self._logger.info("Stopping Core manager") - need_to_shutdown_core = (self.core_process or self.core_connected) and not self.core_finished - if need_to_shutdown_core: + if self.core_process and not self.core_finished: + if not self.core_connected: + # If Core is not connected via events_manager it also most probably cannot process API requests. + self._logger.warning('Core is not connected during the CoreManager shutdown, killing it...') + self.kill_core_process_and_remove_the_lock_file() + return + self.events_manager.shutting_down = True def shutdown_request_processed(response): @@ -170,7 +179,7 @@ def send_shutdown_request(initial=False): send_shutdown_request(initial=True) elif self.should_quit_app_on_core_finished: - self._logger.info('Core finished, quitting GUI application') + self._logger.info('Core is not running, quitting GUI application') self.app_manager.quit_application() def kill_core_process_and_remove_the_lock_file(self):