From da96b041a6b12570ac08fddc815eff0680d48d0a Mon Sep 17 00:00:00 2001 From: 4r0n05 <4r0n05@users.noreply.github.com> Date: Sat, 11 Jun 2016 23:03:38 +0200 Subject: [PATCH 1/2] Implement timeout retry --- config.yaml | 3 +++ telegram-history-dump.rb | 25 ++++++++++++++++++++----- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/config.yaml b/config.yaml index 80341ac..086d267 100644 --- a/config.yaml +++ b/config.yaml @@ -100,6 +100,9 @@ # Time in seconds to wait before considering a request stuck (0 = infinite) chunk_timeout: 10 + + # Number of times to retry getting one chunk + chunk_retry: 3 # Time in seconds to wait before considering a download stuck (0 = infinite) # Beware: setting this nonzero may cause a dialog backup to be canceled diff --git a/telegram-history-dump.rb b/telegram-history-dump.rb index 9e5511e..8eb2cf1 100755 --- a/telegram-history-dump.rb +++ b/telegram-history-dump.rb @@ -13,6 +13,9 @@ require_relative 'lib/util' require_relative 'lib/tg_def' +class RetryError < Exception +end + Dir[File.dirname(__FILE__) + '/formatters/*.rb'].each do |file| require File.expand_path(file) end @@ -60,9 +63,21 @@ def dump_dialog(dialog) offset + $config['chunk_size'] ]) msg_chunk = nil - Timeout::timeout($config['chunk_timeout']) do - msg_chunk = exec_tg_command('history', dialog['print_name'], - $config['chunk_size'], offset) + retry_count = 0 + while retry_count <= $config["chunk_retry"] do + begin + Timeout::timeout($config['chunk_timeout']) do + msg_chunk = exec_tg_command('history', dialog['print_name'], + $config['chunk_size'], offset) + end + break + rescue Timeout::Error + if retry_count == $config["chunk_retry"] + raise RetryError + end + $log.error('Timeout, retrying... (%d/%d)' % [retry_count + 1, $config["chunk_retry"]]) + retry_count += 1; + end end raise 'Expected array' unless msg_chunk.is_a?(Array) msg_chunk.reverse_each do |msg| @@ -299,8 +314,8 @@ def save_progress connect_socket dump_dialog(dialog) save_progress - rescue Timeout::Error - $log.error('Command timeout, skipping to next dialog') + rescue RetryError + $log.error('Timeout, skipping to next dialog') disconnect_socket end end From dddbb5505c7068ab08dff5845fd5c9ba67866a56 Mon Sep 17 00:00:00 2001 From: Tim van der Staaij Date: Thu, 30 Jun 2016 17:03:35 +0200 Subject: [PATCH 2/2] Refactor PR #23, skip only one chunk when a chunk request times out (#24) --- config.yaml | 8 +++---- telegram-history-dump.rb | 50 +++++++++++++++++++++++----------------- 2 files changed, 33 insertions(+), 25 deletions(-) diff --git a/config.yaml b/config.yaml index 086d267..7b98bef 100644 --- a/config.yaml +++ b/config.yaml @@ -100,13 +100,13 @@ # Time in seconds to wait before considering a request stuck (0 = infinite) chunk_timeout: 10 - + # Number of times to retry getting one chunk - chunk_retry: 3 + chunk_retry: 3 # Time in seconds to wait before considering a download stuck (0 = infinite) - # Beware: setting this nonzero may cause a dialog backup to be canceled - # because of a large media file + # This setting is not very useful at the moment, it just skips to the next + # dialog if downloading a media file takes too long media_timeout: 0 diff --git a/telegram-history-dump.rb b/telegram-history-dump.rb index 8eb2cf1..cfb2fc2 100755 --- a/telegram-history-dump.rb +++ b/telegram-history-dump.rb @@ -13,9 +13,6 @@ require_relative 'lib/util' require_relative 'lib/tg_def' -class RetryError < Exception -end - Dir[File.dirname(__FILE__) + '/formatters/*.rb'].each do |file| require File.expand_path(file) end @@ -57,29 +54,39 @@ def dump_dialog(dialog) offset = 0 keep_dumping = true while keep_dumping do + cur_offset = offset $log.info('Dumping "%s" (range %d-%d)' % [ dialog['print_name'], - offset + 1, - offset + $config['chunk_size'] + cur_offset + 1, + cur_offset + $config['chunk_size'] ]) msg_chunk = nil retry_count = 0 - while retry_count <= $config["chunk_retry"] do - begin - Timeout::timeout($config['chunk_timeout']) do - msg_chunk = exec_tg_command('history', dialog['print_name'], - $config['chunk_size'], offset) - end - break - rescue Timeout::Error - if retry_count == $config["chunk_retry"] - raise RetryError - end - $log.error('Timeout, retrying... (%d/%d)' % [retry_count + 1, $config["chunk_retry"]]) - retry_count += 1; + while retry_count <= $config['chunk_retry'] do + begin + Timeout::timeout($config['chunk_timeout']) do + msg_chunk = exec_tg_command('history', dialog['print_name'], + $config['chunk_size'], cur_offset) + end + break + rescue Timeout::Error + if retry_count == $config['chunk_retry'] + $log.error('Failed to fetch chunk of %d messages from offset %d '\ + 'after retrying %d times. Dump of "%s" is incomplete.' % [ + $config['chunk_size'], cur_offset, + retry_count, dialog['print_name'] + ]) + msg_chunk = [] + offset += $config['chunk_size'] + break end + $log.error('Timeout, retrying... (%d/%d)' % [ + retry_count += 1, $config['chunk_retry'] + ]) + end end raise 'Expected array' unless msg_chunk.is_a?(Array) + msg_chunk.reverse_each do |msg| dump_msg = true unless msg['id'] @@ -118,7 +125,8 @@ def dump_dialog(dialog) break end end - keep_dumping = false if msg_chunk.length < $config['chunk_size'] + + keep_dumping = false if offset < cur_offset + $config['chunk_size'] sleep($config['chunk_delay']) if keep_dumping end state = $dumper.end_dialog(dialog) || {} @@ -314,8 +322,8 @@ def save_progress connect_socket dump_dialog(dialog) save_progress - rescue RetryError - $log.error('Timeout, skipping to next dialog') + rescue Timeout::Error + $log.error('Unhandled timeout, skipping to next dialog') disconnect_socket end end