Skip to content

Commit

Permalink
Handle upload exceptions allowing --retries to work properly (#3558)
Browse files Browse the repository at this point in the history
Signed-off-by: Mark Huth <[email protected]>
(cherry picked from commit d0dc83c)
  • Loading branch information
mhuth authored and xiangce committed Oct 25, 2022
1 parent 1065fff commit ce1cb34
Show file tree
Hide file tree
Showing 6 changed files with 114 additions and 36 deletions.
3 changes: 1 addition & 2 deletions insights/client/apps/malware_detection/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,7 @@
network_filesystem_types: [nfs, nfs4, cifs, smbfs, fuse.sshfs, ceph, glusterfs, gfs, gfs2]
# Scan the running processes?
# Disabled by default due to an existing issue in Yara that can impact system performance
# when scanning numerous or large processes. Option will be enabled by default when the Yara fix is available.
# Scan_process is disabled by default to prevent an impact on system performance when scanning numerous or large processes.
# When it is false, no processes are scanned and the processes_scan_* options that follow are ignored
scan_processes: false
Expand Down
52 changes: 30 additions & 22 deletions insights/client/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,12 @@ def _legacy_upload(config, pconn, tar_file, content_type, collection_duration=No
logger.info('Uploading Insights data.')
api_response = None
for tries in range(config.retries):
upload = pconn.upload_archive(tar_file, '', collection_duration)
logger.debug("Legacy upload attempt %d of %d ...", tries + 1, config.retries)
try:
upload = pconn.upload_archive(tar_file, '', collection_duration)
except Exception as e:
display_upload_error_and_retry(config, tries, str(e))
continue

if upload.status_code in (200, 201):
api_response = json.loads(upload.text)
Expand Down Expand Up @@ -345,16 +350,7 @@ def _legacy_upload(config, pconn, tar_file, content_type, collection_duration=No
pconn.handle_fail_rcs(upload)
raise RuntimeError('Upload failed.')
else:
logger.error("Upload attempt %d of %d failed! Status Code: %s",
tries + 1, config.retries, upload.status_code)
if tries + 1 != config.retries:
logger.info("Waiting %d seconds then retrying",
constants.sleep_time)
time.sleep(constants.sleep_time)
else:
logger.error("All attempts to upload have failed!")
print("Please see %s for additional information" % config.logging_file)
raise RuntimeError('Upload failed.')
display_upload_error_and_retry(config, tries, "%s: %s" % (upload.status_code, upload.reason))
return api_response


Expand All @@ -363,7 +359,12 @@ def upload(config, pconn, tar_file, content_type, collection_duration=None):
return _legacy_upload(config, pconn, tar_file, content_type, collection_duration)
logger.info('Uploading Insights data.')
for tries in range(config.retries):
upload = pconn.upload_archive(tar_file, content_type, collection_duration)
logger.debug("Upload attempt %d of %d ...", tries + 1, config.retries)
try:
upload = pconn.upload_archive(tar_file, content_type, collection_duration)
except Exception as e:
display_upload_error_and_retry(config, tries, str(e))
continue

if upload.status_code in (200, 202):
write_to_disk(constants.lastupload_file)
Expand All @@ -378,13 +379,20 @@ def upload(config, pconn, tar_file, content_type, collection_duration=None):
pconn.handle_fail_rcs(upload)
raise RuntimeError('Upload failed.')
else:
logger.error("Upload attempt %d of %d failed! Status code: %s",
tries + 1, config.retries, upload.status_code)
if tries + 1 != config.retries:
logger.info("Waiting %d seconds then retrying",
constants.sleep_time)
time.sleep(constants.sleep_time)
else:
logger.error("All attempts to upload have failed!")
print("Please see %s for additional information" % config.logging_file)
raise RuntimeError('Upload failed.')
err_msg = "%s" % upload.status_code
if hasattr(upload, 'reason'):
err_msg += ": %s" % upload.reason
display_upload_error_and_retry(config, tries, err_msg)


def display_upload_error_and_retry(config, tries, error_message):
logger.error("Upload attempt %d of %d failed! Reason: %s",
tries + 1, config.retries, error_message)
if tries + 1 < config.retries:
logger.info("Waiting %d seconds then retrying",
constants.sleep_time)
time.sleep(constants.sleep_time)
else:
logger.error("All attempts to upload have failed!")
print("Please see %s for additional information" % config.logging_file)
raise RuntimeError('Upload failed.')
6 changes: 4 additions & 2 deletions insights/client/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -837,8 +837,10 @@ def _set_app_config(self):
Config values may have been set manually however, so need to take that into consideration
'''
if self.app == 'malware-detection':
if self.retries < 5:
self.retries = 5
# Add extra retries for malware, mainly because it could take a long time to run
# and the results archive shouldn't be discarded after a single failed upload attempt
if self.retries < 3:
self.retries = 3

def _determine_filename_and_extension(self):
'''
Expand Down
40 changes: 32 additions & 8 deletions insights/client/connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,26 +188,44 @@ def _http_request(self, url, method, log_response_text=True, **kwargs):
HTTP response object
'''
logger.log(NETWORK, "%s %s", method, url)
res = self.session.request(url=url, method=method, timeout=self.config.http_timeout, **kwargs)
try:
res = self.session.request(url=url, method=method, timeout=self.config.http_timeout, **kwargs)
except Exception:
raise
logger.log(NETWORK, "HTTP Status: %d %s", res.status_code, res.reason)
if log_response_text or res.status_code != 200:
logger.log(NETWORK, "HTTP Response Text: %s", res.text)
return res

def get(self, url, **kwargs):
return self._http_request(url, 'GET', **kwargs)
try:
return self._http_request(url, 'GET', **kwargs)
except Exception:
raise

def post(self, url, **kwargs):
return self._http_request(url, 'POST', **kwargs)
try:
return self._http_request(url, 'POST', **kwargs)
except Exception:
raise

def put(self, url, **kwargs):
return self._http_request(url, 'PUT', **kwargs)
try:
return self._http_request(url, 'PUT', **kwargs)
except Exception:
raise

def patch(self, url, **kwargs):
return self._http_request(url, 'PATCH', **kwargs)
try:
return self._http_request(url, 'PATCH', **kwargs)
except Exception:
raise

def delete(self, url, **kwargs):
return self._http_request(url, 'DELETE', **kwargs)
try:
return self._http_request(url, 'DELETE', **kwargs)
except Exception:
raise

@property
def user_agent(self):
Expand Down Expand Up @@ -852,7 +870,10 @@ def _legacy_upload_archive(self, data_collected, duration):
logger.debug("Uploading %s to %s", data_collected, upload_url)

headers = {'x-rh-collection-time': str(duration)}
upload = self.post(upload_url, files=files, headers=headers)
try:
upload = self.post(upload_url, files=files, headers=headers)
except Exception:
raise

if upload.status_code in (200, 201):
the_json = json.loads(upload.text)
Expand Down Expand Up @@ -901,7 +922,10 @@ def upload_archive(self, data_collected, content_type, duration=None):
}
logger.debug('content-type: %s', content_type)
logger.debug("Uploading %s to %s", data_collected, upload_url)
upload = self.post(upload_url, files=files, headers={})
try:
upload = self.post(upload_url, files=files, headers={})
except Exception:
raise

logger.debug('Request ID: %s', upload.headers.get('x-rh-insights-request-id', None))
if upload.status_code in (200, 202):
Expand Down
2 changes: 2 additions & 0 deletions insights/tests/client/apps/test_malware_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,8 @@ def test_yara_versions(self, version_mock, exists_mock, log_mock, conf, rules, c
@patch.object(InsightsConnection, '_init_session', return_value=Mock())
@patch.object(MalwareDetectionClient, '_build_yara_command')
@patch.object(MalwareDetectionClient, '_load_config', return_value=CONFIG)
# NOTE: Downloading the malware rules file happens within the malware client code so it's possible to test it here
# However uploading the results archive is done outside the malware client code so it's not possible to test here
class TestGetRules:
""" Testing the _get_rules method """

Expand Down
47 changes: 45 additions & 2 deletions insights/tests/client/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from insights.client.config import InsightsConfig
from insights import package_info
from insights.client.constants import InsightsConstants as constants
from mock.mock import patch, Mock, call
from mock.mock import patch, Mock, call, ANY
from pytest import mark
from pytest import raises

Expand Down Expand Up @@ -313,7 +313,8 @@ def test_reg_check_unregistered_unreachable():
@patch('insights.client.client.InsightsConnection.upload_archive',
return_value=Mock(status_code=500))
@patch('insights.client.os.path.exists', return_value=True)
def test_upload_500_retry(_, upload_archive):
@patch("insights.client.client.logger")
def test_upload_500_retry(logger, _, upload_archive):

# Hack to prevent client from parsing args to py.test
tmp = sys.argv
Expand All @@ -329,10 +330,52 @@ def test_upload_500_retry(_, upload_archive):

upload_archive.assert_called()
assert upload_archive.call_count == retries
logger.error.assert_any_call("Upload attempt %d of %d failed! Reason: %s", 1, config.retries, ANY)
logger.error.assert_called_with("All attempts to upload have failed!")
finally:
sys.argv = tmp


@patch('insights.client.client.constants.sleep_time', 0)
@patch('insights.client.client.InsightsConnection.upload_archive')
@patch("insights.client.client.logger")
def test_upload_exception_retry(logger, upload_archive):
from requests.exceptions import ConnectionError, ProxyError, Timeout, HTTPError, SSLError
upload_archive.side_effect = [ConnectionError("Connection Error"),
ProxyError("Proxy Error"),
Timeout("Timeout Error")]
retries = 3

config = InsightsConfig(legacy_upload=False, logging_file='/tmp/insights.log', retries=retries)
client = InsightsClient(config)
with patch('insights.client.os.path.exists', return_value=True):
with pytest.raises(RuntimeError):
client.upload('/tmp/insights.tar.gz')
assert upload_archive.call_count == retries
logger.debug.assert_any_call("Upload attempt %d of %d ...", 1, config.retries)
logger.error.assert_any_call("Upload attempt %d of %d failed! Reason: %s", 1, config.retries, "Connection Error")
logger.error.assert_any_call("Upload attempt %d of %d failed! Reason: %s", 2, config.retries, "Proxy Error")
logger.error.assert_any_call("Upload attempt %d of %d failed! Reason: %s", 3, config.retries, "Timeout Error")
logger.error.assert_called_with("All attempts to upload have failed!")

# Test legacy uploads
logger.reset_mock()
upload_archive.reset_mock()
upload_archive.side_effect = [HTTPError("HTTP Error"),
SSLError("SSL Error")]
retries = 2
config = InsightsConfig(legacy_upload=True, logging_file='/tmp/insights.log', retries=retries)
client = InsightsClient(config)
with patch('insights.client.os.path.exists', return_value=True):
with pytest.raises(RuntimeError):
client.upload('/tmp/insights.tar.gz')
assert upload_archive.call_count == retries
logger.debug.assert_any_call("Legacy upload attempt %d of %d ...", 1, config.retries)
logger.error.assert_any_call("Upload attempt %d of %d failed! Reason: %s", 1, config.retries, "HTTP Error")
logger.error.assert_any_call("Upload attempt %d of %d failed! Reason: %s", 2, config.retries, "SSL Error")
logger.error.assert_called_with("All attempts to upload have failed!")


@patch('insights.client.client.InsightsConnection.handle_fail_rcs')
@patch('insights.client.client.InsightsConnection.upload_archive',
return_value=Mock(status_code=412))
Expand Down

0 comments on commit ce1cb34

Please sign in to comment.