Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add new param for allowed number of retries before restart #78

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 31 additions & 5 deletions superlance/httpok.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,11 @@
attempt to restart processes in the RUNNING state specified by
-p or -a. This defaults to 10 seconds.

-r -- The number of retries that httpok should attempt before restarting
processes. httpok will take action on processes only after this
number of consecutive retries has failed to produce a successful
response. The default is 0, which acts on the first failed response.

-c -- specify an expected HTTP status code from a GET request to the
URL. If this status code is not the status code provided by the
response, httpok will attempt to restart processes in the
Expand Down Expand Up @@ -114,12 +119,14 @@ def usage():
class HTTPOk:
connclass = None
def __init__(self, rpc, programs, any, url, timeout, status, inbody,
email, sendmail, coredir, gcore, eager, retry_time):
email, sendmail, coredir, gcore, eager, retry_time, allowed_retries=0):
self.rpc = rpc
self.programs = programs
self.any = any
self.url = url
self.timeout = timeout
self.allowed_retries = allowed_retries
self.attempted_retries = 0
self.retry_time = retry_time
self.status = status
self.inbody = inbody
Expand Down Expand Up @@ -201,15 +208,29 @@ def runforever(self, test=False):

if str(status) != str(self.status):
subject = 'httpok for %s: bad status returned' % self.url
self.act(subject, msg)
self.retry_or_act(subject, msg)
elif self.inbody and self.inbody not in body:
subject = 'httpok for %s: bad body returned' % self.url
self.act(subject, msg)
self.retry_or_act(subject, msg)
else:
# reset this counter as we have a successful response here
self.attempted_retries = 0

childutils.listener.ok(self.stdout)
if test:
break

def retry_or_act(self, subject, msg):
    """Count a failed check as a retry, or hand it to ``act``.

    While ``attempted_retries`` is still below ``allowed_retries`` the
    failure is only logged to ``self.stderr`` and the counter is
    incremented.  Once the allowance is used up, ``self.act(subject, msg)``
    is called instead; the counter is left untouched in that case.
    """
    if self.attempted_retries < self.allowed_retries:
        # Computed before the increment, so the first failure reports the
        # full allowance.
        remaining = self.allowed_retries - self.attempted_retries
        self.attempted_retries += 1
        log = self.stderr
        log.write('Error occurred: %s\n' % msg)
        log.write('Allowed number of retries not exceeded, '
                  'will try again %d more times.\n' % remaining)
        log.flush()
        return

    self.act(subject, msg)

def act(self, subject, msg):
messages = [msg]

Expand Down Expand Up @@ -299,12 +320,13 @@ def restart(self, spec, write):

def main(argv=sys.argv):
import getopt
short_args="hp:at:c:b:s:m:g:d:eE"
short_args="hp:at:r:c:b:s:m:g:d:eE"
long_args=[
"help",
"program=",
"any",
"timeout=",
"retry=",
"code=",
"body=",
"sendmail_program=",
Expand Down Expand Up @@ -333,6 +355,7 @@ def main(argv=sys.argv):
eager = True
email = None
timeout = 10
allowed_retries = 1
retry_time = 10
status = '200'
inbody = None
Expand All @@ -357,6 +380,9 @@ def main(argv=sys.argv):
if option in ('-t', '--timeout'):
timeout = int(value)

if option in ('-r', '--retry'):
allowed_retries = int(value)

if option in ('-c', '--code'):
status = value

Expand Down Expand Up @@ -388,7 +414,7 @@ def main(argv=sys.argv):
return

prog = HTTPOk(rpc, programs, any, url, timeout, status, inbody, email,
sendmail, coredir, gcore, eager, retry_time)
sendmail, coredir, gcore, eager, retry_time, allowed_retries)
prog.runforever()

if __name__ == '__main__':
Expand Down
99 changes: 65 additions & 34 deletions superlance/tests/httpok_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,12 @@ def __init__(self, hostport):
self.hostport = hostport

def request(self, method, path, headers):
if exc:
if exc == True:
raise ValueError('foo')
else:
raise exc.pop()
error = exc.pop() if isinstance(exc, list) and exc else exc
if isinstance(error, BaseException):
raise error
elif error:
raise ValueError('foo')

self.method = method
self.path = path
self.headers = headers
Expand All @@ -63,7 +64,7 @@ def _makeOne(self, *opts):
return self._getTargetClass()(*opts)

def _makeOnePopulated(self, programs, any, response=None, exc=None,
gcore=None, coredir=None, eager=True):
gcore=None, coredir=None, eager=True, allowed_retries=0):
if response is None:
response = DummyResponse()
rpc = DummyRPCServer()
Expand All @@ -78,7 +79,7 @@ def _makeOnePopulated(self, programs, any, response=None, exc=None,
coredir = coredir
prog = self._makeOne(rpc, programs, any, url, timeout, status,
inbody, email, sendmail, coredir, gcore, eager,
retry_time)
retry_time, allowed_retries)
prog.stdin = StringIO()
prog.stdout = StringIO()
prog.stderr = StringIO()
Expand Down Expand Up @@ -130,9 +131,7 @@ def test_runforever_eager_error_on_request_some(self):
programs = ['foo', 'bar', 'baz_01', 'notexisting']
any = None
prog = self._makeOnePopulated(programs, any, exc=True)
prog.stdin.write('eventname:TICK len:0\n')
prog.stdin.seek(0)
prog.runforever(test=True)
self.tick(prog)
lines = prog.stderr.getvalue().split('\n')
#self.assertEqual(len(lines), 7)
self.assertEqual(lines[0],
Expand All @@ -156,9 +155,7 @@ def test_runforever_eager_error_on_request_any(self):
programs = []
any = True
prog = self._makeOnePopulated(programs, any, exc=True)
prog.stdin.write('eventname:TICK len:0\n')
prog.stdin.seek(0)
prog.runforever(test=True)
self.tick(prog)
lines = prog.stderr.getvalue().split('\n')
#self.assertEqual(len(lines), 6)
self.assertEqual(lines[0], 'Restarting all running processes')
Expand All @@ -178,9 +175,7 @@ def test_runforever_eager_error_on_process_stop(self):
any = False
prog = self._makeOnePopulated(programs, any, exc=True)
prog.rpc.supervisor.all_process_info = _FAIL
prog.stdin.write('eventname:TICK len:0\n')
prog.stdin.seek(0)
prog.runforever(test=True)
self.tick(prog)
lines = prog.stderr.getvalue().split('\n')
#self.assertEqual(len(lines), 5)
self.assertEqual(lines[0], "Restarting selected processes ['FAILED']")
Expand All @@ -199,9 +194,7 @@ def test_runforever_eager_error_on_process_start(self):
any = False
prog = self._makeOnePopulated(programs, any, exc=True)
prog.rpc.supervisor.all_process_info = _FAIL
prog.stdin.write('eventname:TICK len:0\n')
prog.stdin.seek(0)
prog.runforever(test=True)
self.tick(prog)
lines = prog.stderr.getvalue().split('\n')
#self.assertEqual(len(lines), 4)
self.assertEqual(lines[0],
Expand All @@ -221,9 +214,7 @@ def test_runforever_eager_gcore(self):
any = None
prog = self._makeOnePopulated(programs, any, exc=True, gcore="true",
coredir="/tmp")
prog.stdin.write('eventname:TICK len:0\n')
prog.stdin.seek(0)
prog.runforever(test=True)
self.tick(prog)
lines = prog.stderr.getvalue().split('\n')
self.assertEqual(lines[0],
("Restarting selected processes ['foo', 'bar', "
Expand All @@ -250,9 +241,7 @@ def test_runforever_not_eager_none_running(self):
any = None
prog = self._makeOnePopulated(programs, any, exc=True, gcore="true",
coredir="/tmp", eager=False)
prog.stdin.write('eventname:TICK len:0\n')
prog.stdin.seek(0)
prog.runforever(test=True)
self.tick(prog)
lines = [x for x in prog.stderr.getvalue().split('\n') if x]
self.assertEqual(len(lines), 0, lines)
self.assertFalse('mailed' in prog.__dict__)
Expand All @@ -261,9 +250,7 @@ def test_runforever_not_eager_running(self):
programs = ['foo', 'bar']
any = None
prog = self._makeOnePopulated(programs, any, exc=True, eager=False)
prog.stdin.write('eventname:TICK len:0\n')
prog.stdin.seek(0)
prog.runforever(test=True)
self.tick(prog)
lines = [x for x in prog.stderr.getvalue().split('\n') if x]
self.assertEqual(lines[0],
("Restarting selected processes ['foo', 'bar']")
Expand All @@ -283,9 +270,7 @@ def test_runforever_honor_timeout_on_connrefused(self):
error = socket.error()
error.errno = 111
prog = self._makeOnePopulated(programs, any, exc=[error], eager=False)
prog.stdin.write('eventname:TICK len:0\n')
prog.stdin.seek(0)
prog.runforever(test=True)
self.tick(prog)
self.assertEqual(prog.stderr.getvalue(), '')
self.assertEqual(prog.stdout.getvalue(), 'READY\nRESULT 2\nOK')

Expand All @@ -296,9 +281,7 @@ def test_runforever_connrefused_error(self):
error.errno = 111
prog = self._makeOnePopulated(programs, any,
exc=[error for x in range(100)], eager=False)
prog.stdin.write('eventname:TICK len:0\n')
prog.stdin.seek(0)
prog.runforever(test=True)
self.tick(prog)
lines = [x for x in prog.stderr.getvalue().split('\n') if x]
self.assertEqual(lines[0],
("Restarting selected processes ['foo', 'bar']")
Expand All @@ -312,5 +295,53 @@ def test_runforever_connrefused_error(self):
self.assertEqual(mailed[1],
'Subject: httpok for http://foo/bar: bad status returned')

def test_retry_before_restart(self):
    """With two allowed retries, the third failed tick triggers a restart."""
    prog = self._makeOnePopulated(['foo', 'bar'], None, exc=True,
                                  eager=False, allowed_retries=2)

    # The first two failures are only logged, with a decreasing count.
    expected_notices = (
        'Allowed number of retries not exceeded, will try again 2 more times.',
        'Allowed number of retries not exceeded, will try again 1 more times.',
    )
    for notice in expected_notices:
        self.tick(prog)
        lines = prog.stderr.getvalue().split('\n')
        self.assertEqual(lines[-2], notice)

    # The third failure exhausts the allowance.  Only the output written
    # after the previous snapshot is inspected (the snapshot ends with an
    # empty string because of the trailing newline, hence the "- 1").
    self.tick(prog)
    already_seen = len(lines) - 1
    new_lines = prog.stderr.getvalue().split('\n')[already_seen:]
    self.assertEqual(new_lines[0],
                     "Restarting selected processes ['foo', 'bar']")

def test_retry_success_reset_count(self):
    """A successful response between two failures resets the retry counter.

    With one allowed retry, a fail / success / fail sequence must produce a
    retry notice for both failures and never restart the processes.
    """
    programs = ['foo', 'bar']
    any = None
    # The sequence is symmetric, so the order in which the fake connection
    # consumes it does not matter: fail, succeed, fail.
    prog = self._makeOnePopulated(programs, any, exc=[True, False, True],
                                  eager=False, allowed_retries=1)
    retry_notice = ('Allowed number of retries not exceeded, '
                    'will try again 1 more times.')

    # First failure: counted as a retry.
    self.tick(prog)
    lines = prog.stderr.getvalue().split('\n')
    self.assertEqual(lines[-2], retry_notice)

    # Success: nothing new is printed.
    self.tick(prog)
    new_lines = prog.stderr.getvalue().split('\n')
    self.assertListEqual(lines, new_lines)

    # Second failure: a new retry notice is printed.
    self.tick(prog)
    new_lines = prog.stderr.getvalue().split('\n')
    self.assertTrue(len(new_lines) > len(lines))
    # Check the freshly written output, not the snapshot from the first
    # tick; otherwise a restart here would go unnoticed.
    self.assertEqual(new_lines[-2], retry_notice)

def tick(self, prog):
    """Feed one TICK event header to ``prog`` and run a single loop pass."""
    event_header = 'eventname:TICK len:0\n'
    stream = prog.stdin
    stream.write(event_header)
    # Rewind so the listener reads from the start of the stream.
    stream.seek(0)
    # test=True makes runforever stop after one iteration.
    prog.runforever(test=True)


if __name__ == '__main__':
unittest.main()