Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[openshift] add support for recycling StatefulSet #1627

Merged
merged 21 commits into from
Jun 14, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions reconcile/openshift_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from reconcile.utils.oc import PrimaryClusterIPCanNotBeUnsetError
from reconcile.utils.oc import InvalidValueApplyError
from reconcile.utils.oc import MetaDataAnnotationsTooLongApplyError
from reconcile.utils.oc import StatefulSetUpdateForbidden
from reconcile.utils.oc import OC_Map
from reconcile.utils.oc import StatusCodeError
from reconcile.utils.oc import UnsupportedMediaTypeError
Expand Down Expand Up @@ -252,6 +253,24 @@ def apply(dry_run, oc_map, cluster, namespace, resource_type, resource,
oc.delete(namespace=namespace, kind=resource_type,
name=resource.name)
oc.apply(namespace=namespace, resource=annotated)
except StatefulSetUpdateForbidden:
if resource_type != 'StatefulSet':
raise

logging.info(['delete_sts_and_apply', cluster, namespace,
resource_type, resource.name])
owned_pods = oc.get_owned_pods(namespace, resource)
oc.delete(namespace=namespace, kind=resource_type,
name=resource.name, cascade=False)
oc.apply(namespace=namespace, resource=annotated)
logging.info(['recycle_sts_pods', cluster, namespace,
resource_type, resource.name])
# the resource was applied without cascading, we proceed
# to recycle the pods belonging to the old resource.
# note: we really just delete pods and let the new resource
# recreate them. we delete one by one and wait for a new
# pod to become ready before proceeding to the next one.
oc.recycle_orphan_pods(namespace, owned_pods)

if recycle_pods:
oc.recycle_pods(dry_run, namespace, resource_type, resource)
Expand Down
157 changes: 157 additions & 0 deletions reconcile/test/test_utils_oc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
from unittest import TestCase
from unittest.mock import patch

from reconcile.utils.oc import OC, PodNotReadyError
from reconcile.utils.openshift_resource import OpenshiftResource as OR


class TestGetOwnedPods(TestCase):
@patch.object(OC, 'get')
@patch.object(OC, 'get_obj_root_owner')
def test_get_owned_pods(self, oc_get_obj_root_owner, oc_get):
owner_body = {
'kind': 'ownerkind',
'metadata': {'name': 'ownername'}
}
owner_resource = OR(owner_body, '', '')

oc_get.return_value = {
'items': [
{
'metadata': {
'name': 'pod1',
'ownerReferences': [
{
'controller': True,
'kind': 'ownerkind',
'name': 'ownername'
}
]
}
},
{
'metadata': {
'name': 'pod2',
'ownerReferences': [
{
'controller': True,
'kind': 'notownerkind',
'name': 'notownername'
}
]
}
},
{
'metadata': {
'name': 'pod3',
'ownerReferences': [
{
'controller': True,
'kind': 'ownerkind',
'name': 'notownername'
}
]
}
},
]
}
oc_get_obj_root_owner.side_effect = [
owner_resource.body,
{
'kind': 'notownerkind',
'metadata': {'name': 'notownername'},
},
{
'kind': 'ownerkind',
'metadata': {'name': 'notownername'}
}
]

oc = OC('server', 'token', local=True)
pods = oc.get_owned_pods('namespace', owner_resource)
self.assertEqual(len(pods), 1)
self.assertEqual(pods[0]['metadata']['name'], 'pod1')


class TestValidatePodReady(TestCase):
@staticmethod
@patch.object(OC, 'get')
def test_validate_pod_ready_all_good(oc_get):
oc_get.return_value = {
'status': {
'containerStatuses': [
{
'name': 'container1',
'ready': True,
},
{
'name': 'container2',
'ready': True
}
]
}
}
oc = OC('server', 'token', local=True)
oc.validate_pod_ready('namespace', 'podname')

@patch.object(OC, 'get')
def test_validate_pod_ready_one_missing(self, oc_get):
oc_get.return_value = {
'status': {
'containerStatuses': [
{
'name': 'container1',
'ready': True,
},
{
'name': 'container2',
'ready': False
}
]
}
}

oc = OC('server', 'token', local=True)
with self.assertRaises(PodNotReadyError):
# Bypass the retry stuff
oc.validate_pod_ready.__wrapped__(oc, 'namespace', 'podname')


class TestGetObjRootOwner(TestCase):
@patch.object(OC, 'get')
def test_owner(self, oc_get):
obj = {
'metadata': {
'name': 'pod1',
'ownerReferences': [
{
'controller': True,
'kind': 'ownerkind',
'name': 'ownername'
}
]
}
}
owner_obj = {
'kind': 'ownerkind',
'metadata': {'name': 'ownername'}
}

oc_get.side_effect = [
owner_obj
]

oc = OC('server', 'token', local=True)
result_owner_obj = oc.get_obj_root_owner('namespace', obj)
self.assertEqual(result_owner_obj, owner_obj)

def test_no_owner(self):
obj = {
'metadata': {
'name': 'pod1',
}
}

oc = OC('server', 'token', local=True)
result_obj = oc.get_obj_root_owner('namespace', obj)
self.assertEqual(result_obj, obj)
36 changes: 34 additions & 2 deletions reconcile/utils/oc.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,10 @@ class UnsupportedMediaTypeError(Exception):
pass


class StatefulSetUpdateForbidden(Exception):
pass


class NoOutputError(Exception):
pass

Expand Down Expand Up @@ -298,8 +302,9 @@ def replace(self, namespace, resource):
return self._msg_to_process_reconcile_time(namespace, resource.body)

@OCDecorators.process_reconcile_time
def delete(self, namespace, kind, name):
cmd = ['delete', '-n', namespace, kind, name]
def delete(self, namespace, kind, name, cascade=True):
cmd = ['delete', '-n', namespace, kind, name,
f'--cascade={str(cascade).lower()}']
self._run(cmd)
resource = {'kind': kind, 'metadata': {'name': name}}
return self._msg_to_process_reconcile_time(namespace, resource)
Expand Down Expand Up @@ -439,6 +444,31 @@ def get_service_account_username(user):
name = user.split('/')[1]
return "system:serviceaccount:{}:{}".format(namespace, name)

def get_owned_pods(self, namespace, resource):
pods = self.get(namespace, 'Pod')['items']
owned_pods = []
for p in pods:
owner = self.get_obj_root_owner(namespace, p)
if (resource.kind, resource.name) == \
(owner['kind'], owner['metadata']['name']):
owned_pods.append(p)

return owned_pods

def recycle_orphan_pods(self, namespace, pods):
for p in pods:
name = p['metadata']['name']
self.delete(namespace, 'Pod', name)
self.validate_pod_ready(namespace, name)

@retry(max_attempts=20)
def validate_pod_ready(self, namespace, name):
rporres marked this conversation as resolved.
Show resolved Hide resolved
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: Methods that check some status and force the program to halt if it doesn't meet some conditions are usually assertions. In English it reads clearer if this is called assert_pod_ready(...).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i have an old itch from my days as a test automation developer that assertions only happen in tests, and using "assert" in a function name indicates using an assertion?

Copy link
Contributor

@Piojo Piojo Jun 14, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, in my eyes this acts like one: it is called in the middle of some processing and breaks the flow entirely by raising an exception/aborting whatever if some consistency condition isn't met. It is true that this one isn't meant to be disabled with -DNDEBUG.

It was a nit, just that I like it when the naming makes it clear that this is an integrity check and that normal flow of the program can break here.

logging.info([self.validate_pod_ready.__name__, namespace, name])
pod = self.get(namespace, 'Pod', name)
for status in pod['status']['containerStatuses']:
if not status['ready']:
raise PodNotReadyError(name)
maorfr marked this conversation as resolved.
Show resolved Hide resolved

def recycle_pods(self, dry_run, namespace, dep_kind, dep_resource):
""" recycles pods which are using the specified resources.
will only act on Secrets containing the 'qontract.recycle' annotation.
Expand Down Expand Up @@ -614,6 +644,8 @@ def _run(self, cmd, **kwargs):
f"[{self.server}]: {err}")
if 'UnsupportedMediaType' in err:
raise UnsupportedMediaTypeError(f"[{self.server}]: {err}")
if 'updates to statefulset spec for fields other than' in err:
raise StatefulSetUpdateForbidden(f"[{self.server}]: {err}")
if not (allow_not_found and 'NotFound' in err):
raise StatusCodeError(f"[{self.server}]: {err}")

Expand Down