Implement dry-run for install/upgrade process #464

Status: Closed · wants to merge 14 commits · Changes from 5 commits

29 changes: 18 additions & 11 deletions kubemarine/admission.py
@@ -234,7 +234,9 @@ def install_psp_task(cluster):
     if not is_security_enabled(cluster.inventory):
         cluster.log.debug("Pod security disabled, skipping policies installation...")
         return

+    if utils.check_dry_run_status_active(cluster):
+        cluster.log.debug("[dry-run] Installing Pod security policies...")
+        return
     first_control_plane = cluster.nodes["control-plane"].get_first_member()

     cluster.log.debug("Installing OOB policies...")
@@ -807,25 +809,30 @@ def update_finalized_inventory(cluster, inventory_to_finalize):

     return inventory_to_finalize


 def copy_pss(group):
-    if group.cluster.inventory['rbac']['admission'] != "pss":
+    cluster = group.cluster
+    if cluster.inventory['rbac']['admission'] != "pss":
         return
-    if group.cluster.context.get('initial_procedure') == 'manage_pss':
-        if not is_security_enabled(group.cluster.inventory) and \
-                group.cluster.procedure_inventory["pss"]["pod-security"] != "enabled":
-            group.cluster.log.debug("Pod security disabled, skipping pod admission installation...")
+    if cluster.context.get('initial_procedure') == 'manage_pss':
+        if not is_security_enabled(cluster.inventory) and \
+                cluster.procedure_inventory["pss"]["pod-security"] != "enabled":
+            cluster.log.debug("Pod security disabled, skipping pod admission installation...")
             return
-    if group.cluster.context.get('initial_procedure') == 'install':
+    if cluster.context.get('initial_procedure') == 'install':
         if not is_security_enabled(group.cluster.inventory):
-            group.cluster.log.debug("Pod security disabled, skipping pod admission installation...")
+            cluster.log.debug("Pod security disabled, skipping pod admission installation...")
             return

-    defaults = group.cluster.inventory["rbac"]["pss"]["defaults"]
-    exemptions = group.cluster.inventory["rbac"]["pss"]["exemptions"]
+    defaults = cluster.inventory["rbac"]["pss"]["defaults"]
+    exemptions = cluster.inventory["rbac"]["pss"]["exemptions"]
     # create admission config from template and cluster.yaml
     admission_config = Template(utils.read_internal(admission_template))\
-        .render(defaults=defaults,exemptions=exemptions)
+        .render(defaults=defaults, exemptions=exemptions)

+    if utils.check_dry_run_status_active(cluster):
+        cluster.log.debug("Copying Admission config files")
+        return None
     # put admission config on every control-planes
     filename = uuid.uuid4().hex
     remote_path = tmp_filepath_pattern % filename
16 changes: 10 additions & 6 deletions kubemarine/apt.py
@@ -30,28 +30,32 @@ def backup_repo(group, repo_filename="*", **kwargs) -> NodeGroupResult or None:
     if not group.cluster.inventory['services']['packages']['package_manager']['replace-repositories']:
         group.cluster.log.debug("Skipped - repos replacement disabled in configuration")
         return
+    dry_run = utils.check_dry_run_status_active(group.cluster)
     # all files in directory will be renamed: xxx.repo -> xxx.repo.bak
     # if there already any files with ".bak" extension, they should not be renamed to ".bak.bak"!
     return group.sudo("find %s -type f -name '%s.list' | "
-                      "sudo xargs -t -iNAME mv -bf NAME NAME.bak" % ("/etc/apt/", repo_filename), **kwargs)
+                      "sudo xargs -t -iNAME mv -bf NAME NAME.bak" % ("/etc/apt/", repo_filename),
+                      dry_run=dry_run, **kwargs)


 def add_repo(group, repo_data="", repo_filename="predefined", **kwargs) -> NodeGroupResult:
-    create_repo_file(group, repo_data, get_repo_file_name(repo_filename))
-    return group.sudo(DEBIAN_HEADERS + 'apt clean && sudo apt update', **kwargs)
+    dry_run = utils.check_dry_run_status_active(group.cluster)
+    create_repo_file(group, repo_data, get_repo_file_name(repo_filename), dry_run)
+    return group.sudo(DEBIAN_HEADERS + 'apt clean && sudo apt update',
+                      dry_run=dry_run, **kwargs)


 def get_repo_file_name(repo_filename="predefined"):
     return '%s/%s.list' % ("/etc/apt/sources.list.d/", repo_filename)


-def create_repo_file(group, repo_data, repo_file):
+def create_repo_file(group, repo_data, repo_file, dry_run=False):
     # if repo_data is list, then convert it to string using join
     if isinstance(repo_data, list):
         repo_data_str = "\n".join(repo_data) + "\n"
     else:
         repo_data_str = utils.read_external(repo_data)
-    group.put(io.StringIO(repo_data_str), repo_file, sudo=True)
+    group.put(io.StringIO(repo_data_str), repo_file, sudo=True, dry_run=dry_run)


 def clean(group, **kwargs) -> NodeGroupResult:
@@ -78,7 +82,7 @@ def install(group, include=None, exclude=None, **kwargs) -> NodeGroupResult:

     command = get_install_cmd(include, exclude)

-    return group.sudo(command, **kwargs)
+    return group.sudo(command, dry_run=utils.check_dry_run_status_active(group.cluster), **kwargs)
     # apt fails to install (downgrade) package if it is already present and has higher version,
     # thus we do not need additional checks here (in contrast to yum)

6 changes: 6 additions & 0 deletions kubemarine/audit.py
@@ -68,6 +68,9 @@ def install(group: NodeGroup) -> str or None:
     else:
         log.debug(f'Auditd package is not installed on {not_installed_hosts}, installing...')

+    if utils.check_dry_run_status_active(group.cluster):
+        return None
+
     with RemoteExecutor(cluster) as exe:
         for host in not_installed_hosts:
             the_node = cluster.make_group([host])
@@ -94,6 +97,9 @@ def apply_audit_rules(group: NodeGroup) -> NodeGroupResult:
     rules_content = " \n".join(cluster.inventory['services']['audit']['rules'])
     utils.dump_file(cluster, rules_content, 'audit.rules')

+    if utils.check_dry_run_status_active(group.cluster):
+        return None
+
     restart_tokens = []
     with RemoteExecutor(cluster) as exe:
         for node in group.get_ordered_members_list(provide_node_configs=True):
2 changes: 1 addition & 1 deletion kubemarine/core/defaults.py
@@ -407,7 +407,7 @@ def enrich_inventory(cluster: KubernetesCluster, inventory: dict, make_dumps=Tru
     if make_dumps:
         from kubemarine import controlplane
         inventory_for_dump = controlplane.controlplane_finalize_inventory(cluster, prepare_for_dump(inventory))
-        utils.dump_file(cluster, yaml.dump(inventory_for_dump, ), "cluster.yaml")
+        # utils.dump_file(cluster, yaml.dump(inventory_for_dump, ), "cluster.yaml")

     return inventory

4 changes: 3 additions & 1 deletion kubemarine/core/executor.py
@@ -290,14 +290,16 @@ def throw_on_failed(self):
"""
self.get_merged_result()

def get_merged_result(self):
def get_merged_result(self, dry_run=False):
"""
Returns last results, merged into NodeGroupResult. If any node failed, throws GroupException.

The method is useful to propagate exceptions, or to check result in case only one command per node is executed.

:return: NodeGroupResult
"""
if dry_run:
return None
executor = self._get_active_executor()
if len(executor.results) == 0:
return None
3 changes: 1 addition & 2 deletions kubemarine/core/flow.py
@@ -184,9 +184,8 @@ def run_tasks(resources: res.DynamicResources, tasks, cumulative_points=None, ta
     cluster = resources.cluster()

     if args.get('without_act', False):
+        cluster.context["dry_run"] = True

[Review comment from a Contributor, on lines 188 to +189]: I suggest to not recreate the inventory on the deployer. For example, if you run upgrade, the input cluster.yaml will be changed. This does not seem as "dry-run". I suggest to create separate file in dump/ and reflect the changes there. (A sketch of this suggestion follows this file's diff.)

         resources.context['preserve_inventory'] = False
         cluster.log.debug('\nFurther acting manually disabled')
         return

     init_tasks_flow(cluster)
     run_tasks_recursive(tasks, final_list, cluster, cumulative_points, [])
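A minimal sketch of the reviewer's suggestion (illustrative only; the helper name and the dry_run_cluster.yaml file name are assumptions, not code from this PR): during a dry run, the recompiled inventory would be written to a separate file under dump/ instead of recreating the input cluster.yaml.

    import os
    import yaml

    def dump_dry_run_inventory(cluster, inventory: dict) -> None:
        # Hypothetical helper: in dry-run mode, reflect inventory changes in a
        # dedicated dump file so the input cluster.yaml is never rewritten.
        dump_dir = "dump"
        os.makedirs(dump_dir, exist_ok=True)
        path = os.path.join(dump_dir, "dry_run_cluster.yaml")
        with open(path, "w") as f:
            yaml.safe_dump(inventory, f)
        cluster.log.debug("[dry-run] Recompiled inventory written to %s" % path)

A real run would keep the existing preserve/recreate behaviour untouched.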
33 changes: 33 additions & 0 deletions kubemarine/core/group.py
@@ -346,6 +346,35 @@ def do(self: 'NodeGroup', *args, logging_stream_level: int = None, **kwargs):
         return do


+def _handle_dry_run(fn: callable) -> callable:
+    """
+    Decorator that short-circuits run/sudo/put when dry run is requested:
+    the intended action is only logged, nothing is executed on the nodes.
+    Note! This decorator should be the outermost.
+    :param fn: Origin function to apply annotation to
+    :return: Dry-run wrapper function
+    """
+    def do_dry_run(self: 'NodeGroup', *args, **kwargs):
+        results = {}
+
+        if kwargs.get("dry_run"):
+            if fn.__name__ == "put":
+                self.cluster.log.verbose("Local file \"%s\" is being transferred to remote file \"%s\" on nodes %s with options %s"
+                                         % (args[0], args[1], list(self.nodes.keys()), kwargs))
+            else:
+                self.cluster.log.verbose('Performing %s %s on nodes %s with options: %s'
+                                         % (fn.__name__, args[0], list(self.nodes.keys()), kwargs))
+            return NodeGroupResult(self.cluster, results)
+        elif "dry_run" in kwargs.keys():
+            del kwargs["dry_run"]
+        try:
+            results = fn(self, *args, **kwargs)
+            return results
+        except fabric.group.GroupException as e:
+            results = e.result
+            raise
+
+    return do_dry_run


 class NodeGroup:

     def __init__(self, connections: Connections, cluster):
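For orientation, a sketch of how calls through the decorated methods would behave (illustrative command, not from this PR):

    # dry_run=True: the intended action is logged at verbose level and an
    # empty NodeGroupResult is returned; nothing is executed on the nodes
    result = group.sudo("systemctl restart keepalived", dry_run=True)

    # dry_run omitted or False: the command is executed exactly as before
    result = group.sudo("systemctl restart keepalived")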
@@ -399,13 +428,16 @@ def _make_result_or_fail(self, results: _HostToResult,
         return group_result

     @_handle_internal_logging
+    @_handle_dry_run
     def run(self, *args, **kwargs) -> Union[NodeGroupResult, int]:

[Review comment from ilia1243 (Contributor), Jun 27, 2023]: I suggest to not call NodeGroup.run/sudo/put for RW operations in dry run at all. The reason is it is difficult to simulate its behaviour. For example, you return empty results, but in fact it returns not empty results in real run. So this can lead to unpredictable changes in behaviour of Kubemarine. Also, put does not return result at all, run/sudo might return int in some cases, and so on... (A sketch of this alternative follows this file's diff.)

         return self.do("run", *args, **kwargs)

     @_handle_internal_logging
+    @_handle_dry_run
     def sudo(self, *args, **kwargs) -> Union[NodeGroupResult, int]:
         return self.do("sudo", *args, **kwargs)

+    @_handle_dry_run
     def put(self, local_file: Union[io.StringIO, str], remote_file: str, **kwargs):
         if isinstance(local_file, io.StringIO):
             self.cluster.log.verbose("Text is being transferred to remote file \"%s\" on nodes %s with options %s"
@@ -441,6 +473,7 @@ def put(self, local_file: Union[io.StringIO, str], remote_file: str, **kwargs):
         with open(local_file, "rb") as local_stream:
             group_to_upload._put(local_stream, remote_file, **kwargs)

+    @_handle_dry_run
     def _put(self, local_stream: IO, remote_file: str, **kwargs):
         hide = kwargs.pop("hide", True) is True
         sudo = kwargs.pop("sudo", False) is True
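A minimal sketch of the reviewer's alternative (assumed shape, not part of this PR; the function and service names are hypothetical): guard read-write operations at the call site, so that a dry run never reaches NodeGroup.run/sudo/put and none of their varying return types has to be simulated.

    import io

    def configure_my_service(group, config_text: str, config_path: str):
        cluster = group.cluster
        if cluster.context.get("dry_run"):
            # Announce the intended RW operations and stop; run/sudo/put are
            # never invoked, so no empty results need to be faked.
            cluster.log.debug("[dry-run] would upload %s and restart my-service" % config_path)
            return None
        group.put(io.StringIO(config_text), config_path, sudo=True)
        return group.sudo("systemctl restart my-service")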
4 changes: 4 additions & 0 deletions kubemarine/core/utils.py
@@ -464,6 +464,10 @@ def _test_version(version: str, numbers_amount: int) -> list:
         raise ValueError(f'Incorrect version \"{version}\" format, expected version pattern is \"{expected_pattern}\"')


+def check_dry_run_status_active(cluster):
+    return cluster.context.get("dry_run")
+
+
 class ClusterStorage:
     """
     File preservation:
12 changes: 6 additions & 6 deletions kubemarine/coredns.py
@@ -143,18 +143,18 @@ def generate_configmap(inventory):
     return config + '\n'


-def apply_configmap(cluster, config):
+def apply_configmap(cluster, config, dry_run=False):
     utils.dump_file(cluster, config, 'coredns-configmap.yaml')

     group = cluster.nodes['control-plane'].include_group(cluster.nodes.get('worker')).get_final_nodes()
-    group.put(io.StringIO(config), '/etc/kubernetes/coredns-configmap.yaml', backup=True, sudo=True)
+    group.put(io.StringIO(config), '/etc/kubernetes/coredns-configmap.yaml', backup=True, sudo=True, dry_run=dry_run)

     return cluster.nodes['control-plane'].get_final_nodes().get_first_member()\
         .sudo('kubectl apply -f /etc/kubernetes/coredns-configmap.yaml && '
-              'sudo kubectl rollout restart -n kube-system deployment/coredns')
+              'sudo kubectl rollout restart -n kube-system deployment/coredns', dry_run=dry_run)


-def apply_patch(cluster):
+def apply_patch(cluster, dry_run=False):
     apply_command = ''

     for config_type in ['deployment']:
@@ -172,11 +172,11 @@ def apply_patch(cluster):
         utils.dump_file(cluster, config, filename)

         group = cluster.nodes['control-plane'].include_group(cluster.nodes.get('worker')).get_final_nodes()
-        group.put(io.StringIO(config), filepath, backup=True, sudo=True)
+        group.put(io.StringIO(config), filepath, backup=True, sudo=True, dry_run=dry_run)

         apply_command = 'kubectl patch %s coredns -n kube-system --type merge -p \"$(sudo cat %s)\"' % (config_type, filepath)

     if apply_command == '':
         return 'Nothing to patch'

-    return cluster.nodes['control-plane'].get_final_nodes().get_first_member().sudo(apply_command)
+    return cluster.nodes['control-plane'].get_final_nodes().get_first_member().sudo(apply_command, dry_run=dry_run)
19 changes: 13 additions & 6 deletions kubemarine/cri/containerd.py
@@ -26,6 +26,9 @@


 def install(group: NodeGroup):
+    if utils.check_dry_run_status_active(group.cluster):
+        group.cluster.log.debug("[dry-run] Installing Containerd")
+        return None
     with RemoteExecutor(group.cluster) as exe:
         for node in group.get_ordered_members_list(provide_node_configs=True):
             os_specific_associations = group.cluster.get_associations_for_node(node['connect_to'], 'containerd')
@@ -49,7 +52,8 @@ def configure(group: NodeGroup):
log.debug("Uploading crictl configuration for containerd...")
crictl_config = yaml.dump({"runtime-endpoint": "unix:///run/containerd/containerd.sock"})
utils.dump_file(group.cluster, crictl_config, 'crictl.yaml')
group.put(StringIO(crictl_config), '/etc/crictl.yaml', backup=True, sudo=True)
dry_run = utils.check_dry_run_status_active(group.cluster)
group.put(StringIO(crictl_config), '/etc/crictl.yaml', backup=True, sudo=True, dry_run=dry_run)

config_string = ""
# double loop is used to make sure that no "simple" `key: value` pairs are accidentally assigned to sections
@@ -89,19 +93,22 @@ def configure(group: NodeGroup):
             if registry_configs[auth_registry].get('auth', {}).get('auth', ''):
                 auth_registries['auths'][auth_registry]['auth'] = registry_configs[auth_registry]['auth']['auth']
         auth_json = json.dumps(auth_registries)
-        group.put(StringIO(auth_json), "/etc/containers/auth.json", backup=True, sudo=True)
-        group.sudo("chmod 600 /etc/containers/auth.json")
+        group.put(StringIO(auth_json), "/etc/containers/auth.json", backup=True, sudo=True, dry_run=dry_run)
+        group.sudo("chmod 600 /etc/containers/auth.json", dry_run=dry_run)
     if insecure_registries:
         log.debug("Uploading podman configuration...")
         podman_registries = f"[registries.insecure]\nregistries = {insecure_registries}\n"
         utils.dump_file(group.cluster, podman_registries, 'podman_registries.conf')
-        group.sudo("mkdir -p /etc/containers/")
-        group.put(StringIO(podman_registries), "/etc/containers/registries.conf", backup=True, sudo=True)
+        group.sudo("mkdir -p /etc/containers/", dry_run=dry_run)
+        group.put(StringIO(podman_registries), "/etc/containers/registries.conf", backup=True, sudo=True, dry_run=dry_run)
     else:
         log.debug("Removing old podman configuration...")
-        group.sudo("rm -f /etc/containers/registries.conf")
+        group.sudo("rm -f /etc/containers/registries.conf", dry_run=dry_run)

     utils.dump_file(group.cluster, config_string, 'containerd-config.toml')
+    if dry_run:
+        return None
+
     with RemoteExecutor(group.cluster) as exe:
         for node in group.get_ordered_members_list(provide_node_configs=True):
             os_specific_associations = group.cluster.get_associations_for_node(node['connect_to'], 'containerd')
5 changes: 5 additions & 0 deletions kubemarine/cri/docker.py
@@ -22,6 +22,9 @@


 def install(group: NodeGroup):
+    if utils.check_dry_run_status_active(group.cluster):
+        group.cluster.log.debug("[dry-run] Installing Docker")
+        return None
     with RemoteExecutor(group.cluster) as exe:
         for node in group.get_ordered_members_list(provide_node_configs=True):
             os_specific_associations = group.cluster.get_associations_for_node(node['connect_to'], 'docker')
@@ -56,6 +59,8 @@ def configure(group: NodeGroup):

     settings_json = json.dumps(group.cluster.inventory["services"]['cri']['dockerConfig'], sort_keys=True, indent=4)
     utils.dump_file(group.cluster, settings_json, 'docker-daemon.json')
+    if utils.check_dry_run_status_active(group.cluster):
+        group.cluster.log.debug("[dry-run] Configuring Docker")

     with RemoteExecutor(group.cluster) as exe:
         for node in group.get_ordered_members_list(provide_node_configs=True):
16 changes: 9 additions & 7 deletions kubemarine/keepalived.py
@@ -192,9 +192,11 @@ def uninstall(group):
     return packages.remove(group, include='keepalived')


-def restart(group: NodeGroup):
+def restart(group: NodeGroup, dry_run=False):
     cluster = group.cluster
     cluster.log.debug("Restarting keepalived in all group...")
+    if dry_run:
+        return
     with RemoteExecutor(cluster):
         for node in group.get_ordered_members_list(provide_node_configs=True):
             service_name = group.cluster.get_package_association_for_node(
@@ -260,7 +262,7 @@ def generate_config(inventory, node):
 def configure(group: NodeGroup) -> NodeGroupResult:
     log = group.cluster.log
     group_members = group.get_ordered_members_list(provide_node_configs=True)
-
+    dry_run = utils.check_dry_run_status_active(group.cluster)
     with RemoteExecutor(group.cluster):
         for node in group_members:

@@ -269,15 +271,15 @@ def configure(group: NodeGroup) -> NodeGroupResult:
             package_associations = group.cluster.get_associations_for_node(node['connect_to'], 'keepalived')
             configs_directory = '/'.join(package_associations['config_location'].split('/')[:-1])

-            group.sudo('mkdir -p %s' % configs_directory, hide=True)
+            group.sudo('mkdir -p %s' % configs_directory, hide=True, dry_run=dry_run)

             config = generate_config(group.cluster.inventory, node)
             utils.dump_file(group.cluster, config, 'keepalived_%s.conf' % node['name'])

-            node['connection'].put(io.StringIO(config), package_associations['config_location'], sudo=True)
+            node['connection'].put(io.StringIO(config), package_associations['config_location'], sudo=True, dry_run=dry_run)

-            log.debug(group.sudo('ls -la %s' % package_associations['config_location']))
+            log.debug(group.sudo('ls -la %s' % package_associations['config_location'], dry_run=dry_run))

-    restart(group)
+    restart(group, dry_run)

-    return group.sudo('systemctl status %s' % package_associations['service_name'], warn=True)
+    return group.sudo('systemctl status %s' % package_associations['service_name'], warn=True, dry_run=dry_run)