From 9562c6027b2eb6f038d60808af7fbc5c672111c0 Mon Sep 17 00:00:00 2001 From: Vasudeo Nimbekar Date: Mon, 19 Jun 2023 16:12:52 +0530 Subject: [PATCH 1/8] Kubemarine install dry-run --- kubemarine/admission.py | 29 ++++++----- kubemarine/apt.py | 14 ++++-- kubemarine/audit.py | 6 +++ kubemarine/core/executor.py | 4 +- kubemarine/core/flow.py | 3 +- kubemarine/core/group.py | 31 ++++++++++++ kubemarine/core/utils.py | 4 ++ kubemarine/coredns.py | 12 ++--- kubemarine/cri/containerd.py | 19 +++++--- kubemarine/cri/docker.py | 5 ++ kubemarine/keepalived.py | 16 +++--- kubemarine/kubernetes/__init__.py | 81 ++++++++++++++++++++++--------- kubemarine/kubernetes_accounts.py | 24 +++++---- kubemarine/plugins/__init__.py | 9 ++-- kubemarine/procedures/install.py | 52 +++++++++++++++----- kubemarine/selinux.py | 5 +- kubemarine/sysctl.py | 10 ++-- kubemarine/system.py | 43 +++++++++++----- kubemarine/thirdparties.py | 16 +++--- 19 files changed, 270 insertions(+), 113 deletions(-) diff --git a/kubemarine/admission.py b/kubemarine/admission.py index 57e2c9651..03a713397 100644 --- a/kubemarine/admission.py +++ b/kubemarine/admission.py @@ -234,7 +234,9 @@ def install_psp_task(cluster): if not is_security_enabled(cluster.inventory): cluster.log.debug("Pod security disabled, skipping policies installation...") return - + if utils.check_dry_run_status_active(cluster): + cluster.log.debug("[dry-run] Installing Pod security policies...") + return first_control_plane = cluster.nodes["control-plane"].get_first_member() cluster.log.debug("Installing OOB policies...") @@ -807,25 +809,30 @@ def update_finalized_inventory(cluster, inventory_to_finalize): return inventory_to_finalize + def copy_pss(group): - if group.cluster.inventory['rbac']['admission'] != "pss": + cluster = group.cluster + if cluster.inventory['rbac']['admission'] != "pss": return - if group.cluster.context.get('initial_procedure') == 'manage_pss': - if not is_security_enabled(group.cluster.inventory) and \ - group.cluster.procedure_inventory["pss"]["pod-security"] != "enabled": - group.cluster.log.debug("Pod security disabled, skipping pod admission installation...") + if cluster.context.get('initial_procedure') == 'manage_pss': + if not is_security_enabled(cluster.inventory) and \ + cluster.procedure_inventory["pss"]["pod-security"] != "enabled": + cluster.log.debug("Pod security disabled, skipping pod admission installation...") return - if group.cluster.context.get('initial_procedure') == 'install': + if cluster.context.get('initial_procedure') == 'install': if not is_security_enabled(group.cluster.inventory): - group.cluster.log.debug("Pod security disabled, skipping pod admission installation...") + cluster.log.debug("Pod security disabled, skipping pod admission installation...") return - defaults = group.cluster.inventory["rbac"]["pss"]["defaults"] - exemptions = group.cluster.inventory["rbac"]["pss"]["exemptions"] + defaults = cluster.inventory["rbac"]["pss"]["defaults"] + exemptions = cluster.inventory["rbac"]["pss"]["exemptions"] # create admission config from template and cluster.yaml admission_config = Template(utils.read_internal(admission_template))\ - .render(defaults=defaults,exemptions=exemptions) + .render(defaults=defaults, exemptions=exemptions) + if utils.check_dry_run_status_active(cluster): + cluster.log.debug("Copying Admission config files") + return None # put admission config on every control-planes filename = uuid.uuid4().hex remote_path = tmp_filepath_pattern % filename diff --git a/kubemarine/apt.py 
b/kubemarine/apt.py index e489906ba..e5733c321 100644 --- a/kubemarine/apt.py +++ b/kubemarine/apt.py @@ -30,28 +30,32 @@ def backup_repo(group, repo_filename="*", **kwargs) -> NodeGroupResult or None: if not group.cluster.inventory['services']['packages']['package_manager']['replace-repositories']: group.cluster.log.debug("Skipped - repos replacement disabled in configuration") return + dry_run = utils.check_dry_run_status_active(group.cluster) # all files in directory will be renamed: xxx.repo -> xxx.repo.bak # if there already any files with ".bak" extension, they should not be renamed to ".bak.bak"! return group.sudo("find %s -type f -name '%s.list' | " - "sudo xargs -t -iNAME mv -bf NAME NAME.bak" % ("/etc/apt/", repo_filename), **kwargs) + "sudo xargs -t -iNAME mv -bf NAME NAME.bak" % ("/etc/apt/", repo_filename), + dry_run=dry_run, **kwargs) def add_repo(group, repo_data="", repo_filename="predefined", **kwargs) -> NodeGroupResult: - create_repo_file(group, repo_data, get_repo_file_name(repo_filename)) - return group.sudo(DEBIAN_HEADERS + 'apt clean && sudo apt update', **kwargs) + dry_run = utils.check_dry_run_status_active(group.cluster) + create_repo_file(group, repo_data, get_repo_file_name(repo_filename), dry_run) + return group.sudo(DEBIAN_HEADERS + 'apt clean && sudo apt update', + dry_run=dry_run, **kwargs) def get_repo_file_name(repo_filename="predefined"): return '%s/%s.list' % ("/etc/apt/sources.list.d/", repo_filename) -def create_repo_file(group, repo_data, repo_file): +def create_repo_file(group, repo_data, repo_file, dry_run=False): # if repo_data is list, then convert it to string using join if isinstance(repo_data, list): repo_data_str = "\n".join(repo_data) + "\n" else: repo_data_str = utils.read_external(repo_data) - group.put(io.StringIO(repo_data_str), repo_file, sudo=True) + group.put(io.StringIO(repo_data_str), repo_file, sudo=True, dry_run=dry_run) def clean(group, **kwargs) -> NodeGroupResult: diff --git a/kubemarine/audit.py b/kubemarine/audit.py index dbf2d4dc1..202ff719b 100644 --- a/kubemarine/audit.py +++ b/kubemarine/audit.py @@ -68,6 +68,9 @@ def install(group: NodeGroup) -> str or None: else: log.debug(f'Auditd package is not installed on {not_installed_hosts}, installing...') + if utils.check_dry_run_status_active(group.cluster): + return None + with RemoteExecutor(cluster) as exe: for host in not_installed_hosts: the_node = cluster.make_group([host]) @@ -94,6 +97,9 @@ def apply_audit_rules(group: NodeGroup) -> NodeGroupResult: rules_content = " \n".join(cluster.inventory['services']['audit']['rules']) utils.dump_file(cluster, rules_content, 'audit.rules') + if utils.check_dry_run_status_active(group.cluster): + return None + restart_tokens = [] with RemoteExecutor(cluster) as exe: for node in group.get_ordered_members_list(provide_node_configs=True): diff --git a/kubemarine/core/executor.py b/kubemarine/core/executor.py index 08c44d73b..1ae5915ee 100644 --- a/kubemarine/core/executor.py +++ b/kubemarine/core/executor.py @@ -290,7 +290,7 @@ def throw_on_failed(self): """ self.get_merged_result() - def get_merged_result(self): + def get_merged_result(self, dry_run=False): """ Returns last results, merged into NodeGroupResult. If any node failed, throws GroupException. 
@@ -298,6 +298,8 @@ def get_merged_result(self): :return: NodeGroupResult """ + if dry_run: + return None executor = self._get_active_executor() if len(executor.results) == 0: return None diff --git a/kubemarine/core/flow.py index ed0862ba1..4e55bed7c 100755 --- a/kubemarine/core/flow.py +++ b/kubemarine/core/flow.py @@ -184,9 +184,8 @@ def run_tasks(resources: res.DynamicResources, tasks, cumulative_points=None, ta cluster = resources.cluster() if args.get('without_act', False): + cluster.context["dry_run"] = True resources.context['preserve_inventory'] = False - cluster.log.debug('\nFurther acting manually disabled') - return init_tasks_flow(cluster) run_tasks_recursive(tasks, final_list, cluster, cumulative_points, []) diff --git a/kubemarine/core/group.py index d0395293f..e7a7c8c7f 100755 --- a/kubemarine/core/group.py +++ b/kubemarine/core/group.py @@ -346,6 +346,33 @@ def do(self: 'NodeGroup', *args, logging_stream_level: int = None, **kwargs): return do +def _handle_dry_run(fn: callable) -> callable: + """ + Method is a decorator that skips the actual remote call when dry_run=True is passed, logging the intended command or file transfer instead and returning an empty NodeGroupResult. + Note! This decorator should be the outermost. + :param fn: Origin function to apply annotation to + :return: Dry-run wrapper function + """ + def do_dry_run(self: 'NodeGroup', *args, **kwargs): + results = {} + + if kwargs.get("dry_run"): + if fn.__name__ == "put": + self.cluster.log.verbose("Local file \"%s\" is being transferred to remote file \"%s\" on nodes %s with options %s" + % (args[0], args[1], list(self.nodes.keys()), kwargs)) + else: + self.cluster.log.verbose('Performing %s %s on nodes %s with options: %s' % (fn.__name__, args[0], list(self.nodes.keys()), kwargs)) + return NodeGroupResult(self.cluster, results) + try: + results = fn(self, *args, **kwargs) + return results + except fabric.group.GroupException as e: + results = e.result + raise + + return do_dry_run + + class NodeGroup: def __init__(self, connections: Connections, cluster): @@ -399,13 +426,16 @@ def _make_result_or_fail(self, results: _HostToResult, return group_result @_handle_internal_logging + @_handle_dry_run def run(self, *args, **kwargs) -> Union[NodeGroupResult, int]: return self.do("run", *args, **kwargs) @_handle_internal_logging + @_handle_dry_run def sudo(self, *args, **kwargs) -> Union[NodeGroupResult, int]: return self.do("sudo", *args, **kwargs) + @_handle_dry_run def put(self, local_file: Union[io.StringIO, str], remote_file: str, **kwargs): if isinstance(local_file, io.StringIO): self.cluster.log.verbose("Text is being transferred to remote file \"%s\" on nodes %s with options %s" @@ -441,6 +471,7 @@ def put(self, local_file: Union[io.StringIO, str], remote_file: str, **kwargs): with open(local_file, "rb") as local_stream: group_to_upload._put(local_stream, remote_file, **kwargs) + @_handle_dry_run def _put(self, local_stream: IO, remote_file: str, **kwargs): hide = kwargs.pop("hide", True) is True sudo = kwargs.pop("sudo", False) is True diff --git a/kubemarine/core/utils.py index 9793d6e24..140a72f63 100755 --- a/kubemarine/core/utils.py +++ b/kubemarine/core/utils.py @@ -464,6 +464,10 @@ def _test_version(version: str, numbers_amount: int) -> list: raise ValueError(f'Incorrect version \"{version}\" format, expected version pattern is \"{expected_pattern}\"') +def check_dry_run_status_active(cluster): + return cluster.context.get("dry_run") + + class ClusterStorage: """ File preservation: diff --git
a/kubemarine/coredns.py b/kubemarine/coredns.py index 18db27fae..ed607a72a 100644 --- a/kubemarine/coredns.py +++ b/kubemarine/coredns.py @@ -143,18 +143,18 @@ def generate_configmap(inventory): return config + '\n' -def apply_configmap(cluster, config): +def apply_configmap(cluster, config, dry_run=False): utils.dump_file(cluster, config, 'coredns-configmap.yaml') group = cluster.nodes['control-plane'].include_group(cluster.nodes.get('worker')).get_final_nodes() - group.put(io.StringIO(config), '/etc/kubernetes/coredns-configmap.yaml', backup=True, sudo=True) + group.put(io.StringIO(config), '/etc/kubernetes/coredns-configmap.yaml', backup=True, sudo=True, dry_run=dry_run) return cluster.nodes['control-plane'].get_final_nodes().get_first_member()\ .sudo('kubectl apply -f /etc/kubernetes/coredns-configmap.yaml && ' - 'sudo kubectl rollout restart -n kube-system deployment/coredns') + 'sudo kubectl rollout restart -n kube-system deployment/coredns', dry_run=dry_run) -def apply_patch(cluster): +def apply_patch(cluster, dry_run=False): apply_command = '' for config_type in ['deployment']: @@ -172,11 +172,11 @@ def apply_patch(cluster): utils.dump_file(cluster, config, filename) group = cluster.nodes['control-plane'].include_group(cluster.nodes.get('worker')).get_final_nodes() - group.put(io.StringIO(config), filepath, backup=True, sudo=True) + group.put(io.StringIO(config), filepath, backup=True, sudo=True, dry_run=dry_run) apply_command = 'kubectl patch %s coredns -n kube-system --type merge -p \"$(sudo cat %s)\"' % (config_type, filepath) if apply_command == '': return 'Nothing to patch' - return cluster.nodes['control-plane'].get_final_nodes().get_first_member().sudo(apply_command) + return cluster.nodes['control-plane'].get_final_nodes().get_first_member().sudo(apply_command, dry_run=dry_run) diff --git a/kubemarine/cri/containerd.py b/kubemarine/cri/containerd.py index dd0583c71..c3087b194 100755 --- a/kubemarine/cri/containerd.py +++ b/kubemarine/cri/containerd.py @@ -26,6 +26,9 @@ def install(group: NodeGroup): + if utils.check_dry_run_status_active(group.cluster): + group.cluster.log.debug("[dry-run] Installing Containerd") + return None with RemoteExecutor(group.cluster) as exe: for node in group.get_ordered_members_list(provide_node_configs=True): os_specific_associations = group.cluster.get_associations_for_node(node['connect_to'], 'containerd') @@ -49,7 +52,8 @@ def configure(group: NodeGroup): log.debug("Uploading crictl configuration for containerd...") crictl_config = yaml.dump({"runtime-endpoint": "unix:///run/containerd/containerd.sock"}) utils.dump_file(group.cluster, crictl_config, 'crictl.yaml') - group.put(StringIO(crictl_config), '/etc/crictl.yaml', backup=True, sudo=True) + dry_run = utils.check_dry_run_status_active(group.cluster) + group.put(StringIO(crictl_config), '/etc/crictl.yaml', backup=True, sudo=True, dry_run=dry_run) config_string = "" # double loop is used to make sure that no "simple" `key: value` pairs are accidentally assigned to sections @@ -89,19 +93,22 @@ def configure(group: NodeGroup): if registry_configs[auth_registry].get('auth', {}).get('auth', ''): auth_registries['auths'][auth_registry]['auth'] = registry_configs[auth_registry]['auth']['auth'] auth_json = json.dumps(auth_registries) - group.put(StringIO(auth_json), "/etc/containers/auth.json", backup=True, sudo=True) - group.sudo("chmod 600 /etc/containers/auth.json") + group.put(StringIO(auth_json), "/etc/containers/auth.json", backup=True, sudo=True, dry_run=dry_run) + group.sudo("chmod 600 
/etc/containers/auth.json", dry_run=dry_run) if insecure_registries: log.debug("Uploading podman configuration...") podman_registries = f"[registries.insecure]\nregistries = {insecure_registries}\n" utils.dump_file(group.cluster, podman_registries, 'podman_registries.conf') - group.sudo("mkdir -p /etc/containers/") - group.put(StringIO(podman_registries), "/etc/containers/registries.conf", backup=True, sudo=True) + group.sudo("mkdir -p /etc/containers/", dry_run=dry_run) + group.put(StringIO(podman_registries), "/etc/containers/registries.conf", backup=True, sudo=True, dry_run=dry_run) else: log.debug("Removing old podman configuration...") - group.sudo("rm -f /etc/containers/registries.conf") + group.sudo("rm -f /etc/containers/registries.conf", dry_run=dry_run) utils.dump_file(group.cluster, config_string, 'containerd-config.toml') + if dry_run: + return None + with RemoteExecutor(group.cluster) as exe: for node in group.get_ordered_members_list(provide_node_configs=True): os_specific_associations = group.cluster.get_associations_for_node(node['connect_to'], 'containerd') diff --git a/kubemarine/cri/docker.py b/kubemarine/cri/docker.py index 74aed30f8..d635f18ce 100755 --- a/kubemarine/cri/docker.py +++ b/kubemarine/cri/docker.py @@ -22,6 +22,9 @@ def install(group: NodeGroup): + if utils.check_dry_run_status_active(group.cluster): + group.cluster.log.debug("[dry-run] Installing Docker") + return None with RemoteExecutor(group.cluster) as exe: for node in group.get_ordered_members_list(provide_node_configs=True): os_specific_associations = group.cluster.get_associations_for_node(node['connect_to'], 'docker') @@ -56,6 +59,8 @@ def configure(group: NodeGroup): settings_json = json.dumps(group.cluster.inventory["services"]['cri']['dockerConfig'], sort_keys=True, indent=4) utils.dump_file(group.cluster, settings_json, 'docker-daemon.json') + if utils.check_dry_run_status_active(group.cluster): + group.cluster.log.debug("[dry-run] Configuring Docker") with RemoteExecutor(group.cluster) as exe: for node in group.get_ordered_members_list(provide_node_configs=True): diff --git a/kubemarine/keepalived.py b/kubemarine/keepalived.py index d24e3fa07..8c3bd9fe4 100644 --- a/kubemarine/keepalived.py +++ b/kubemarine/keepalived.py @@ -192,9 +192,11 @@ def uninstall(group): return packages.remove(group, include='keepalived') -def restart(group: NodeGroup): +def restart(group: NodeGroup, dry_run=False): cluster = group.cluster cluster.log.debug("Restarting keepalived in all group...") + if dry_run: + return with RemoteExecutor(cluster): for node in group.get_ordered_members_list(provide_node_configs=True): service_name = group.cluster.get_package_association_for_node( @@ -260,7 +262,7 @@ def generate_config(inventory, node): def configure(group: NodeGroup) -> NodeGroupResult: log = group.cluster.log group_members = group.get_ordered_members_list(provide_node_configs=True) - + dry_run = utils.check_dry_run_status_active(group.cluster) with RemoteExecutor(group.cluster): for node in group_members: @@ -269,15 +271,15 @@ def configure(group: NodeGroup) -> NodeGroupResult: package_associations = group.cluster.get_associations_for_node(node['connect_to'], 'keepalived') configs_directory = '/'.join(package_associations['config_location'].split('/')[:-1]) - group.sudo('mkdir -p %s' % configs_directory, hide=True) + group.sudo('mkdir -p %s' % configs_directory, hide=True, dry_run=dry_run) config = generate_config(group.cluster.inventory, node) utils.dump_file(group.cluster, config, 'keepalived_%s.conf' % 
node['name']) - node['connection'].put(io.StringIO(config), package_associations['config_location'], sudo=True) + node['connection'].put(io.StringIO(config), package_associations['config_location'], sudo=True, dry_run=dry_run) - log.debug(group.sudo('ls -la %s' % package_associations['config_location'])) + log.debug(group.sudo('ls -la %s' % package_associations['config_location'], dry_run=dry_run)) - restart(group) + restart(group, dry_run) - return group.sudo('systemctl status %s' % package_associations['service_name'], warn=True) + return group.sudo('systemctl status %s' % package_associations['service_name'], warn=True, dry_run=dry_run) diff --git a/kubemarine/kubernetes/__init__.py index 93de5a42e..c0356434d 100644 --- a/kubemarine/kubernetes/__init__.py +++ b/kubemarine/kubernetes/__init__.py @@ -206,6 +206,9 @@ def reset_installation_env(group: NodeGroup): cluster = group.cluster + if utils.check_dry_run_status_active(cluster): + return + drain_timeout = cluster.procedure_inventory.get('drain_timeout') grace_period = cluster.procedure_inventory.get('grace_period') @@ -322,41 +325,49 @@ def is_available_control_plane(control_plane): def install(group): log = group.cluster.log + dry_run = utils.check_dry_run_status_active(group.cluster) with RemoteExecutor(group.cluster): log.debug("Making systemd unit...") - group.sudo('rm -rf /etc/systemd/system/kubelet*') + group.sudo('rm -rf /etc/systemd/system/kubelet*', dry_run=dry_run) for node in group.cluster.inventory["nodes"]: # perform only for current group members if node["connect_to"] in group.nodes.keys(): template = Template(utils.read_internal('templates/kubelet.service.j2')).render( hostname=node["name"]) log.debug("Uploading to '%s'..." % node["connect_to"]) - node["connection"].put(io.StringIO(template + "\n"), '/etc/systemd/system/kubelet.service', sudo=True) - node["connection"].sudo("chmod 644 /etc/systemd/system/kubelet.service") + node["connection"].put(io.StringIO(template + "\n"), '/etc/systemd/system/kubelet.service', sudo=True, dry_run=dry_run) + node["connection"].sudo("chmod 644 /etc/systemd/system/kubelet.service", dry_run=dry_run) log.debug("\nReloading systemd daemon...") - system.reload_systemctl(group) - group.sudo('systemctl enable kubelet') + system.reload_systemctl(group, dry_run) + group.sudo('systemctl enable kubelet', dry_run=dry_run) - return group.sudo('systemctl status kubelet', warn=True) + return group.sudo('systemctl status kubelet', warn=True, dry_run=dry_run) def join_other_control_planes(group): other_control_planes_group = group.get_ordered_members_list(provide_node_configs=True)[1:] - - join_dict = group.cluster.context["join_dict"] + dry_run = utils.check_dry_run_status_active(group.cluster) + join_dict = group.cluster.context.get("join_dict") for node in other_control_planes_group: + if dry_run: + group.cluster.log.debug('[dry-run]Joining control-plane \'%s\'...' 
% node['name']) + continue join_control_plane(group, node, join_dict) group.cluster.log.debug("Verifying installation...") first_control_plane = group.get_first_member(provide_node_configs=True) - return first_control_plane['connection'].sudo("kubectl get pods --all-namespaces -o=wide") + return first_control_plane['connection'].sudo("kubectl get pods --all-namespaces -o=wide", dry_run=dry_run) -def join_new_control_plane(group): - join_dict = get_join_dict(group) +def join_new_control_plane(group, dry_run=False): + dry_run = utils.check_dry_run_status_active(group.cluster) + join_dict = get_join_dict(group, dry_run) for node in group.get_ordered_members_list(provide_node_configs=True): + if dry_run: + group.cluster.log.debug('[dry-run]Joining control-plane \'%s\'...' % node['name']) + continue join_control_plane(group, node, join_dict) @@ -493,7 +504,12 @@ def fetch_admin_config(cluster: KubernetesCluster) -> str: return kubeconfig_filename -def get_join_dict(group): +def get_join_dict(group, dry_run=False): + join_dict = {} + if dry_run: + join_dict['discovery-token-ca-cert-hash'] = None + join_dict['token'] = None + return join_dict first_control_plane = group.cluster.nodes["control-plane"].get_first_member(provide_node_configs=True) token_result = first_control_plane['connection'].sudo("kubeadm token create --print-join-command", hide=False) join_strings = list(token_result.values())[0].stdout.rstrip("\n") @@ -512,7 +528,9 @@ def get_join_dict(group): def init_first_control_plane(group): - log = group.cluster.log + cluster = group.cluster + log = cluster.log + dry_run = utils.check_dry_run_status_active(cluster) first_control_plane = group.get_first_member(provide_node_configs=True) first_control_plane_group = first_control_plane["connection"] @@ -547,8 +565,8 @@ def init_first_control_plane(group): utils.dump_file(group.cluster, config, 'init-config_%s.yaml' % first_control_plane['name']) log.debug("Uploading init config to initial control_plane...") - first_control_plane_group.sudo("mkdir -p /etc/kubernetes") - first_control_plane_group.put(io.StringIO(config), '/etc/kubernetes/init-config.yaml', sudo=True) + first_control_plane_group.sudo("mkdir -p /etc/kubernetes", dry_run=dry_run) + first_control_plane_group.put(io.StringIO(config), '/etc/kubernetes/init-config.yaml', sudo=True, dry_run=dry_run) # put control-plane patches create_kubeadm_patches_for_node(group.cluster, first_control_plane) @@ -562,7 +580,9 @@ def init_first_control_plane(group): " --config=/etc/kubernetes/init-config.yaml" " --ignore-preflight-errors='" + group.cluster.inventory['services']['kubeadm_flags']['ignorePreflightErrors'] + "'" " --v=5", - hide=False) + hide=False, dry_run=dry_run) + if dry_run: + return copy_admin_config(log, first_control_plane_group) @@ -670,7 +690,8 @@ def wait_for_nodes(group: NodeGroup): def init_workers(group): - join_dict = group.cluster.context.get("join_dict", get_join_dict(group)) + dry_run = utils.check_dry_run_status_active(group.cluster) + join_dict = group.cluster.context.get("join_dict", get_join_dict(group, dry_run)) join_config = { 'apiVersion': group.cluster.inventory["services"]["kubeadm"]['apiVersion'], @@ -705,8 +726,8 @@ def init_workers(group): utils.dump_file(group.cluster, config, 'join-config-workers.yaml') - group.sudo("mkdir -p /etc/kubernetes") - group.put(io.StringIO(config), '/etc/kubernetes/join-config.yaml', sudo=True) + group.sudo("mkdir -p /etc/kubernetes", dry_run=dry_run) + group.put(io.StringIO(config), '/etc/kubernetes/join-config.yaml', sudo=True, 
dry_run=dry_run) # put control-plane patches for node in group.get_ordered_members_list(provide_node_configs=True): @@ -718,12 +739,15 @@ def init_workers(group): "kubeadm join --config=/etc/kubernetes/join-config.yaml" " --ignore-preflight-errors='" + group.cluster.inventory['services']['kubeadm_flags']['ignorePreflightErrors'] + "'" " --v=5", - is_async=False, hide=False) + is_async=False, hide=False, dry_run=dry_run) def apply_labels(group): log = group.cluster.log log.debug("Applying additional labels for nodes") + if utils.check_dry_run_status_active(group.cluster): + log.debug("[dry-run]Successfully applied additional labels") + return None # TODO: Add "--overwrite-labels" switch # TODO: Add labels validation after applying with RemoteExecutor(group.cluster): @@ -747,6 +771,9 @@ def apply_taints(group): log = group.cluster.log log.debug("Applying additional taints for nodes") + if utils.check_dry_run_status_active(group.cluster): + log.debug("[dry-run]Successfully applied additional taints") + return None with RemoteExecutor(group.cluster): for node in group.get_ordered_members_list(provide_node_configs=True): if "taints" not in node: @@ -1218,6 +1245,7 @@ def images_grouped_prepull(group: NodeGroup, group_size: int = None): """ cluster = group.cluster + dry_run = utils.check_dry_run_status_active(cluster) log = cluster.log if not group_size: @@ -1240,8 +1268,8 @@ def images_grouped_prepull(group: NodeGroup, group_size: int = None): for group_i in range(groups_amount): log.verbose('Prepulling images for group #%s...' % group_i) # RemoteExecutor used for future cases, when some nodes will require another/additional actions for prepull - for node_i in range(group_i*group_size, (group_i*group_size)+group_size): - if node_i < len(nodes): + for node_i in range(group_i*group_size, (group_i*group_size) + group_size): + if node_i < len(nodes) and not dry_run: images_prepull(nodes[node_i]) return exe.get_last_results_str() @@ -1269,6 +1297,9 @@ def images_prepull(group: NodeGroup): def schedule_running_nodes_report(cluster: KubernetesCluster): + if utils.check_dry_run_status_active(cluster): + cluster.log.verbose("[dry-run] Scheduling running nodes report") + return summary.schedule_delayed_report(cluster, exec_running_nodes_report) @@ -1350,9 +1381,13 @@ def get_patched_flags_for_control_plane_item(inventory, control_plane_item, node return flags + # function to create kubeadm patches and put them to a node -def create_kubeadm_patches_for_node(cluster, node): +def create_kubeadm_patches_for_node(cluster, node, dry_run=False): cluster.log.verbose(f"Create and upload kubeadm patches to %s..." 
% node['name']) + if dry_run: + return + node['connection'].sudo('sudo rm -rf /etc/kubernetes/patches ; sudo mkdir -p /etc/kubernetes/patches', warn=True) # TODO: when k8s v1.21 is excluded from Kubemarine, this condition should be removed diff --git a/kubemarine/kubernetes_accounts.py b/kubemarine/kubernetes_accounts.py index c5e8ecc72..cf9696d16 100644 --- a/kubemarine/kubernetes_accounts.py +++ b/kubemarine/kubernetes_accounts.py @@ -66,6 +66,7 @@ def enrich_inventory(inventory, cluster): def install(cluster: KubernetesCluster): rbac = cluster.inventory['rbac'] + dry_run = utils.check_dry_run_status_active(cluster) if not rbac.get("accounts"): cluster.log.debug("No accounts specified to install, skipping...") return @@ -86,10 +87,10 @@ def install(cluster: KubernetesCluster): cluster.log.debug("Uploading template...") cluster.log.debug("\tDestination: %s" % destination_path) - cluster.nodes['control-plane'].put(io.StringIO(dump), destination_path, sudo=True) + cluster.nodes['control-plane'].put(io.StringIO(dump), destination_path, sudo=True, dry_run=dry_run) cluster.log.debug("Applying yaml...") - cluster.nodes['control-plane'].get_first_member().sudo('kubectl apply -f %s' % destination_path, hide=False) + cluster.nodes['control-plane'].get_first_member().sudo('kubectl apply -f %s' % destination_path, hide=False, dry_run=dry_run) cluster.log.debug('Loading token...') load_tokens_cmd = 'kubectl -n %s get secret ' \ @@ -98,14 +99,17 @@ def install(cluster: KubernetesCluster): token = [] retries = cluster.globals['accounts']['retries'] - # Token creation in Kubernetes 1.24 is not syncronus, therefore retries are necessary - while retries > 0: - result = cluster.nodes['control-plane'].get_first_member().sudo(load_tokens_cmd) - token = list(result.values())[0].stdout - if not token: - retries -= 1 - else: - break + if dry_run: + token = ["dry-run"] + else: + # Token creation in Kubernetes 1.24 is not syncronus, therefore retries are necessary + while retries > 0: + result = cluster.nodes['control-plane'].get_first_member().sudo(load_tokens_cmd) + token = list(result.values())[0].stdout + if not token: + retries -= 1 + else: + break if not token: raise Exception(f"The token loading for {account['name']} 'ServiceAccount' failed") diff --git a/kubemarine/plugins/__init__.py b/kubemarine/plugins/__init__.py index 18be34b9b..bb9d0f8bf 100755 --- a/kubemarine/plugins/__init__.py +++ b/kubemarine/plugins/__init__.py @@ -193,11 +193,14 @@ def install(cluster, plugins=None): def install_plugin(cluster, plugin_name, installation_procedure): + dry_run = utils.check_dry_run_status_active(cluster) cluster.log.debug("**** INSTALLING PLUGIN %s ****" % plugin_name) - - for current_step_i, step in enumerate(installation_procedure): + for _, step in enumerate(installation_procedure): for apply_type, configs in step.items(): - procedure_types[apply_type]['apply'](cluster, configs, plugin_name) + if not dry_run: + procedure_types[apply_type]['apply'](cluster, configs, plugin_name) + elif procedure_types[apply_type].get('verify'): + procedure_types[apply_type]['verify'](cluster, configs) def expect_daemonset(cluster: KubernetesCluster, diff --git a/kubemarine/procedures/install.py b/kubemarine/procedures/install.py index 722ffbd80..ca7e3222f 100755 --- a/kubemarine/procedures/install.py +++ b/kubemarine/procedures/install.py @@ -145,6 +145,9 @@ def system_prepare_system_setup_selinux(group: NodeGroup): @_applicable_for_new_nodes_with_roles('all') def system_prepare_system_setup_apparmor(group: NodeGroup): + if 
utils.check_dry_run_status_active(group.cluster): + group.cluster.log.debug("[dry-run] Setting up apparmor") + return group.call(apparmor.setup_apparmor) @@ -179,26 +182,30 @@ def system_prepare_policy(group: NodeGroup): Task generates rules for logging kubernetes and on audit """ cluster = group.cluster + dry_run = utils.check_dry_run_status_active(cluster) api_server_extra_args = cluster.inventory['services']['kubeadm']['apiServer']['extraArgs'] audit_log_dir = os.path.dirname(api_server_extra_args['audit-log-path']) audit_file_name = api_server_extra_args['audit-policy-file'] audit_policy_dir = os.path.dirname(audit_file_name) - group.sudo(f"mkdir -p {audit_log_dir} && sudo mkdir -p {audit_policy_dir}") + group.sudo(f"mkdir -p {audit_log_dir} && sudo mkdir -p {audit_policy_dir}", dry_run=dry_run) policy_config = cluster.inventory['services']['audit'].get('cluster_policy') collect_node = group.get_ordered_members_list(provide_node_configs=True) if policy_config: policy_config_file = yaml.dump(policy_config) utils.dump_file(cluster, policy_config_file, 'audit-policy.yaml') - #download rules in cluster + # download rules in cluster for node in collect_node: - node['connection'].put(io.StringIO(policy_config_file), audit_file_name, sudo=True, backup=True) + node['connection'].put(io.StringIO(policy_config_file), audit_file_name, sudo=True, backup=True, dry_run=dry_run) audit_config = True cluster.log.debug("Audit cluster policy config") else: audit_config = False cluster.log.debug("Audit cluster policy config is empty, nothing will be configured ") + if dry_run: + return + if kubernetes.is_cluster_installed(cluster) and audit_config is True and cluster.context['initial_procedure'] != 'add_node': for control_plane in collect_node: config_new = (kubernetes.get_kubeadm_config(cluster.inventory)) @@ -250,10 +257,11 @@ def system_prepare_policy(group: NodeGroup): @_applicable_for_new_nodes_with_roles('all') def system_prepare_dns_hostname(group: NodeGroup): cluster = group.cluster + dry_run = utils.check_dry_run_status_active(cluster) with RemoteExecutor(cluster): for node in group.get_ordered_members_list(provide_node_configs=True): cluster.log.debug("Changing hostname '%s' = '%s'" % (node["connect_to"], node["name"])) - node["connection"].sudo("hostnamectl set-hostname %s" % node["name"]) + node["connection"].sudo("hostnamectl set-hostname %s" % node["name"], dry_run=dry_run) @_applicable_for_new_nodes_with_roles('all') @@ -262,9 +270,9 @@ def system_prepare_dns_resolv_conf(group: NodeGroup): if cluster.inventory["services"].get("resolv.conf") is None: cluster.log.debug("Skipped - resolv.conf section not defined in config file") return - - system.update_resolv_conf(group, config=cluster.inventory["services"].get("resolv.conf")) - cluster.log.debug(group.sudo("ls -la /etc/resolv.conf; sudo lsattr /etc/resolv.conf")) + dry_run = utils.check_dry_run_status_active(cluster) + system.update_resolv_conf(group, config=cluster.inventory["services"].get("resolv.conf"), dry_run=dry_run) + cluster.log.debug(group.sudo("ls -la /etc/resolv.conf; sudo lsattr /etc/resolv.conf", dry_run=dry_run)) def system_prepare_dns_etc_hosts(cluster): @@ -275,9 +283,10 @@ def system_prepare_dns_etc_hosts(cluster): cluster.log.debug("\nUploading...") group = cluster.nodes['all'].get_final_nodes() + dry_run = utils.check_dry_run_status_active(cluster) - system.update_etc_hosts(group, config=config) - cluster.log.debug(group.sudo("ls -la /etc/hosts")) + system.update_etc_hosts(group, config=config, dry_run=dry_run) + 
cluster.log.debug(group.sudo("ls -la /etc/hosts", dry_run=dry_run)) @_applicable_for_new_nodes_with_roles('all') @@ -312,7 +321,7 @@ def system_prepare_package_manager_manage_packages(group: NodeGroup): def manage_mandatory_packages(group: NodeGroup): cluster = group.cluster - + dry_run = utils.check_dry_run_status_active(cluster) with RemoteExecutor(cluster) as exe: for node in group.get_ordered_members_list(): pkgs = [] @@ -322,13 +331,17 @@ def manage_mandatory_packages(group: NodeGroup): if pkgs: cluster.log.debug(f"Installing {pkgs} on {node.get_node_name()!r}") - packages.install(node, pkgs) + if not dry_run: + packages.install(node, pkgs) return exe.get_merged_result() def manage_custom_packages(group: NodeGroup): cluster = group.cluster + if utils.check_dry_run_status_active(group.cluster): + cluster.log.verbose("[dry-run] performing manage_custom_packages") + return batch_tasks = [] batch_parameters = {} @@ -409,10 +422,16 @@ def system_prepare_thirdparties(group: NodeGroup): @_applicable_for_new_nodes_with_roles('balancer') def deploy_loadbalancer_haproxy_install(group: NodeGroup): + if utils.check_dry_run_status_active(group.cluster): + group.cluster.log.debug("[dry-run] Installing load-balancer HA-PROXY") + return group.call(haproxy.install) def deploy_loadbalancer_haproxy_configure(cluster): + if utils.check_dry_run_status_active(cluster): + cluster.log.debug("[dry-run] Configuring load-balancer HA-PROXY") + return if not cluster.inventory['services'].get('loadbalancer', {}) \ .get('haproxy', {}).get('keep_configs_updated', True): @@ -462,6 +481,9 @@ def deploy_loadbalancer_keepalived_install(cluster): cluster.log.debug('Skipped - no VRRP IPs to perform') return + if utils.check_dry_run_status_active(cluster): + cluster.log.debug('Installing load-balancer Keepalived') + return # add_node will impact all keepalived group.call(keepalived.install) @@ -525,13 +547,14 @@ def deploy_kubernetes_init(cluster: KubernetesCluster): def deploy_coredns(cluster): + dry_run = utils.check_dry_run_status_active(cluster) config = coredns.generate_configmap(cluster.inventory) cluster.log.debug('Applying patch...') - cluster.log.debug(coredns.apply_patch(cluster)) + cluster.log.debug(coredns.apply_patch(cluster, dry_run)) cluster.log.debug('Applying configmap...') - cluster.log.debug(coredns.apply_configmap(cluster, config)) + cluster.log.debug(coredns.apply_configmap(cluster, config, dry_run)) def deploy_plugins(cluster): @@ -545,6 +568,9 @@ def deploy_accounts(cluster): def overview(cluster): cluster.log.debug("Retrieving cluster status...") control_plane = cluster.nodes["control-plane"].get_final_nodes().get_first_member() + if utils.check_dry_run_status_active(cluster): + cluster.log.debug("[dry-run] Cluster is healthy...") + return cluster.log.debug("\nNAMESPACES:") control_plane.sudo("kubectl get namespaces", hide=False) cluster.log.debug("\nNODES:") diff --git a/kubemarine/selinux.py b/kubemarine/selinux.py index 686bfce95..68a1fa4ef 100644 --- a/kubemarine/selinux.py +++ b/kubemarine/selinux.py @@ -173,6 +173,7 @@ def is_config_valid(group, state=None, policy=None, permissive=None): def setup_selinux(group): log = group.cluster.log + dry_run = utils.check_dry_run_status_active(group.cluster) # this method handles cluster with multiple os, suppressing should be enabled if group.get_nodes_os() not in ['rhel', 'rhel8']: @@ -196,7 +197,7 @@ def setup_selinux(group): log.debug("Uploading selinux config...") utils.dump_file(group.cluster, config, 'selinux_config') - group.put(config, 
'/etc/selinux/config', backup=True, sudo=True) + group.put(config, '/etc/selinux/config', backup=True, sudo=True, dry_run=dry_run) semanage_commands = '' for item in expected_permissive: @@ -205,7 +206,7 @@ def setup_selinux(group): semanage_commands = semanage_commands + 'semanage permissive -a %s' % item log.verbose("The following command will be executed to configure permissive:\n%s" % semanage_commands) - group.sudo(semanage_commands) + group.sudo(semanage_commands, dry_run=dry_run) group.cluster.schedule_cumulative_point(system.reboot_nodes) group.cluster.schedule_cumulative_point(system.verify_system) diff --git a/kubemarine/sysctl.py b/kubemarine/sysctl.py index ec3fe3e56..df2a11c8f 100644 --- a/kubemarine/sysctl.py +++ b/kubemarine/sysctl.py @@ -67,17 +67,19 @@ def configure(group: NodeGroup) -> NodeGroupResult: The configuration will be placed in sysctl daemon directory. """ config = make_config(group.cluster) - group.sudo('rm -f /etc/sysctl.d/98-*-sysctl.conf') + dry_run = utils.check_dry_run_status_active(group.cluster) + group.sudo('rm -f /etc/sysctl.d/98-*-sysctl.conf', dry_run=dry_run) utils.dump_file(group.cluster, config, '98-kubemarine-sysctl.conf') - group.put(io.StringIO(config), '/etc/sysctl.d/98-kubemarine-sysctl.conf', backup=True, sudo=True) - return group.sudo('ls -la /etc/sysctl.d/98-kubemarine-sysctl.conf') + group.put(io.StringIO(config), '/etc/sysctl.d/98-kubemarine-sysctl.conf', backup=True, sudo=True, dry_run=dry_run) + return group.sudo('ls -la /etc/sysctl.d/98-kubemarine-sysctl.conf', dry_run=dry_run) def reload(group: NodeGroup) -> NodeGroupResult: """ Reloads sysctl configuration in the specified group. """ - return group.sudo('sysctl -p /etc/sysctl.d/98-*-sysctl.conf') + return group.sudo('sysctl -p /etc/sysctl.d/98-*-sysctl.conf', + dry_run=utils.check_dry_run_status_active(group.cluster)) def get_pid_max(inventory): diff --git a/kubemarine/system.py b/kubemarine/system.py index 99817eb73..b483cf452 100644 --- a/kubemarine/system.py +++ b/kubemarine/system.py @@ -170,14 +170,15 @@ def detect_of_family_by_name_version(name: str, version: str) -> str: return os_family -def update_resolv_conf(group, config=None): +def update_resolv_conf(group, config=None, dry_run=False): if config is None: raise Exception("Data can't be empty") # TODO: Use Jinja template buffer = get_resolv_conf_buffer(config) utils.dump_file(group.cluster, buffer, 'resolv.conf') - group.put(buffer, "/etc/resolv.conf", backup=True, immutable=True, sudo=True, hide=True) + group.put(buffer, "/etc/resolv.conf", backup=True, immutable=True, + sudo=True, hide=True, dry_run=dry_run) def get_resolv_conf_buffer(config): @@ -214,11 +215,11 @@ def generate_etc_hosts_config(inventory, cluster=None, etc_hosts_part='etc_hosts return result -def update_etc_hosts(group, config=None): +def update_etc_hosts(group, config=None, dry_run=False): if config is None: raise Exception("Data can't be empty") utils.dump_file(group.cluster, config, 'etc_hosts') - group.put(io.StringIO(config), "/etc/hosts", backup=True, sudo=True, hide=True) + group.put(io.StringIO(config), "/etc/hosts", backup=True, sudo=True, hide=True, dry_run=dry_run) def stop_service(group: NodeGroup, name: str) -> NodeGroupResult: @@ -252,7 +253,7 @@ def disable_service(group, name=None, now=True): cmd = 'systemctl disable %s' % name if now: cmd = cmd + " --now" - return group.sudo(cmd) + return group.sudo(cmd, dry_run=utils.check_dry_run_status_active(group.cluster)) def patch_systemd_service(group: NodeGroup, service_name: str, patch_source: 
str): @@ -319,15 +320,19 @@ def disable_swap(group): return result log.verbose("Switching swap off...") - - group.sudo('swapoff -a', warn=True) - group.sudo('sed -i.bak \'/swap/d\' /etc/fstab', warn=True) + dry_run = utils.check_dry_run_status_active(group.cluster) + group.sudo('swapoff -a', warn=True, dry_run=dry_run) + group.sudo('sed -i.bak \'/swap/d\' /etc/fstab', warn=True, dry_run=dry_run) group.cluster.schedule_cumulative_point(reboot_nodes) group.cluster.schedule_cumulative_point(verify_system) def reboot_nodes(cluster: KubernetesCluster): + if utils.check_dry_run_status_active(cluster): + cluster.log.debug("[dry-run] Performing reboot") + return + cluster.nodes["all"].get_new_nodes_or_self().call(reboot_group) @@ -380,14 +385,15 @@ def perform_group_reboot(group: NodeGroup): return result -def reload_systemctl(group): - return group.sudo('systemctl daemon-reload') +def reload_systemctl(group, dry_run=False): + return group.sudo('systemctl daemon-reload', dry_run=dry_run) def add_to_path(group, string): # TODO: Also update PATH in ~/.bash_profile group.sudo("export PATH=$PATH:%s" % string) + def configure_chronyd(group, retries=60): cluster = group.cluster log = cluster.log @@ -405,6 +411,11 @@ def configure_chronyd(group, retries=60): utils.dump_file(cluster, chronyd_config, 'chrony.conf') group.put(io.StringIO(chronyd_config), '/etc/chrony.conf', backup=True, sudo=True) group.sudo('systemctl restart chronyd') + + if utils.check_dry_run_status_active(group.cluster): + log.debug("[dry-run] Successfully configured chronyd") + return + while retries > 0: log.debug("Waiting for time sync, retries left: %s" % retries) results = group.sudo('chronyc tracking && sudo chronyc sources') @@ -450,6 +461,9 @@ def configure_timesyncd(group, retries=120): '&& sudo systemctl restart systemd-timesyncd.service ' '&& sudo systemctl status systemd-timesyncd.service') log.verbose(res) + if utils.check_dry_run_status_active(group.cluster): + log.debug("[dry-run] Successfully configured timesyncd") + return while retries > 0: log.debug("Waiting for time sync, retries left: %s" % retries) results = group.sudo('timedatectl timesync-status && sudo timedatectl status') @@ -498,8 +512,10 @@ def setup_modprobe(group): log.debug("Uploading config...") utils.dump_file(group.cluster, config, 'modprobe_predefined.conf') - group.put(io.StringIO(config), "/etc/modules-load.d/predefined.conf", backup=True, sudo=True, hide=True) - group.sudo("modprobe -a %s" % raw_config) + dry_run = utils.check_dry_run_status_active(group.cluster) + group.put(io.StringIO(config), "/etc/modules-load.d/predefined.conf", + backup=True, sudo=True, hide=True, dry_run=dry_run) + group.sudo("modprobe -a %s" % raw_config, dry_run=dry_run) group.cluster.schedule_cumulative_point(reboot_nodes) group.cluster.schedule_cumulative_point(verify_system) @@ -522,6 +538,9 @@ def is_modprobe_valid(group): def verify_system(cluster: KubernetesCluster): + if utils.check_dry_run_status_active(cluster): + cluster.log.debug("[dry-run] verifying system") + return group = cluster.nodes["all"].get_new_nodes_or_self() log = cluster.log # this method handles clusters with multiple OS diff --git a/kubemarine/thirdparties.py b/kubemarine/thirdparties.py index e7b393de3..5f47fbd83 100644 --- a/kubemarine/thirdparties.py +++ b/kubemarine/thirdparties.py @@ -262,7 +262,7 @@ def enrich_inventory_apply_defaults(inventory: dict, cluster: KubernetesCluster) crictl_key = '/usr/bin/crictl.tar.gz' if cri_name == "docker" and \ crictl_key not in 
cluster.raw_inventory.get('services', {}).get('thirdparties', {}): - del(thirdparties[crictl_key]) + del thirdparties[crictl_key] return inventory @@ -289,6 +289,7 @@ def get_group_require_unzip(cluster: KubernetesCluster, inventory: dict) -> Node def install_thirdparty(filter_group: NodeGroup, destination: str) -> NodeGroupResult or None: cluster = filter_group.cluster + dry_run = utils.check_dry_run_status_active(cluster) config = cluster.inventory['services'].get('thirdparties', {}).get(destination) if config is None: @@ -329,11 +330,11 @@ def install_thirdparty(filter_group: NodeGroup, destination: str) -> NodeGroupRe remote_commands += ' && sudo rm -f %s && sudo curl --max-time %d -f -g -L %s -o %s && ' % (destination, cluster.inventory['globals']['timeout_download'], config['source'], destination) else: cluster.log.verbose('Installation via sftp upload detected') - cluster.log.debug(common_group.sudo(remote_commands)) + cluster.log.debug(common_group.sudo(remote_commands, dry_run=dry_run)) remote_commands = '' # TODO: Possible use SHA1 from inventory instead of calculating if provided? script = utils.read_internal(config['source']) - common_group.put(io.StringIO(script), destination, sudo=True) + common_group.put(io.StringIO(script), destination, sudo=True, dry_run=dry_run) # TODO: Do not upload local files if they already exists on remote machines @@ -350,25 +351,24 @@ def install_thirdparty(filter_group: NodeGroup, destination: str) -> NodeGroupRe if extension == 'zip': cluster.log.verbose('Unzip will be used for unpacking') remote_commands += ' && sudo unzip -o %s -d %s' % (destination, config['unpack']) - + remote_commands += ' && sudo unzip -qq -l %s | awk \'NF > 3 { print $4 }\'| xargs -I FILE sudo chmod %s %s/FILE' \ % (destination, config['mode'], config['unpack']) remote_commands += ' && sudo unzip -qq -l %s | awk \'NF > 3 { print $4 }\'| xargs -I FILE sudo chown -R %s %s/FILE' \ % (destination, config['owner'], config['unpack']) remote_commands += ' && sudo unzip -qq -l %s | awk \'NF > 3 { print $4 }\'| xargs -I FILE sudo ls -la %s/FILE' % (destination, config['unpack']) - + else: cluster.log.verbose('Tar will be used for unpacking') remote_commands += ' && sudo tar -zxf %s -C %s' % (destination, config['unpack']) - + remote_commands += ' && sudo tar -tf %s | xargs -I FILE sudo chmod %s %s/FILE' \ % (destination, config['mode'], config['unpack']) remote_commands += ' && sudo tar -tf %s | xargs -I FILE sudo chown %s %s/FILE' \ % (destination, config['owner'], config['unpack']) remote_commands += ' && sudo tar -tf %s | xargs -I FILE sudo ls -la %s/FILE' % (destination, config['unpack']) - - return common_group.sudo(remote_commands) + return common_group.sudo(remote_commands, dry_run=dry_run) def install_all_thirparties(group): From 252b9fcfa352ff9e82922a758da493a4d33d1e5f Mon Sep 17 00:00:00 2001 From: Vasudeo Nimbekar Date: Mon, 26 Jun 2023 11:06:45 +0530 Subject: [PATCH 2/8] Implemented dry-run for upgrade procedure --- kubemarine/apt.py | 2 +- kubemarine/core/defaults.py | 2 +- kubemarine/core/group.py | 2 + kubemarine/kubernetes/__init__.py | 72 ++++++++++++++++++------------- kubemarine/plugins/__init__.py | 26 +++++++---- kubemarine/procedures/install.py | 7 +-- kubemarine/procedures/upgrade.py | 14 +++--- 7 files changed, 76 insertions(+), 49 deletions(-) diff --git a/kubemarine/apt.py b/kubemarine/apt.py index e5733c321..c9a17d2dd 100644 --- a/kubemarine/apt.py +++ b/kubemarine/apt.py @@ -82,7 +82,7 @@ def install(group, include=None, exclude=None, **kwargs) -> 
NodeGroupResult: command = get_install_cmd(include, exclude) - return group.sudo(command, **kwargs) + return group.sudo(command, dry_run=utils.check_dry_run_status_active(group.cluster), **kwargs) # apt fails to install (downgrade) package if it is already present and has higher version, # thus we do not need additional checks here (in contrast to yum) diff --git a/kubemarine/core/defaults.py b/kubemarine/core/defaults.py index c8677d6b2..5eeabf99c 100755 --- a/kubemarine/core/defaults.py +++ b/kubemarine/core/defaults.py @@ -407,7 +407,7 @@ def enrich_inventory(cluster: KubernetesCluster, inventory: dict, make_dumps=Tru if make_dumps: from kubemarine import controlplane inventory_for_dump = controlplane.controlplane_finalize_inventory(cluster, prepare_for_dump(inventory)) - utils.dump_file(cluster, yaml.dump(inventory_for_dump, ), "cluster.yaml") + # utils.dump_file(cluster, yaml.dump(inventory_for_dump, ), "cluster.yaml") return inventory diff --git a/kubemarine/core/group.py b/kubemarine/core/group.py index e7a7c8c7f..f5bcacaaa 100755 --- a/kubemarine/core/group.py +++ b/kubemarine/core/group.py @@ -363,6 +363,8 @@ def do_dry_run(self: 'NodeGroup', *args, **kwargs): else: self.cluster.log.verbose('Performing %s %s on nodes %s with options: %s' % (fn.__name__, args[0], list(self.nodes.keys()), kwargs)) return NodeGroupResult(self.cluster, results) + elif "dry_run" in kwargs.keys(): + del kwargs["dry_run"] try: results = fn(self, *args, **kwargs) return results diff --git a/kubemarine/kubernetes/__init__.py b/kubemarine/kubernetes/__init__.py index c0356434d..fb6d29e6e 100644 --- a/kubemarine/kubernetes/__init__.py +++ b/kubemarine/kubernetes/__init__.py @@ -815,7 +815,7 @@ def get_kubeadm_config(inventory): return f'{kubeadm_kubelet}---\n{kubeadm}' -def upgrade_first_control_plane(upgrade_group: NodeGroup, cluster: KubernetesCluster, **drain_kwargs): +def upgrade_first_control_plane(upgrade_group: NodeGroup, cluster: KubernetesCluster, dry_run=False, **drain_kwargs): version = cluster.inventory["services"]["kubeadm"]["kubernetesVersion"] first_control_plane = cluster.nodes['control-plane'].get_first_member(provide_node_configs=True) @@ -826,8 +826,8 @@ def upgrade_first_control_plane(upgrade_group: NodeGroup, cluster: KubernetesClu cluster.log.debug("Upgrading first control-plane \"%s\"" % first_control_plane) # put control-plane patches - create_kubeadm_patches_for_node(cluster, first_control_plane) - + create_kubeadm_patches_for_node(cluster, first_control_plane, dry_run=dry_run) + # TODO: when k8s v1.21 is excluded from Kubemarine, this condition should be removed # and only "else" branch remains if "v1.21" in cluster.inventory["services"]["kubeadm"]["kubernetesVersion"]: @@ -839,16 +839,20 @@ def upgrade_first_control_plane(upgrade_group: NodeGroup, cluster: KubernetesClu flags += " --config /tmp/kubeadm_config.yaml" drain_cmd = prepare_drain_command(cluster, first_control_plane['name'], **drain_kwargs) - first_control_plane['connection'].sudo(drain_cmd, is_async=False, hide=False) + first_control_plane['connection'].sudo(drain_cmd, is_async=False, hide=False, dry_run=dry_run) - upgrade_cri_if_required(first_control_plane['connection']) + upgrade_cri_if_required(first_control_plane['connection'], dry_run=dry_run) # The procedure for removing the deprecated kubelet flag for versions older than 1.27.0 - fix_flag_kubelet(cluster, first_control_plane) + fix_flag_kubelet(cluster, first_control_plane, dry_run=dry_run) first_control_plane['connection'].sudo(f"sudo kubeadm upgrade apply 
{version} {flags} && " f"sudo kubectl uncordon {first_control_plane['name']} && " - f"sudo systemctl restart kubelet", is_async=False, hide=False) + f"sudo systemctl restart kubelet", is_async=False, hide=False, dry_run=dry_run) + if dry_run: + cluster.log.debug("[dry-run]Nodes have correct Kubernetes version...") + cluster.log.debug("[dry-run] Pods are ready!") + return copy_admin_config(cluster.log, first_control_plane['connection']) @@ -857,7 +861,7 @@ def upgrade_first_control_plane(upgrade_group: NodeGroup, cluster: KubernetesClu exclude_node_from_upgrade_list(first_control_plane['connection'], first_control_plane['name']) -def upgrade_other_control_planes(upgrade_group: NodeGroup, cluster: KubernetesCluster, **drain_kwargs): +def upgrade_other_control_planes(upgrade_group: NodeGroup, cluster: KubernetesCluster, dry_run=False, **drain_kwargs): version = cluster.inventory["services"]["kubeadm"]["kubernetesVersion"] first_control_plane = cluster.nodes['control-plane'].get_first_member(provide_node_configs=True) @@ -871,15 +875,15 @@ def upgrade_other_control_planes(upgrade_group: NodeGroup, cluster: KubernetesCl cluster.log.debug("Upgrading control-plane \"%s\"" % node['name']) # put control-plane patches - create_kubeadm_patches_for_node(cluster, node) + create_kubeadm_patches_for_node(cluster, node, dry_run=dry_run) drain_cmd = prepare_drain_command(cluster, node['name'], **drain_kwargs) - node['connection'].sudo(drain_cmd, is_async=False, hide=False) + node['connection'].sudo(drain_cmd, is_async=False, hide=False, dry_run=dry_run) - upgrade_cri_if_required(node['connection']) + upgrade_cri_if_required(node['connection'], dry_run=dry_run) # The procedure for removing the deprecated kubelet flag for versions older than 1.27.0 - fix_flag_kubelet(cluster, node) + fix_flag_kubelet(cluster, node, dry_run=dry_run) # TODO: when k8s v1.21 is excluded from Kubemarine, this condition should be removed # and only "else" branch remains @@ -888,19 +892,23 @@ def upgrade_other_control_planes(upgrade_group: NodeGroup, cluster: KubernetesCl f"sudo sed -i 's/--bind-address=.*$/--bind-address={node['internal_address']}/' " f"/etc/kubernetes/manifests/kube-apiserver.yaml && " f"sudo kubectl uncordon {node['name']} && " - f"sudo systemctl restart kubelet", is_async=False, hide=False) + f"sudo systemctl restart kubelet", is_async=False, hide=False, dry_run=dry_run) else: node['connection'].sudo(f"sudo kubeadm upgrade node --certificate-renewal=true --patches=/etc/kubernetes/patches && " f"sudo kubectl uncordon {node['name']} && " - f"sudo systemctl restart kubelet", is_async=False, hide=False) + f"sudo systemctl restart kubelet", is_async=False, hide=False, dry_run=dry_run) + if dry_run: + cluster.log.debug("[dry-run]Nodes have correct Kubernetes version...") + cluster.log.debug("[dry-run] Pods are ready!") + continue expect_kubernetes_version(cluster, version, apply_filter=node['name']) copy_admin_config(cluster.log, node['connection']) wait_for_any_pods(cluster, node['connection'], apply_filter=node['name']) exclude_node_from_upgrade_list(first_control_plane, node['name']) -def patch_kubeadm_configmap(first_control_plane, cluster): +def patch_kubeadm_configmap(first_control_plane, cluster, dry_run=False): ''' Checks and patches the Kubeadm configuration for compliance with the current imageRepository, audit log path and the corresponding version of the CoreDNS path to the image. 
@@ -937,12 +945,12 @@ def patch_kubeadm_configmap(first_control_plane, cluster): kubelet_config = first_control_plane["connection"].sudo("cat /var/lib/kubelet/config.yaml").get_simple_out() ryaml.dump(cluster_config, updated_config) result_config = kubelet_config + "---\n" + updated_config.getvalue() - first_control_plane["connection"].put(io.StringIO(result_config), "/tmp/kubeadm_config.yaml", sudo=True) + first_control_plane["connection"].put(io.StringIO(result_config), "/tmp/kubeadm_config.yaml", sudo=True, dry_run=dry_run) return True -def upgrade_workers(upgrade_group: NodeGroup, cluster: KubernetesCluster, **drain_kwargs): +def upgrade_workers(upgrade_group: NodeGroup, cluster: KubernetesCluster, dry_run=False, **drain_kwargs): version = cluster.inventory["services"]["kubeadm"]["kubernetesVersion"] first_control_plane = cluster.nodes['control-plane'].get_first_member(provide_node_configs=True) @@ -956,27 +964,30 @@ def upgrade_workers(upgrade_group: NodeGroup, cluster: KubernetesCluster, **drai cluster.log.debug("Upgrading worker \"%s\"" % node['name']) # put control-plane patches - create_kubeadm_patches_for_node(cluster, node) + create_kubeadm_patches_for_node(cluster, node, dry_run=dry_run) drain_cmd = prepare_drain_command(cluster, node['name'], **drain_kwargs) - first_control_plane['connection'].sudo(drain_cmd, is_async=False, hide=False) + first_control_plane['connection'].sudo(drain_cmd, is_async=False, hide=False, dry_run=dry_run) - upgrade_cri_if_required(node['connection']) + upgrade_cri_if_required(node['connection'], dry_run=dry_run) # The procedure for removing the deprecated kubelet flag for versions older than 1.27.0 - fix_flag_kubelet(cluster, node) + fix_flag_kubelet(cluster, node, dry_run=dry_run) # TODO: when k8s v1.21 is excluded from Kubemarine, this condition should be removed # and only "else" branch remains if "v1.21" in cluster.inventory["services"]["kubeadm"]["kubernetesVersion"]: node['connection'].sudo("kubeadm upgrade node --certificate-renewal=true && " - "sudo systemctl restart kubelet") + "sudo systemctl restart kubelet", dry_run=dry_run) else: node['connection'].sudo("kubeadm upgrade node --certificate-renewal=true --patches=/etc/kubernetes/patches && " - "sudo systemctl restart kubelet") - - first_control_plane['connection'].sudo("kubectl uncordon %s" % node['name'], is_async=False, hide=False) + "sudo systemctl restart kubelet", dry_run=dry_run) + first_control_plane['connection'].sudo("kubectl uncordon %s" % node['name'], is_async=False, hide=False, dry_run=dry_run) + if dry_run: + cluster.log.debug("[dry-run]Nodes have correct Kubernetes version...") + cluster.log.debug("[dry-run] Pods are ready!") + continue expect_kubernetes_version(cluster, version, apply_filter=node['name']) # workers do not have system pods to wait for their start exclude_node_from_upgrade_list(first_control_plane, node['name']) @@ -1000,7 +1011,7 @@ def prepare_drain_command(cluster: KubernetesCluster, node_name: str, return drain_cmd -def upgrade_cri_if_required(group: NodeGroup): +def upgrade_cri_if_required(group: NodeGroup, dry_run=False): # currently it is invoked only for single node cluster = group.cluster log = cluster.log @@ -1013,9 +1024,9 @@ def upgrade_cri_if_required(group: NodeGroup): packages.install(group, include=cri_packages) log.debug(f"Restarting all containers on node: {group.get_node_name()}") if cri_impl == "docker": - group.sudo("docker container rm -f $(sudo docker container ls -q)", warn=True) + group.sudo("docker container rm -f $(sudo docker 
container ls -q)", warn=True, dry_run=dry_run) else: - group.sudo("crictl rm -fa", warn=True) + group.sudo("crictl rm -fa", warn=True, dry_run=dry_run) else: log.debug(f"{cri_impl} upgrade is not required") @@ -1032,6 +1043,7 @@ def verify_upgrade_versions(cluster): " -o custom-columns='VERSION:.status.nodeInfo.kubeletVersion' " "| grep -vw ^VERSION ") curr_version = list(result.values())[0].stdout + print("@@@@@@@@@@@@@@@@@@@@@@@@@@", curr_version) test_version_upgrade_possible(curr_version, upgrade_version, skip_equal=True) @@ -1420,7 +1432,7 @@ def create_kubeadm_patches_for_node(cluster, node, dry_run=False): return -def fix_flag_kubelet(cluster: KubernetesCluster, node: dict): +def fix_flag_kubelet(cluster: KubernetesCluster, node: dict, dry_run=False): #Deprecated flag removal function for kubelet kubeadm_file = "/var/lib/kubelet/kubeadm-flags.env" version = cluster.inventory["services"]["kubeadm"]["kubernetesVersion"] @@ -1432,4 +1444,4 @@ def fix_flag_kubelet(cluster: KubernetesCluster, node: dict): kubeadm_flags = node['connection'].sudo(f"cat {kubeadm_file}", is_async=False).get_simple_out() if kubeadm_flags.find('--container-runtime=remote') != -1: kubeadm_flags = kubeadm_flags.replace('--container-runtime=remote', '') - node['connection'].put(io.StringIO(kubeadm_flags), kubeadm_file, backup=True, sudo=True) + node['connection'].put(io.StringIO(kubeadm_flags), kubeadm_file, backup=True, sudo=True, dry_run=dry_run) diff --git a/kubemarine/plugins/__init__.py b/kubemarine/plugins/__init__.py index e60a006d8..1f628010b 100755 --- a/kubemarine/plugins/__init__.py +++ b/kubemarine/plugins/__init__.py @@ -194,14 +194,10 @@ def install(cluster, plugins=None): def install_plugin(cluster, plugin_name, installation_procedure): - dry_run = utils.check_dry_run_status_active(cluster) cluster.log.debug("**** INSTALLING PLUGIN %s ****" % plugin_name) for _, step in enumerate(installation_procedure): for apply_type, configs in step.items(): - if not dry_run: - procedure_types[apply_type]['apply'](cluster, configs, plugin_name) - elif procedure_types[apply_type].get('verify'): - procedure_types[apply_type]['verify'](cluster, configs) + procedure_types[apply_type]['apply'](cluster, configs, plugin_name) def expect_daemonset(cluster: KubernetesCluster, @@ -549,6 +545,9 @@ def apply_expect(cluster, config, plugin_name=None): # TODO: Add support for expect services and expect nodes for expect_type, expect_conf in config.items(): + if utils.check_dry_run_status_active(cluster): + cluster.log.debug(f"[dry-run] {expect_type} are up to date...") + return None if expect_type == 'daemonsets': expect_daemonset(cluster, config['daemonsets']['list'], timeout=config['daemonsets'].get('timeout'), @@ -717,8 +716,11 @@ def apply_shell(cluster, step, plugin_name=None): if sudo: method = common_group.sudo + dry_run = utils.check_dry_run_status_active(cluster) cluster.log.debug('Running shell command...') - result = method(commands, env=in_vars_dict) + result = method(commands, env=in_vars_dict, dry_run=dry_run) + if dry_run: + return None if out_vars: stdout = list(result.values())[0].stdout @@ -781,6 +783,9 @@ def apply_ansible(cluster, step, plugin_name=None): cluster.log.verbose("Running shell \"%s\"" % command) result = subprocess.run(command, stdout=sys.stdout, stderr=sys.stderr, shell=True) + if utils.check_dry_run_status_active(cluster): + cluster.log.debug("[dry-run] Successfully applied ansible plugin...") + return None if result.returncode != 0: raise Exception("Failed to apply ansible plugin, see error 
above") @@ -824,6 +829,9 @@ def apply_helm(cluster: KubernetesCluster, config, plugin_name=None): command = prepare_for_helm_command + f'{deployment_mode} {release} {chart_path} --debug' output = subprocess.check_output(command, shell=True) + if utils.check_dry_run_status_active(cluster): + cluster.log.debug("[dry-run] Successfully applied helm plugin...") + return None cluster.log.debug(output.decode('utf-8')) return output @@ -1010,7 +1018,7 @@ def apply_source(cluster: KubernetesCluster, config: dict) -> None: source = config['source'] destination_path = config['destination'] apply_command = config.get('apply_command', 'kubectl apply -f %s' % destination_path) - + dry_run = utils.check_dry_run_status_active(cluster) if not destination_groups and not destination_nodes: destination_common_group = cluster.nodes['control-plane'] else: @@ -1021,14 +1029,14 @@ def apply_source(cluster: KubernetesCluster, config: dict) -> None: else: apply_common_group = cluster.create_group_from_groups_nodes_names(apply_groups, apply_nodes) - destination_common_group.put(source, destination_path, backup=True, sudo=use_sudo) + destination_common_group.put(source, destination_path, backup=True, sudo=use_sudo, dry_run=dry_run) if apply_required: method = apply_common_group.run if use_sudo: method = apply_common_group.sudo cluster.log.debug("Applying yaml...") - method(apply_command, logging_stream_level=logging.DEBUG) + method(apply_command, logging_stream_level=logging.DEBUG, dry_run=dry_run) else: cluster.log.debug('Apply is not required') diff --git a/kubemarine/procedures/install.py b/kubemarine/procedures/install.py index ca7e3222f..719ec8ee7 100755 --- a/kubemarine/procedures/install.py +++ b/kubemarine/procedures/install.py @@ -339,9 +339,6 @@ def manage_mandatory_packages(group: NodeGroup): def manage_custom_packages(group: NodeGroup): cluster = group.cluster - if utils.check_dry_run_status_active(group.cluster): - cluster.log.verbose("[dry-run] performing manage_custom_packages") - return batch_tasks = [] batch_parameters = {} @@ -371,6 +368,10 @@ def manage_custom_packages(group: NodeGroup): cluster.log.debug("Skipped - no packages configuration defined in config file") return + if utils.check_dry_run_status_active(group.cluster): + cluster.log.verbose("[dry-run] Managing Custom Packages...") + return + try: batch_results = group.call_batch(batch_tasks, **batch_parameters) except fabric.group.GroupException: diff --git a/kubemarine/procedures/upgrade.py b/kubemarine/procedures/upgrade.py index 1226ab379..e601554c8 100755 --- a/kubemarine/procedures/upgrade.py +++ b/kubemarine/procedures/upgrade.py @@ -53,29 +53,31 @@ def kubernetes_upgrade(cluster): drain_timeout = cluster.procedure_inventory.get('drain_timeout') grace_period = cluster.procedure_inventory.get('grace_period') disable_eviction = cluster.procedure_inventory.get("disable-eviction", True) + dry_run = utils.check_dry_run_status_active(cluster) drain_kwargs = { 'disable_eviction': disable_eviction, 'drain_timeout': drain_timeout, 'grace_period': grace_period } - kubernetes.upgrade_first_control_plane(upgrade_group, cluster, **drain_kwargs) + kubernetes.upgrade_first_control_plane(upgrade_group, cluster, dry_run=dry_run, **drain_kwargs) # After first control-plane upgrade is finished we may loose our CoreDNS changes. # Thus, we need to re-apply our CoreDNS changes immediately after first control-plane upgrade. 
install.deploy_coredns(cluster) - kubernetes.upgrade_other_control_planes(upgrade_group, cluster, **drain_kwargs) + kubernetes.upgrade_other_control_planes(upgrade_group, cluster, dry_run=dry_run, **drain_kwargs) if cluster.nodes.get('worker', []): - kubernetes.upgrade_workers(upgrade_group, cluster, **drain_kwargs) + kubernetes.upgrade_workers(upgrade_group, cluster, dry_run=dry_run, **drain_kwargs) - cluster.nodes['control-plane'].get_first_member().sudo('rm -f /etc/kubernetes/nodes-k8s-versions.txt') + cluster.nodes['control-plane'].get_first_member().sudo('rm -f /etc/kubernetes/nodes-k8s-versions.txt', dry_run=dry_run) cluster.context['cached_nodes_versions_cleaned'] = True def kubernetes_cleanup_nodes_versions(cluster): + dry_run = utils.check_dry_run_status_active(cluster) if not cluster.context.get('cached_nodes_versions_cleaned', False): cluster.log.verbose('Cached nodes versions required') - cluster.nodes['control-plane'].get_first_member().sudo('rm -f /etc/kubernetes/nodes-k8s-versions.txt') + cluster.nodes['control-plane'].get_first_member().sudo('rm -f /etc/kubernetes/nodes-k8s-versions.txt', dry_run=dry_run) else: cluster.log.verbose('Cached nodes versions already cleaned') kubernetes_apply_taints(cluster) @@ -137,6 +139,8 @@ def upgrade_containerd(cluster: KubernetesCluster): if isinstance(value, dict): config_string += f"\n[{key}]\n{toml.dumps(value)}" utils.dump_file(cluster, config_string, 'containerd-config.toml') + if utils.check_dry_run_status_active(cluster): + return with RemoteExecutor(cluster) as exe: for node in cluster.nodes['control-plane'].include_group( cluster.nodes.get('worker')).get_ordered_members_list( From d10183212222f730cc82340db84647d29d32fc3d Mon Sep 17 00:00:00 2001 From: Vasudeo Nimbekar Date: Wed, 28 Jun 2023 12:33:06 +0530 Subject: [PATCH 3/8] added dry-run for add-node procedure --- kubemarine/core/cluster.py | 3 ++- kubemarine/core/flow.py | 6 +++--- kubemarine/core/utils.py | 8 ++++++-- kubemarine/kubernetes/__init__.py | 5 ++++- kubemarine/plugins/nginx_ingress.py | 3 +++ 5 files changed, 18 insertions(+), 7 deletions(-) diff --git a/kubemarine/core/cluster.py b/kubemarine/core/cluster.py index 162e4938f..99d6c0f8c 100755 --- a/kubemarine/core/cluster.py +++ b/kubemarine/core/cluster.py @@ -343,7 +343,8 @@ def dump_finalized_inventory(self): data = yaml.dump(inventory_for_dump) finalized_filename = "cluster_finalized.yaml" utils.dump_file(self, data, finalized_filename) - utils.dump_file(self, data, finalized_filename, dump_location=False) + if not utils.check_dry_run_status_active(self): + utils.dump_file(self, data, finalized_filename, dump_location=False) def preserve_inventory(self): self.log.debug("Start preserving of the information about the procedure.") diff --git a/kubemarine/core/flow.py b/kubemarine/core/flow.py index 4e55bed7c..b90a5b964 100755 --- a/kubemarine/core/flow.py +++ b/kubemarine/core/flow.py @@ -131,8 +131,8 @@ def run_actions(resources: res.DynamicResources, actions: List[action.Action]) - timestamp = utils.get_current_timestamp_formatted() inventory_file_basename = os.path.basename(resources.inventory_filepath) utils.dump_file(context, stream, "%s_%s" % (inventory_file_basename, str(timestamp))) - - resources.recreate_inventory() + if not utils.check_dry_run_status_active(last_cluster): + resources.recreate_inventory() _post_process_actions_group(last_cluster, context, successfully_performed) successfully_performed = [] last_cluster = None @@ -313,7 +313,7 @@ def new_common_parser(cli_help): help='define main cluster 
configuration file') parser.add_argument('--ansible-inventory-location', - default='./ansible-inventory.ini', + default='ansible-inventory.ini', help='auto-generated ansible-compatible inventory file location') parser.add_argument('--dump-location', diff --git a/kubemarine/core/utils.py b/kubemarine/core/utils.py index 140a72f63..20cdf4c38 100755 --- a/kubemarine/core/utils.py +++ b/kubemarine/core/utils.py @@ -152,7 +152,11 @@ def make_ansible_inventory(location, cluster): config_compiled += '\n' + string config_compiled += '\n\n' - dump_file({}, config_compiled, location, dump_location=False) + dump_location = False + args = cluster.context.get('execution_arguments') + if args.get('without_act', None): + dump_location = True + dump_file(cluster, config_compiled, location, dump_location=dump_location) def get_current_timestamp_formatted(): @@ -205,7 +209,7 @@ def dump_file(context, data: object, filename: str, cluster = context context = cluster.context - args = context['execution_arguments'] + args = context.get('execution_arguments', {}) if args.get('disable_dump', True) \ and not (filename in ClusterStorage.PRESERVED_DUMP_FILES and context['preserve_inventory']): return diff --git a/kubemarine/kubernetes/__init__.py b/kubemarine/kubernetes/__init__.py index fb6d29e6e..5f3db33d8 100644 --- a/kubemarine/kubernetes/__init__.py +++ b/kubemarine/kubernetes/__init__.py @@ -363,6 +363,7 @@ def join_other_control_planes(group): def join_new_control_plane(group, dry_run=False): dry_run = utils.check_dry_run_status_active(group.cluster) + log = group.cluster.log join_dict = get_join_dict(group, dry_run) for node in group.get_ordered_members_list(provide_node_configs=True): if dry_run: @@ -645,6 +646,9 @@ def wait_uncordon(node: NodeGroup): def wait_for_nodes(group: NodeGroup): log = group.cluster.log + if utils.check_dry_run_status_active(group.cluster): + log.debug("[dry-run] All nodes are ready!") + return first_control_plane = group.cluster.nodes["control-plane"].get_first_member() node_names = group.get_nodes_names() @@ -1043,7 +1047,6 @@ def verify_upgrade_versions(cluster): " -o custom-columns='VERSION:.status.nodeInfo.kubeletVersion' " "| grep -vw ^VERSION ") curr_version = list(result.values())[0].stdout - print("@@@@@@@@@@@@@@@@@@@@@@@@@@", curr_version) test_version_upgrade_possible(curr_version, upgrade_version, skip_equal=True) diff --git a/kubemarine/plugins/nginx_ingress.py b/kubemarine/plugins/nginx_ingress.py index 144b0c1cc..86dace554 100644 --- a/kubemarine/plugins/nginx_ingress.py +++ b/kubemarine/plugins/nginx_ingress.py @@ -28,6 +28,9 @@ def check_job_for_nginx(cluster: KubernetesCluster): major_version = int(version[1]) minor_version = int(version[2]) + if utils.check_dry_run_status_active(cluster): + cluster.log.debug('[dry-run] There are no jobs to delete') + return check_jobs = first_control_plane['connection'].sudo(f"kubectl get jobs -n ingress-nginx") if list(check_jobs.values())[0].stderr == "" and major_version >= 1 and minor_version >= 4: From 3c9f23db34dafe6bc67f89bad015b529e6b23e93 Mon Sep 17 00:00:00 2001 From: Vasudeo Nimbekar Date: Wed, 5 Jul 2023 15:42:49 +0530 Subject: [PATCH 4/8] added support for restore procedure --- kubemarine/apt.py | 4 +-- kubemarine/plugins/nginx_ingress.py | 5 ++++ kubemarine/procedures/restore.py | 46 ++++++++++++++++++----------- kubemarine/system.py | 4 ++- kubemarine/yum.py | 18 ++++++----- 5 files changed, 49 insertions(+), 28 deletions(-) diff --git a/kubemarine/apt.py b/kubemarine/apt.py index c9a17d2dd..bc4a9ba16 100644 --- 
a/kubemarine/apt.py +++ b/kubemarine/apt.py @@ -100,7 +100,7 @@ def remove(group, include=None, exclude=None, **kwargs) -> NodeGroupResult: exclude = ','.join(exclude) command += ' --exclude=%s' % exclude - return group.sudo(command, **kwargs) + return group.sudo(command, dry_run=utils.check_dry_run_status_active(group.cluster), **kwargs) def upgrade(group, include=None, exclude=None, **kwargs) -> NodeGroupResult: @@ -117,7 +117,7 @@ def upgrade(group, include=None, exclude=None, **kwargs) -> NodeGroupResult: exclude = ','.join(exclude) command += ' --exclude=%s' % exclude - return group.sudo(command, **kwargs) + return group.sudo(command, dry_run=utils.check_dry_run_status_active(group.cluster), **kwargs) def no_changes_found(action: callable, result: fabric.runners.Result) -> bool: diff --git a/kubemarine/plugins/nginx_ingress.py b/kubemarine/plugins/nginx_ingress.py index 86dace554..a66c78d8f 100644 --- a/kubemarine/plugins/nginx_ingress.py +++ b/kubemarine/plugins/nginx_ingress.py @@ -39,6 +39,7 @@ def check_job_for_nginx(cluster: KubernetesCluster): else: cluster.log.debug('There are no jobs to delete') + def enrich_inventory(inventory, _): if not inventory["plugins"]["nginx-ingress-controller"]["install"]: return inventory @@ -105,6 +106,10 @@ def manage_custom_certificate(cluster): first_control_plane = cluster.nodes["control-plane"].get_first_member() default_cert = cluster.inventory["plugins"]["nginx-ingress-controller"]["controller"]["ssl"]["default-certificate"] + if utils.check_dry_run_status_active(cluster): + cluster.log.debug("[dry-run] Successfully applied custom certificates") + return + # first, we need to load cert and key files to first control-plane to known locations first_control_plane.sudo(f"mkdir -p {base_path}") try: diff --git a/kubemarine/procedures/restore.py b/kubemarine/procedures/restore.py index cfd168685..8215c8eff 100755 --- a/kubemarine/procedures/restore.py +++ b/kubemarine/procedures/restore.py @@ -110,19 +110,24 @@ def verify_backup_data(cluster): def stop_cluster(cluster): cluster.log.debug('Stopping the existing cluster...') cri_impl = cluster.inventory['services']['cri']['containerRuntime'] + dry_run = utils.check_dry_run_status_active(cluster) if cri_impl == "docker": - result = cluster.nodes['control-plane'].sudo('systemctl stop kubelet; ' - 'sudo docker kill $(sudo docker ps -q); ' - 'sudo docker rm -f $(sudo docker ps -a -q); ' - 'sudo docker ps -a; ' - 'sudo rm -rf /var/lib/etcd; ' - 'sudo mkdir -p /var/lib/etcd', warn=True) + result = cluster.nodes['control-plane'].sudo( + 'systemctl stop kubelet; ' + 'sudo docker kill $(sudo docker ps -q); ' + 'sudo docker rm -f $(sudo docker ps -a -q); ' + 'sudo docker ps -a; ' + 'sudo rm -rf /var/lib/etcd; ' + 'sudo mkdir -p /var/lib/etcd', warn=True, + dry_run=dry_run) else: - result = cluster.nodes['control-plane'].sudo('systemctl stop kubelet; ' - 'sudo crictl rm -fa; ' - 'sudo crictl ps -a; ' - 'sudo rm -rf /var/lib/etcd; ' - 'sudo mkdir -p /var/lib/etcd', warn=True) + result = cluster.nodes['control-plane'].sudo( + 'systemctl stop kubelet; ' + 'sudo crictl rm -fa; ' + 'sudo crictl ps -a; ' + 'sudo rm -rf /var/lib/etcd; ' + 'sudo mkdir -p /var/lib/etcd', warn=True, + dry_run=dry_run) cluster.log.verbose(result) @@ -138,9 +143,10 @@ def restore_thirdparties(cluster): def import_nodes(cluster): + dry_run = utils.check_dry_run_status_active(cluster) for node in cluster.nodes['all'].get_ordered_members_list(provide_node_configs=True): node['connection'].put(os.path.join(cluster.context['backup_tmpdir'],
'nodes_data', '%s.tar.gz' % node['name']), - '/tmp/kubemarine-backup.tar.gz') + '/tmp/kubemarine-backup.tar.gz', dry_run=dry_run) cluster.log.debug('Backup \'%s\' uploaded' % node['name']) cluster.log.debug('Unpacking backup...') @@ -149,10 +155,11 @@ def import_nodes(cluster): for node in cluster.nodes['all'].get_ordered_members_list(provide_node_configs=True): cmd = f"readlink /etc/resolv.conf ;" \ f"if [ $? -ne 0 ]; then sudo chattr -i /etc/resolv.conf; sudo tar xzvf /tmp/kubemarine-backup.tar.gz -C / --overwrite && sudo chattr +i /etc/resolv.conf; else sudo tar xzvf /tmp/kubemarine-backup.tar.gz -C / --overwrite; fi " - node['connection'].sudo(cmd) - + node['connection'].sudo(cmd, dry_run=dry_run) + if dry_run: + return result = exe.get_last_results_str() - cluster.log.debug('%s',result) + cluster.log.debug('%s', result) def import_etcd(cluster: KubernetesCluster): @@ -164,6 +171,7 @@ def import_etcd(cluster: KubernetesCluster): etcd_peer_key = etcd_all_certificates.get('peer_key', cluster.globals['etcd']['default_arguments']['peer_key']) etcd_peer_cacert = etcd_all_certificates.get('peer_cacert', cluster.globals['etcd']['default_arguments']['peer_cacert']) + dry_run = utils.check_dry_run_status_active(cluster) etcd_image = cluster.procedure_inventory.get('restore_plan', {}).get('etcd', {}).get('image') if not etcd_image: @@ -174,8 +182,12 @@ def import_etcd(cluster: KubernetesCluster): cluster.log.debug('Uploading ETCD snapshot...') snap_name = '/var/lib/etcd/etcd-snapshot%s.db' % int(round(time.time() * 1000)) - cluster.nodes['control-plane'].put(os.path.join(cluster.context['backup_tmpdir'], 'etcd.db'), snap_name, sudo=True) - + cluster.nodes['control-plane'].put(os.path.join(cluster.context['backup_tmpdir'], 'etcd.db'), + snap_name, sudo=True, + dry_run=dry_run) + if dry_run: + cluster.log.verbose('ETCD cluster is healthy!') + return initial_cluster_list = [] initial_cluster_list_without_names = [] for control_plane in cluster.nodes['control-plane'].get_ordered_members_list(provide_node_configs=True): diff --git a/kubemarine/system.py b/kubemarine/system.py index b483cf452..36b5ef988 100644 --- a/kubemarine/system.py +++ b/kubemarine/system.py @@ -338,7 +338,9 @@ def reboot_nodes(cluster: KubernetesCluster): def reboot_group(group: NodeGroup, try_graceful=None): log = group.cluster.log - + if utils.check_dry_run_status_active(group.cluster): + log.debug("[dry-run] Rebooting Nodes...") + return if try_graceful is None: if 'controlplain_uri' not in group.cluster.context.keys(): kubernetes.is_cluster_installed(group.cluster) diff --git a/kubemarine/yum.py b/kubemarine/yum.py index de5cab220..79b74f0a5 100644 --- a/kubemarine/yum.py +++ b/kubemarine/yum.py @@ -29,22 +29,24 @@ def backup_repo(group, repo_filename="*", **kwargs): if not group.cluster.inventory['services']['packages']['package_manager']['replace-repositories']: group.cluster.log.debug("Skipped - repos replacement disabled in configuration") return + dry_run = utils.check_dry_run_status_active(group.cluster) # all files in directory will be renamed: xxx.repo -> xxx.repo.bak # if there already any files with ".bak" extension, they should not be renamed to ".bak.bak"! 
return group.sudo("find /etc/yum.repos.d/ -type f -name '%s.repo' | " - "sudo xargs -t -iNAME mv -bf NAME NAME.bak" % repo_filename, **kwargs) + "sudo xargs -t -iNAME mv -bf NAME NAME.bak" % repo_filename, dry_run=dry_run, **kwargs) def add_repo(group, repo_data="", repo_filename="predefined", **kwargs): - create_repo_file(group, repo_data, get_repo_file_name(repo_filename)) - return group.sudo('yum clean all && sudo yum updateinfo', **kwargs) + dry_run = utils.check_dry_run_status_active(group.cluster) + create_repo_file(group, repo_data, get_repo_file_name(repo_filename), dry_run) + return group.sudo('yum clean all && sudo yum updateinfo', dry_run=dry_run, **kwargs) def get_repo_file_name(repo_filename="predefined"): return '/etc/yum.repos.d/%s.repo' % repo_filename -def create_repo_file(group, repo_data, repo_file): +def create_repo_file(group, repo_data, repo_file, dry_run=False): # if repo_data is dict, then convert it to string with config inside if isinstance(repo_data, dict): config = configparser.ConfigParser() @@ -54,7 +56,7 @@ def create_repo_file(group, repo_data, repo_file): config.write(repo_data) else: repo_data = io.StringIO(utils.read_external(repo_data)) - group.put(repo_data, repo_file, sudo=True) + group.put(repo_data, repo_file, sudo=True, dry_run=dry_run) def clean(group, mode="all", **kwargs): @@ -83,7 +85,7 @@ def install(group, include=None, exclude=None, **kwargs): command = get_install_cmd(include, exclude) - return group.sudo(command, **kwargs) + return group.sudo(command, dry_run=utils.check_dry_run_status_active(group.cluster), **kwargs) def remove(group, include=None, exclude=None, **kwargs): @@ -99,7 +101,7 @@ def remove(group, include=None, exclude=None, **kwargs): exclude = ','.join(exclude) command += ' --exclude=%s' % exclude - return group.sudo(command, **kwargs) + return group.sudo(command, dry_run=utils.check_dry_run_status_active(group.cluster), **kwargs) def upgrade(group, include=None, exclude=None, **kwargs): @@ -115,7 +117,7 @@ def upgrade(group, include=None, exclude=None, **kwargs): exclude = ','.join(exclude) command += ' --exclude=%s' % exclude - return group.sudo(command, **kwargs) + return group.sudo(command, dry_run=utils.check_dry_run_status_active(group.cluster), **kwargs) def no_changes_found(action: callable, result: fabric.runners.Result) -> bool: From c5f60261cbc0361cf37dd4797e24b20532ded5e7 Mon Sep 17 00:00:00 2001 From: Vasudeo Nimbekar Date: Fri, 7 Jul 2023 01:29:47 +0530 Subject: [PATCH 5/8] Fixed add_node procedure --- kubemarine/core/group.py | 6 ++++-- kubemarine/kubernetes/__init__.py | 13 ++++++------- kubemarine/procedures/add_node.py | 12 ++++++++++++ 3 files changed, 22 insertions(+), 9 deletions(-) diff --git a/kubemarine/core/group.py b/kubemarine/core/group.py index b23f4fc4c..9993f5e1e 100755 --- a/kubemarine/core/group.py +++ b/kubemarine/core/group.py @@ -29,6 +29,7 @@ import invoke from invoke import UnexpectedExit +import fabric from kubemarine.core import utils, log from kubemarine.core.executor import ( @@ -310,9 +311,10 @@ def do_dry_run(self: 'NodeGroup', *args, **kwargs): if kwargs.get("dry_run"): if fn.__name__ == "put": self.cluster.log.verbose("Local file \"%s\" is being transferred to remote file \"%s\" on nodes %s with options %s" - % (args[0], args[1], list(self.nodes.keys()), kwargs)) + % (args[0], args[1], list(self.nodes), kwargs)) else: - self.cluster.log.verbose('Performing %s %s on nodes %s with options: %s' % (fn.__name__, args[0], list(self.nodes.keys()), kwargs)) + 
self.cluster.log.verbose('Performing %s %s on nodes %s with options: %s' % ( + fn.__name__, args[0], list(self.nodes), kwargs)) return NodeGroupResult(self.cluster, results) elif "dry_run" in kwargs.keys(): del kwargs["dry_run"] diff --git a/kubemarine/kubernetes/__init__.py b/kubemarine/kubernetes/__init__.py index 3d76db4f6..6c5524cc2 100644 --- a/kubemarine/kubernetes/__init__.py +++ b/kubemarine/kubernetes/__init__.py @@ -348,7 +348,7 @@ def join_other_control_planes(group: NodeGroup) -> RunnersGroupResult: other_control_planes_group = group.get_ordered_members_list()[1:] dry_run = utils.check_dry_run_status_active(group.cluster) log = group.cluster.log - join_dict = group.cluster.context["join_dict"] + join_dict = group.cluster.context.get("join_dict") for node in other_control_planes_group: if dry_run: log.debug('[dry-run]Joining control-plane \'%s\'...' % node['name']) @@ -363,7 +363,7 @@ def join_other_control_planes(group: NodeGroup) -> RunnersGroupResult: def join_new_control_plane(group: NodeGroup): dry_run = utils.check_dry_run_status_active(group.cluster) log = group.cluster.log - join_dict = get_join_dict(group) + join_dict = get_join_dict(group, dry_run=dry_run) for node in group.get_ordered_members_list(): if dry_run: log.debug('[dry-run]Joining control-plane \'%s\'...' % node['name']) @@ -1275,7 +1275,6 @@ def images_grouped_prepull(group: NodeGroup, group_size: int = None): """ cluster: KubernetesCluster = group.cluster - dry_run = utils.check_dry_run_status_active(cluster) log = cluster.log if group_size is None: @@ -1300,7 +1299,7 @@ def images_grouped_prepull(group: NodeGroup, group_size: int = None): for group_i in range(groups_amount): log.verbose('Prepulling images for group #%s...' % group_i) # RemoteExecutor used for future cases, when some nodes will require another/additional actions for prepull - for node_i in range(group_i*group_size, (group_i*group_size)+group_size): + for node_i in range(group_i*group_size, (group_i*group_size) + group_size): if node_i < nodes_amount: tokens.append(images_prepull(nodes[node_i])) @@ -1313,7 +1312,7 @@ def images_prepull(group: DeferredGroup) -> Token: :param group: NodeGroup where prepull should be performed. :return: NodeGroupResult from all nodes in presented group. 
""" - + dry_run = utils.check_dry_run_status_active(group.cluster) config = get_kubeadm_config(group.cluster.inventory) kubeadm_init: dict = { 'apiVersion': group.cluster.inventory["services"]["kubeadm"]['apiVersion'], @@ -1323,9 +1322,9 @@ def images_prepull(group: DeferredGroup) -> Token: configure_container_runtime(group.cluster, kubeadm_init) config = f'{config}---\n{yaml.dump(kubeadm_init, default_flow_style=False)}' - group.put(io.StringIO(config), '/etc/kubernetes/prepull-config.yaml', sudo=True) + group.put(io.StringIO(config), '/etc/kubernetes/prepull-config.yaml', sudo=True, dry_run=dry_run) - return group.sudo("kubeadm config images pull --config=/etc/kubernetes/prepull-config.yaml") + return group.sudo("kubeadm config images pull --config=/etc/kubernetes/prepull-config.yaml", dry_run=dry_run) def schedule_running_nodes_report(cluster: KubernetesCluster): diff --git a/kubemarine/procedures/add_node.py b/kubemarine/procedures/add_node.py index e9795a69b..d4f1cb6f9 100755 --- a/kubemarine/procedures/add_node.py +++ b/kubemarine/procedures/add_node.py @@ -102,12 +102,24 @@ def cache_installed_packages(cluster: KubernetesCluster): packages.cache_package_versions(cluster, cluster.inventory, by_initial_nodes=True) +def validate_new_nodes(cluster: KubernetesCluster): + old_nodes = set([node['connect_to'] for node in cluster.inventory['nodes'] + if 'add_node' not in node['roles']]) + new_nodes = set(cluster.nodes['add_node'].get_hosts()) + for host in new_nodes: + if host in old_nodes: + raise Exception(f"Node {host} is already present in cluster") + return None + + tasks: typing.OrderedDict[str, Any] = OrderedDict(copy.deepcopy(install.tasks)) del tasks["deploy"]["plugins"] del tasks["deploy"]["accounts"] tasks["deploy"]["kubernetes"]["init"] = deploy_kubernetes_join tasks["cache_packages"] = cache_installed_packages +tasks["validate_nodes"] = validate_new_nodes tasks.move_to_end("cache_packages", last=False) +tasks.move_to_end("validate_nodes", last=False) class AddNodeAction(Action): From f69796b56f6e27ec990137fc4568eb0515f115e6 Mon Sep 17 00:00:00 2001 From: Vasudeo Nimbekar Date: Mon, 10 Jul 2023 10:40:57 +0530 Subject: [PATCH 6/8] implemented dry-run for backup and restore procedures --- kubemarine/core/defaults.py | 2 +- kubemarine/procedures/backup.py | 2 ++ kubemarine/system.py | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/kubemarine/core/defaults.py b/kubemarine/core/defaults.py index 8b39d70f5..f0c2642c7 100755 --- a/kubemarine/core/defaults.py +++ b/kubemarine/core/defaults.py @@ -407,7 +407,7 @@ def enrich_inventory(cluster: KubernetesCluster, inventory: dict, make_dumps=Tru if make_dumps: from kubemarine import controlplane inventory_for_dump = controlplane.controlplane_finalize_inventory(cluster, prepare_for_dump(inventory)) - # utils.dump_file(cluster, yaml.dump(inventory_for_dump, ), "cluster.yaml") + utils.dump_file(cluster, yaml.dump(inventory_for_dump, ), "cluster.yaml") return inventory diff --git a/kubemarine/procedures/backup.py b/kubemarine/procedures/backup.py index 08e80782f..1e332beab 100755 --- a/kubemarine/procedures/backup.py +++ b/kubemarine/procedures/backup.py @@ -404,6 +404,8 @@ def pack_data(cluster: KubernetesCluster): target = os.path.join(target, backup_filename) cluster.log.debug('Packing all data...') + if utils.check_dry_run_status_active(cluster): + return None with tarfile.open(target, "w:gz") as tar_handle: for root, dirs, files in os.walk(backup_directory): for file in files: diff --git a/kubemarine/system.py 
b/kubemarine/system.py index eb4d4bfef..06db24dd1 100644 --- a/kubemarine/system.py +++ b/kubemarine/system.py @@ -335,10 +335,10 @@ def reboot_nodes(cluster: KubernetesCluster) -> None: def reboot_group(group: NodeGroup, try_graceful: bool = None) -> RunnersGroupResult: cluster: KubernetesCluster = group.cluster + log = cluster.log if utils.check_dry_run_status_active(cluster): log.debug("[dry-run] Rebooting Nodes...") return - log = cluster.log if try_graceful is None: if 'controlplain_uri' not in cluster.context.keys(): From 3c2871169494624f20061fdb42e3f6376b196f63 Mon Sep 17 00:00:00 2001 From: Vasudeo Nimbekar Date: Mon, 10 Jul 2023 14:31:08 +0530 Subject: [PATCH 7/8] Implemented dry-run for cert_renew, manage_psp, manage_pss and migrate_cri procedures --- kubemarine/admission.py | 118 ++++++++++++++------------- kubemarine/core/utils.py | 5 +- kubemarine/cri/docker.py | 8 +- kubemarine/k8s_certs.py | 21 +++-- kubemarine/kubernetes/__init__.py | 4 +- kubemarine/plugins/__init__.py | 4 +- kubemarine/procedures/migrate_cri.py | 20 +++-- 7 files changed, 101 insertions(+), 79 deletions(-) diff --git a/kubemarine/admission.py b/kubemarine/admission.py index 42ebcb60b..63c36165d 100644 --- a/kubemarine/admission.py +++ b/kubemarine/admission.py @@ -329,7 +329,7 @@ def restart_pods_task(cluster: KubernetesCluster): if not is_restart: cluster.log.debug("'restart-pods' is disabled, pods won't be restarted") return - + dry_run = utils.check_dry_run_status_active(cluster) first_control_plane = cluster.nodes["control-plane"].get_first_member() cluster.log.debug("Drain-Uncordon all nodes to restart pods") @@ -337,20 +337,20 @@ def restart_pods_task(cluster: KubernetesCluster): for node in kube_nodes.get_ordered_members_list(): first_control_plane.sudo( kubernetes.prepare_drain_command(cluster, node.get_node_name(), disable_eviction=False), - hide=False) - first_control_plane.sudo("kubectl uncordon %s" % node.get_node_name(), hide=False) + hide=False, dry_run=dry_run) + first_control_plane.sudo("kubectl uncordon %s" % node.get_node_name(), hide=False, dry_run=dry_run) cluster.log.debug("Restarting daemon-sets...") daemon_sets = ruamel.yaml.YAML().load(list(first_control_plane.sudo("kubectl get ds -A -o yaml").values())[0].stdout) for ds in daemon_sets["items"]: - first_control_plane.sudo("kubectl rollout restart ds %s -n %s" % (ds["metadata"]["name"], ds["metadata"]["namespace"])) + first_control_plane.sudo("kubectl rollout restart ds %s -n %s" % (ds["metadata"]["name"], ds["metadata"]["namespace"]),dry_run=dry_run) # we do not know to wait for, only for system pods maybe cluster.log.debug("Waiting for system pods...") kubernetes.wait_for_any_pods(cluster, first_control_plane) -def update_kubeadm_configmap_psp(first_control_plane: NodeGroup, target_state: str) -> str: +def update_kubeadm_configmap_psp(first_control_plane: NodeGroup, target_state: str, dry_run=False) -> str: yaml = ruamel.yaml.YAML() # load kubeadm config map and retrieve cluster config @@ -371,22 +371,23 @@ def update_kubeadm_configmap_psp(first_control_plane: NodeGroup, target_state: s buf = io.StringIO() yaml.dump(kubeadm_cm, buf) filename = uuid.uuid4().hex - first_control_plane.put(buf, "/tmp/%s.yaml" % filename) - first_control_plane.sudo("kubectl apply -f /tmp/%s.yaml" % filename) - first_control_plane.sudo("rm -f /tmp/%s.yaml" % filename) + first_control_plane.put(buf, "/tmp/%s.yaml" % filename, dry_run=dry_run) + first_control_plane.sudo("kubectl apply -f /tmp/%s.yaml" % filename, dry_run=dry_run) + 
first_control_plane.sudo("rm -f /tmp/%s.yaml" % filename, dry_run=dry_run) return final_plugins_string def update_kubeadm_configmap(first_control_plane: NodeGroup, target_state: str) -> str: admission_impl = first_control_plane.cluster.inventory['rbac']['admission'] + dry_run = utils.check_dry_run_status_active(first_control_plane.cluster) if admission_impl == "psp": - return update_kubeadm_configmap_psp(first_control_plane, target_state) + return update_kubeadm_configmap_psp(first_control_plane, target_state, dry_run) else: # admission_impl == "pss": - return update_kubeadm_configmap_pss(first_control_plane, target_state) + return update_kubeadm_configmap_pss(first_control_plane, target_state, dry_run) -def update_kubeapi_config_psp(control_planes: NodeGroup, plugins_list: str): +def update_kubeapi_config_psp(control_planes: NodeGroup, plugins_list: str, dry_run=False): yaml = ruamel.yaml.YAML() for control_plane in control_planes.get_ordered_members_list(): @@ -401,7 +402,7 @@ def update_kubeapi_config_psp(control_planes: NodeGroup, plugins_list: str): # place updated config on control-plane buf = io.StringIO() yaml.dump(conf, buf) - control_plane.put(buf, "/etc/kubernetes/manifests/kube-apiserver.yaml", sudo=True) + control_plane.put(buf, "/etc/kubernetes/manifests/kube-apiserver.yaml", sudo=True, dry_run=dry_run) # force kube-apiserver pod restart, then wait for api to become available if control_planes.cluster.inventory['services']['cri']['containerRuntime'] == 'containerd': @@ -416,10 +417,11 @@ def update_kubeapi_config_psp(control_planes: NodeGroup, plugins_list: str): def update_kubeapi_config(control_planes: NodeGroup, options_list: str): admission_impl = control_planes.cluster.inventory['rbac']['admission'] + dry_run = utils.check_dry_run_status_active(control_planes.cluster) if admission_impl == "psp": - return update_kubeapi_config_psp(control_planes, options_list) + return update_kubeapi_config_psp(control_planes, options_list, dry_run) elif admission_impl == "pss": - return update_kubeapi_config_pss(control_planes, options_list) + return update_kubeapi_config_pss(control_planes, options_list, dry_run) def is_security_enabled(inventory: dict): admission_impl = inventory['rbac']['admission'] @@ -457,7 +459,7 @@ def apply_default_pss(cluster: KubernetesCluster): elif procedure_config["pod-security"] == "enabled" and current_config["pod-security"] == "disabled": return manage_pss(cluster, "install") else: - return manage_pss(cluster, "init") + return manage_pss(cluster, "init") def delete_default_pss(cluster: KubernetesCluster): @@ -468,13 +470,14 @@ def delete_default_pss(cluster: KubernetesCluster): def manage_privileged_from_file(group: NodeGroup, filename, manage_type): + dry_run = utils.check_dry_run_status_active(group.cluster) if manage_type not in ["apply", "delete"]: raise Exception("unexpected manage type for privileged policy") privileged_policy = utils.read_internal(os.path.join(policies_file_path, filename)) remote_path = tmp_filepath_pattern % filename - group.put(io.StringIO(privileged_policy), remote_path, backup=True, sudo=True) + group.put(io.StringIO(privileged_policy), remote_path, backup=True, sudo=True, dry_run=dry_run) - return group.sudo("kubectl %s -f %s" % (manage_type, remote_path), warn=True) + return group.sudo("kubectl %s -f %s" % (manage_type, remote_path), warn=True, dry_run=dry_run) def resolve_oob_scope(oob_policies_conf: Dict[str, Any], selector: str): @@ -511,6 +514,7 @@ def manage_policies(group: NodeGroup, manage_type, manage_scope): 
psp_to_manage = manage_scope.get(psp_list_option, None) roles_to_manage = manage_scope.get(roles_list_option, None) bindings_to_manage = manage_scope.get(bindings_list_option, None) + dry_run = utils.check_dry_run_status_active(group.cluster) if not psp_to_manage and not roles_to_manage and not bindings_to_manage: group.cluster.log.verbose("No policies to %s" % manage_type) @@ -519,9 +523,9 @@ def manage_policies(group: NodeGroup, manage_type, manage_scope): template = collect_policies_template(psp_to_manage, roles_to_manage, bindings_to_manage) filename = uuid.uuid4().hex remote_path = tmp_filepath_pattern % filename - group.put(io.StringIO(template), remote_path, backup=True, sudo=True) - result = group.sudo("kubectl %s -f %s" % (manage_type, remote_path), warn=True) - group.sudo("rm -f %s" % remote_path) + group.put(io.StringIO(template), remote_path, backup=True, sudo=True, dry_run=dry_run) + result = group.sudo("kubectl %s -f %s" % (manage_type, remote_path), warn=True, dry_run=dry_run) + group.sudo("rm -f %s" % remote_path, dry_run=dry_run) return result @@ -628,57 +632,58 @@ def manage_enrichment(inventory: dict, cluster: KubernetesCluster): def manage_pss(cluster: KubernetesCluster, manage_type: str): first_control_plane = cluster.nodes["control-plane"].get_first_member() control_planes = cluster.nodes["control-plane"] + dry_run = utils.check_dry_run_status_active(cluster) # 'apply' - change options in admission.yaml, PSS is enabled if manage_type == "apply": # set labels for predifined plugins namespaces and namespaces defined in procedure config - label_namespace_pss(cluster, manage_type) + label_namespace_pss(cluster, manage_type, dry_run) # copy admission config on control-planes copy_pss(control_planes) for control_plane in control_planes.get_ordered_members_list(): # force kube-apiserver pod restart, then wait for api to become available if control_plane.cluster.inventory['services']['cri']['containerRuntime'] == 'containerd': control_plane.call(utils.wait_command_successful, command="crictl rm -f " - "$(sudo crictl ps --name kube-apiserver -q)") + "$(sudo crictl ps --name kube-apiserver -q)", dry_run=dry_run) else: control_plane.call(utils.wait_command_successful, command="docker stop " "$(sudo docker ps -f 'name=k8s_kube-apiserver'" - " | awk '{print $1}')") - control_plane.call(utils.wait_command_successful, command="kubectl get pod -n kube-system") + " | awk '{print $1}')", dry_run=dry_run) + control_plane.call(utils.wait_command_successful, command="kubectl get pod -n kube-system", dry_run=dry_run) # 'install' - enable PSS elif manage_type == "install": # set labels for predifined plugins namespaces and namespaces defined in procedure config - label_namespace_pss(cluster, manage_type) + label_namespace_pss(cluster, manage_type, dry_run) # copy admission config on control-planes copy_pss(cluster.nodes["control-plane"]) cluster.log.debug("Updating kubeadm config map") - final_features_list = first_control_plane.call(update_kubeadm_configmap_pss, target_state="enabled") + final_features_list = first_control_plane.call(update_kubeadm_configmap_pss, target_state="enabled", dry_run=dry_run) # update api-server config on all control-planes cluster.log.debug("Updating kube-apiserver configs on control-planes") - cluster.nodes["control-plane"].call(update_kubeapi_config_pss, features_list=final_features_list) + cluster.nodes["control-plane"].call(update_kubeapi_config_pss, features_list=final_features_list, dry_run=dry_run) # 'init' make changes during init Kubernetes cluster 
elif manage_type == "init": cluster.log.debug("Updating kubeadm config map") - first_control_plane.call(update_kubeadm_configmap_pss, target_state="enabled") + first_control_plane.call(update_kubeadm_configmap_pss, target_state="enabled", dry_run=dry_run) # 'delete' - disable PSS elif manage_type == "delete": # set labels for predifined plugins namespaces and namespaces defined in procedure config - label_namespace_pss(cluster, manage_type) + label_namespace_pss(cluster, manage_type, dry_run) final_features_list = first_control_plane.call(update_kubeadm_configmap, target_state="disabled") # update api-server config on all control-planes cluster.log.debug("Updating kube-apiserver configs on control-planes") - cluster.nodes["control-plane"].call(update_kubeapi_config_pss, features_list=final_features_list) + cluster.nodes["control-plane"].call(update_kubeapi_config_pss, features_list=final_features_list, dry_run=dry_run) # erase PSS admission config cluster.log.debug("Erase admission configuration... %s" % admission_path) group = cluster.nodes["control-plane"] - group.sudo("rm -f %s" % admission_path, warn=True) + group.sudo("rm -f %s" % admission_path, warn=True, dry_run=dry_run) -def update_kubeapi_config_pss(control_planes: NodeGroup, features_list: str): +def update_kubeapi_config_pss(control_planes: NodeGroup, features_list: str, dry_run=False): yaml = ruamel.yaml.YAML() for control_plane in control_planes.get_ordered_members_list(): @@ -712,15 +717,17 @@ def update_kubeapi_config_pss(control_planes: NodeGroup, features_list: str): # force kube-apiserver pod restart, then wait for api to become available if control_plane.cluster.inventory['services']['cri']['containerRuntime'] == 'containerd': control_plane.call(utils.wait_command_successful, command="crictl rm -f " - "$(sudo crictl ps --name kube-apiserver -q)") + "$(sudo crictl ps --name kube-apiserver -q)", + dry_run=dry_run) else: control_plane.call(utils.wait_command_successful, command="docker stop " "$(sudo docker ps -f 'name=k8s_kube-apiserver'" - " | awk '{print $1}')") - control_plane.call(utils.wait_command_successful, command="kubectl get pod -n kube-system") + " | awk '{print $1}')", + dry_run=dry_run) + control_plane.call(utils.wait_command_successful, command="kubectl get pod -n kube-system", dry_run=dry_run) -def update_kubeadm_configmap_pss(first_control_plane: NodeGroup, target_state: str) -> str: +def update_kubeadm_configmap_pss(first_control_plane: NodeGroup, target_state: str, dry_run=False) -> str: yaml = ruamel.yaml.YAML() final_feature_list = "" @@ -729,7 +736,7 @@ def update_kubeadm_configmap_pss(first_control_plane: NodeGroup, target_state: s result = first_control_plane.sudo("kubectl get cm kubeadm-config -n kube-system -o yaml") kubeadm_cm = yaml.load(list(result.values())[0].stdout) cluster_config = yaml.load(kubeadm_cm["data"]["ClusterConfiguration"]) - + # update kubeadm config map with feature list if target_state == "enabled": if "feature-gates" in cluster_config["apiServer"]["extraArgs"]: @@ -764,9 +771,9 @@ def update_kubeadm_configmap_pss(first_control_plane: NodeGroup, target_state: s buf = io.StringIO() yaml.dump(kubeadm_cm, buf) filename = uuid.uuid4().hex - first_control_plane.put(buf, "/tmp/%s.yaml" % filename) - first_control_plane.sudo("kubectl apply -f /tmp/%s.yaml" % filename) - first_control_plane.sudo("rm -f /tmp/%s.yaml" % filename) + first_control_plane.put(buf, "/tmp/%s.yaml" % filename, dry_run=dry_run) + first_control_plane.sudo("kubectl apply -f /tmp/%s.yaml" % filename, 
dry_run=dry_run) + first_control_plane.sudo("rm -f /tmp/%s.yaml" % filename, dry_run=dry_run) return final_feature_list @@ -814,9 +821,10 @@ def update_finalized_inventory(cluster: KubernetesCluster, inventory_to_finalize def copy_pss(group: NodeGroup): - if group.cluster.inventory['rbac']['admission'] != "pss": - + if group.cluster.inventory['rbac']['admission'] != "pss": return + + cluster = group.cluster if cluster.context.get('initial_procedure') == 'manage_pss': if not is_security_enabled(cluster.inventory) and \ cluster.procedure_inventory["pss"]["pod-security"] != "enabled": @@ -848,7 +856,7 @@ def copy_pss(group: NodeGroup): return result -def label_namespace_pss(cluster: KubernetesCluster, manage_type: str): +def label_namespace_pss(cluster: KubernetesCluster, manage_type: str, dry_run=False): first_control_plane = cluster.nodes["control-plane"].get_first_member() # set/delete labels on predifined plugins namsespaces for plugin in cluster.inventory["plugins"]: @@ -859,34 +867,34 @@ def label_namespace_pss(cluster: KubernetesCluster, manage_type: str): cluster.log.debug(f"Set PSS labels on namespace {privileged_plugins[plugin]}") for mode in valid_modes: first_control_plane.sudo(f"kubectl label ns {privileged_plugins[plugin]} " - f"pod-security.kubernetes.io/{mode}=privileged --overwrite") + f"pod-security.kubernetes.io/{mode}=privileged --overwrite", dry_run=dry_run) first_control_plane.sudo(f"kubectl label ns {privileged_plugins[plugin]} " - f"pod-security.kubernetes.io/{mode}-version=latest --overwrite") + f"pod-security.kubernetes.io/{mode}-version=latest --overwrite", dry_run=dry_run) elif is_install and plugin in baseline_plugins.keys(): # set label 'pod-security.kubernetes.io/enforce: baseline' for kubernetes dashboard cluster.log.debug(f"Set PSS labels on namespace {baseline_plugins[plugin]}") for mode in valid_modes: first_control_plane.sudo(f"kubectl label ns {baseline_plugins[plugin]} " - f"pod-security.kubernetes.io/{mode}=baseline --overwrite") + f"pod-security.kubernetes.io/{mode}=baseline --overwrite", dry_run=dry_run) first_control_plane.sudo(f"kubectl label ns {baseline_plugins[plugin]} " - f"pod-security.kubernetes.io/{mode}-version=latest --overwrite") + f"pod-security.kubernetes.io/{mode}-version=latest --overwrite", dry_run=dry_run) elif manage_type == "delete": if is_install and plugin in privileged_plugins.keys(): # delete label 'pod-security.kubernetes.io/enforce: privileged' for local provisioner and ingress namespaces cluster.log.debug(f"Delete PSS labels from namespace {privileged_plugins[plugin]}") for mode in valid_modes: first_control_plane.sudo(f"kubectl label ns {privileged_plugins[plugin]} " - f"pod-security.kubernetes.io/{mode}- || true") + f"pod-security.kubernetes.io/{mode}- || true", dry_run=dry_run) first_control_plane.sudo(f"kubectl label ns {privileged_plugins[plugin]} " - f"pod-security.kubernetes.io/{mode}-version- || true") + f"pod-security.kubernetes.io/{mode}-version- || true", dry_run=dry_run) elif is_install and plugin in baseline_plugins.keys(): # delete 'pod-security.kubernetes.io/enforce: baseline' for kubernetes dashboard cluster.log.debug(f"Delete PSS labels from namespace {baseline_plugins[plugin]}") for mode in valid_modes: first_control_plane.sudo(f"kubectl label ns {baseline_plugins[plugin]} " - f"pod-security.kubernetes.io/{mode}- || true") + f"pod-security.kubernetes.io/{mode}- || true", dry_run=dry_run) first_control_plane.sudo(f"kubectl label ns {baseline_plugins[plugin]} " - f"pod-security.kubernetes.io/{mode}-version- || 
true") + f"pod-security.kubernetes.io/{mode}-version- || true", dry_run=dry_run) procedure_config = cluster.procedure_inventory["pss"] namespaces = procedure_config.get("namespaces") @@ -910,25 +918,25 @@ def label_namespace_pss(cluster: KubernetesCluster, manage_type: str): cluster.log.debug(f"Set PSS labels on {ns_name} namespace from defaults") for mode in default_modes: first_control_plane.sudo(f"kubectl label ns {ns_name} " - f"pod-security.kubernetes.io/{mode}={default_modes[mode]} --overwrite") + f"pod-security.kubernetes.io/{mode}={default_modes[mode]} --overwrite", dry_run=dry_run) if isinstance(namespace, dict): # set labels that are set in namespaces section cluster.log.debug(f"Set PSS labels on {ns_name} namespace") for item in list(namespace[ns_name]): first_control_plane.sudo(f"kubectl label ns {ns_name} " - f"pod-security.kubernetes.io/{item}={namespace[ns_name][item]} --overwrite") + f"pod-security.kubernetes.io/{item}={namespace[ns_name][item]} --overwrite", dry_run=dry_run) elif manage_type == "delete": # delete labels that are set in default section if default_modes: cluster.log.debug(f"Delete PSS labels on {ns_name} namespace from defaults") for mode in default_modes: - first_control_plane.sudo(f"kubectl label ns {ns_name} pod-security.kubernetes.io/{mode}-") + first_control_plane.sudo(f"kubectl label ns {ns_name} pod-security.kubernetes.io/{mode}-", dry_run=dry_run) # delete labels that are set in namespaces section cluster.log.debug(f"Delete PSS labels on {ns_name} namespace") if isinstance(namespace, dict): for item in list(namespace[ns_name]): first_control_plane.sudo(f"kubectl label ns {ns_name} " - f"pod-security.kubernetes.io/{item}-") + f"pod-security.kubernetes.io/{item}-", dry_run=dry_run) def check_inventory(cluster: KubernetesCluster): diff --git a/kubemarine/core/utils.py b/kubemarine/core/utils.py index 5b512e790..5ba3e107d 100755 --- a/kubemarine/core/utils.py +++ b/kubemarine/core/utils.py @@ -239,11 +239,14 @@ def get_dump_filepath(context, filename): return get_external_resource_path(os.path.join(context['execution_arguments']['dump_location'], 'dump', filename)) -def wait_command_successful(g, command, retries=15, timeout=5, warn=True, hide=False): +def wait_command_successful(g, command, retries=15, timeout=5, warn=True, hide=False, dry_run=False): from kubemarine.core.group import NodeGroup group: NodeGroup = g log = group.cluster.log + if dry_run: + log.debug("[dry-run] Command succeeded") + return while retries > 0: log.debug("Waiting for command to succeed, %s retries left" % retries) diff --git a/kubemarine/cri/docker.py b/kubemarine/cri/docker.py index 3d02276ee..ef0e41bb2 100755 --- a/kubemarine/cri/docker.py +++ b/kubemarine/cri/docker.py @@ -64,6 +64,7 @@ def configure(group: NodeGroup) -> RunnersGroupResult: utils.dump_file(group.cluster, settings_json, 'docker-daemon.json') if utils.check_dry_run_status_active(group.cluster): group.cluster.log.debug("[dry-run] Configuring Docker") + return tokens = [] with group.new_executor() as exe: @@ -81,9 +82,10 @@ def configure(group: NodeGroup) -> RunnersGroupResult: return exe.get_merged_runners_result(tokens) -def prune(group: NodeGroup) -> RunnersGroupResult: +def prune(group: NodeGroup, dry_run=False) -> RunnersGroupResult: return group.sudo('docker container stop $(sudo docker container ls -aq); ' 'sudo docker container rm $(sudo docker container ls -aq); ' 'sudo docker system prune -a -f; ' - # kill all containerd-shim processes, so that no orphan containers remain - 'sudo pkill -9 -f 
"^containerd-shim"', warn=True) + # kill all containerd-shim processes, so that no orphan containers remain + 'sudo pkill -9 -f "^containerd-shim"', warn=True, + dry_run=dry_run) diff --git a/kubemarine/k8s_certs.py b/kubemarine/k8s_certs.py index 2b41cd48a..3d5762a94 100644 --- a/kubemarine/k8s_certs.py +++ b/kubemarine/k8s_certs.py @@ -16,12 +16,14 @@ from kubemarine import kubernetes from kubemarine.core.cluster import KubernetesCluster from kubemarine.core.group import NodeGroup +from kubemarine.core import utils def k8s_certs_overview(control_planes: NodeGroup) -> None: + dry_run = utils.check_dry_run_status_active(control_planes.cluster) for control_plane in control_planes.get_ordered_members_list(): control_planes.cluster.log.debug(f"Checking certs expiration for control_plane {control_plane.get_node_name()}") - control_plane.sudo("kubeadm certs check-expiration", hide=False) + control_plane.sudo("kubeadm certs check-expiration", hide=False, dry_run=dry_run) def renew_verify(inventory: dict, cluster: KubernetesCluster) -> dict: @@ -36,16 +38,16 @@ def renew_verify(inventory: dict, cluster: KubernetesCluster) -> dict: def renew_apply(control_planes: NodeGroup) -> None: log = control_planes.cluster.log - + dry_run = utils.check_dry_run_status_active(control_planes.cluster) procedure = control_planes.cluster.procedure_inventory["kubernetes"] cert_list = procedure["cert-list"] for cert in cert_list: - control_planes.sudo(f"kubeadm certs renew {cert}") + control_planes.sudo(f"kubeadm certs renew {cert}", dry_run=dry_run) if "all" in cert_list or "admin.conf" in cert_list: # need to update cluster-admin config - kubernetes.copy_admin_config(log, control_planes) + kubernetes.copy_admin_config(log, control_planes, dry_run=dry_run) control_planes.call(force_renew_kubelet_serving_certs) @@ -60,19 +62,20 @@ def force_restart_control_plane(control_planes: NodeGroup) -> None: cri_impl = control_planes.cluster.inventory['services']['cri']['containerRuntime'] restart_containers = ["etcd", "kube-scheduler", "kube-apiserver", "kube-controller-manager"] c_filter = "grep -e %s" % " -e ".join(restart_containers) - + dry_run = utils.check_dry_run_status_active(control_planes.cluster) if cri_impl == "docker": - control_planes.sudo("sudo docker container rm -f $(sudo docker ps -a | %s | awk '{ print $1 }')" % c_filter, warn=True) + control_planes.sudo("sudo docker container rm -f $(sudo docker ps -a | %s | awk '{ print $1 }')" % c_filter, warn=True, dry_run=dry_run) else: - control_planes.sudo("sudo crictl rm -f $(sudo crictl ps -a | %s | awk '{ print $1 }')" % c_filter, warn=True) + control_planes.sudo("sudo crictl rm -f $(sudo crictl ps -a | %s | awk '{ print $1 }')" % c_filter, warn=True, dry_run=dry_run) def force_renew_kubelet_serving_certs(control_planes: NodeGroup) -> None: # Delete *serving* kubelet cert (kubelet.crt) and restart kubelet to create new up-to-date cert. # Client kubelet cert (kubelet.conf) is assumed to be updated automatically by kubelet. 
+ dry_run = utils.check_dry_run_status_active(control_planes.cluster) for control_plane in control_planes.get_ordered_members_list(): - control_plane.sudo(f"rm -f /var/lib/kubelet/pki/kubelet.crt /var/lib/kubelet/pki/kubelet.key") - control_planes.sudo("systemctl restart kubelet") + control_plane.sudo(f"rm -f /var/lib/kubelet/pki/kubelet.crt /var/lib/kubelet/pki/kubelet.key", dry_run=dry_run) + control_planes.sudo("systemctl restart kubelet", dry_run=dry_run) def verify_all_is_absent_or_single(cert_list: List[str]) -> None: diff --git a/kubemarine/kubernetes/__init__.py b/kubemarine/kubernetes/__init__.py index 5424173a2..021f34a45 100644 --- a/kubemarine/kubernetes/__init__.py +++ b/kubemarine/kubernetes/__init__.py @@ -479,10 +479,10 @@ def local_admin_config(node: NodeGroup) -> Iterator[str]: node.sudo(f'rm -f {temp_filepath}') -def copy_admin_config(logger: log.EnhancedLogger, nodes: AbstractGroup[RunResult]) -> None: +def copy_admin_config(logger: log.EnhancedLogger, nodes: AbstractGroup[RunResult], dry_run=False) -> None: logger.debug("Setting up admin-config...") command = "mkdir -p /root/.kube && sudo cp -f /etc/kubernetes/admin.conf /root/.kube/config" - nodes.sudo(command) + nodes.sudo(command, dry_run=dry_run) def fetch_admin_config(cluster: KubernetesCluster) -> str: diff --git a/kubemarine/plugins/__init__.py b/kubemarine/plugins/__init__.py index 672318af2..e2685236e 100755 --- a/kubemarine/plugins/__init__.py +++ b/kubemarine/plugins/__init__.py @@ -413,7 +413,9 @@ def expect_deployment(cluster: KubernetesCluster, def expect_pods(cluster: KubernetesCluster, pods: List[str], namespace=None, timeout=None, retries=None, node: NodeGroup = None, apply_filter: str = None): - + if utils.check_dry_run_status_active(cluster): + cluster.log.debug("Pods are ready!") + return None if timeout is None: timeout = cluster.inventory['globals']['expect']['pods']['plugins']['timeout'] if retries is None: diff --git a/kubemarine/procedures/migrate_cri.py b/kubemarine/procedures/migrate_cri.py index ff5641efb..05f7781f8 100755 --- a/kubemarine/procedures/migrate_cri.py +++ b/kubemarine/procedures/migrate_cri.py @@ -22,6 +22,7 @@ from kubemarine import kubernetes, etcd, thirdparties, cri from kubemarine.core import flow +from kubemarine.core import utils from kubemarine.core.action import Action from kubemarine.core.cluster import KubernetesCluster from kubemarine.core.group import NodeGroup @@ -147,7 +148,7 @@ def _migrate_cri(cluster: KubernetesCluster, node_group: List[NodeGroup]): :param cluster: main object describing a cluster :param node_group: group of nodes to migrate """ - + dry_run = utils.check_dry_run_status_active(cluster) for node in node_group: node_config = node.get_config() node_name = node.get_node_name() @@ -163,7 +164,7 @@ def _migrate_cri(cluster: KubernetesCluster, node_group: List[NodeGroup]): version = cluster.inventory["services"]["kubeadm"]["kubernetesVersion"] cluster.log.debug("Migrating \"%s\"..." 
% node_name) drain_cmd = kubernetes.prepare_drain_command(cluster, node_name, disable_eviction=True) - control_plane.sudo(drain_cmd, hide=False) + control_plane.sudo(drain_cmd, hide=False, dry_run=dry_run) kubeadm_flags_file = "/var/lib/kubelet/kubeadm-flags.env" kubeadm_flags = node.sudo(f"cat {kubeadm_flags_file}").get_simple_out() @@ -175,14 +176,14 @@ def _migrate_cri(cluster: KubernetesCluster, node_group: List[NodeGroup]): kubeadm_flags = edit_config(kubeadm_flags) - node.put(io.StringIO(kubeadm_flags), kubeadm_flags_file, backup=True, sudo=True) + node.put(io.StringIO(kubeadm_flags), kubeadm_flags_file, backup=True, sudo=True, dry_run=dry_run) - node.sudo("systemctl stop kubelet") - docker.prune(node) + node.sudo("systemctl stop kubelet", dry_run=dry_run) + docker.prune(node, dry_run=dry_run) docker_associations = cluster.get_associations_for_node(node.get_host(), 'docker') node.sudo(f"systemctl disable {docker_associations['service_name']} --now; " - "sudo sh -c 'rm -rf /var/lib/docker/*'") + "sudo sh -c 'rm -rf /var/lib/docker/*'", dry_run=dry_run) cluster.log.debug('Reinstalling CRI...') cri.install(node) @@ -190,6 +191,8 @@ def _migrate_cri(cluster: KubernetesCluster, node_group: List[NodeGroup]): cluster.log.debug(f'CRI configured! Restoring pods on node "{node_name}"') + if dry_run: + return # if there is a disk for docker in "/etc/fstab", then use this disk for containerd docker_disk_result = node.sudo("cat /etc/fstab | grep ' /var/lib/docker '", warn=True) docker_disk = list(docker_disk_result.values())[0].stdout.strip() @@ -253,6 +256,7 @@ def release_calico_leaked_ips(cluster: KubernetesCluster): Those ips are cleaned by calico garbage collector, but it can take about 20 minutes. This task releases problem ips with force. """ + dry_run = utils.check_dry_run_status_active(cluster) first_control_plane = cluster.nodes['control-plane'].get_first_member() cluster.log.debug("Getting leaked ips...") random_report_name = "/tmp/%s.json" % uuid.uuid4().hex @@ -261,9 +265,9 @@ def release_calico_leaked_ips(cluster: KubernetesCluster): leaked_ips_count = leaked_ips.count('leaked') cluster.log.debug(f"Found {leaked_ips_count} leaked ips") if leaked_ips_count != 0: - first_control_plane.sudo(f"calicoctl ipam release --from-report={random_report_name} --force", hide=False) + first_control_plane.sudo(f"calicoctl ipam release --from-report={random_report_name} --force", hide=False, dry_run=dry_run) cluster.log.debug("Leaked ips was released") - first_control_plane.sudo(f"rm {random_report_name}", hide=False) + first_control_plane.sudo(f"rm {random_report_name}", hide=False, dry_run=dry_run) def edit_config(kubeadm_flags: str): From c0a038e69c26db3b940c3bc3d4261c4d9a0ac4ec Mon Sep 17 00:00:00 2001 From: Vasudeo Nimbekar Date: Tue, 18 Jul 2023 12:21:29 +0530 Subject: [PATCH 8/8] implemented dry-run for migrate-cri and migrate-kubemarine procedures --- kubemarine/core/group.py | 7 ++++--- kubemarine/haproxy.py | 4 +++- kubemarine/procedures/do.py | 6 +++++- kubemarine/procedures/migrate_kubemarine.py | 17 ++++++++++------- 4 files changed, 22 insertions(+), 12 deletions(-) diff --git a/kubemarine/core/group.py b/kubemarine/core/group.py index f88377565..ab72b5a88 100755 --- a/kubemarine/core/group.py +++ b/kubemarine/core/group.py @@ -267,15 +267,16 @@ def result(self) -> RunnersGroupResult: GROUP_RUN_TYPE = TypeVar('GROUP_RUN_TYPE', bound=RunResult, covariant=True) GROUP_SELF = TypeVar('GROUP_SELF', bound='AbstractGroup[Union[RunnersGroupResult, Token]]') -def _handle_dry_run(fn: callable) 
-> callable: + +def _handle_dry_run(fn: Callable) -> Callable: """ Decorator that handles the dry_run keyword argument: when dry_run=True is passed, the wrapped operation is not executed on the remote nodes and a stub result is returned instead. Note! This decorator should be the outermost. :param fn: Origin function to apply annotation to :return: Dry-run wrapper function """ - def do_dry_run(self: 'NodeGroup', *args, **kwargs): - results = {} + def do_dry_run(self: NodeGroup, *args, **kwargs) -> NodeGroupResult: + results: Dict[str, str] = {} if kwargs.get("dry_run"): if fn.__name__ == "put": diff --git a/kubemarine/haproxy.py b/kubemarine/haproxy.py index ab8fd025e..37a986511 100644 --- a/kubemarine/haproxy.py +++ b/kubemarine/haproxy.py @@ -159,9 +159,11 @@ def uninstall(group: NodeGroup) -> RunnersGroupResult: return packages.remove(group, include=['haproxy', 'rh-haproxy18']) -def restart(group: NodeGroup) -> None: +def restart(group: NodeGroup, dry_run: bool = False) -> None: cluster: KubernetesCluster = group.cluster cluster.log.debug("Restarting haproxy in all group...") + if dry_run: + return with group.new_executor() as exe: for node in exe.group.get_ordered_members_list(): service_name = _get_associations_for_node(node)['service_name'] diff --git a/kubemarine/procedures/do.py b/kubemarine/procedures/do.py index 0f1a8ff6a..31ba81200 100755 --- a/kubemarine/procedures/do.py +++ b/kubemarine/procedures/do.py @@ -18,6 +18,7 @@ import sys from typing import Callable, List, Dict +from kubemarine.core import utils from kubemarine.core import flow, resources from kubemarine.core.action import Action from kubemarine.core.cluster import KubernetesCluster @@ -31,6 +32,7 @@ shell_command command to execute on nodes """ + class CLIAction(Action): def __init__(self, node_group_provider: Callable[[KubernetesCluster], NodeGroup], remote_args: List[str], no_stream: bool) -> None: @@ -45,7 +47,9 @@ def run(self, res: DynamicResources) -> None: if executors_group.is_empty(): print('Failed to find any of specified nodes or groups') sys.exit(1) - + if utils.check_dry_run_status_active(cluster): + executors_group.sudo(" ".join(self.remote_args), hide=self.no_stream, warn=True, dry_run=True) + sys.exit(0) result = executors_group.sudo(" ".join(self.remote_args), hide=self.no_stream, warn=True) if self.no_stream: cluster.log.debug(result) diff --git a/kubemarine/procedures/migrate_kubemarine.py b/kubemarine/procedures/migrate_kubemarine.py index 815bee793..5e2a7c076 100644 --- a/kubemarine/procedures/migrate_kubemarine.py +++ b/kubemarine/procedures/migrate_kubemarine.py @@ -119,7 +119,7 @@ def upgrade_cri(self, group: NodeGroup, workers: bool) -> None: drain_timeout = cluster.procedure_inventory.get('drain_timeout') grace_period = cluster.procedure_inventory.get('grace_period') disable_eviction = cluster.procedure_inventory.get("disable-eviction", True) - + dry_run = utils.check_dry_run_status_active(cluster) for node in group.get_ordered_members_list(): node_name = node.get_node_name() control_plane = node @@ -129,11 +129,13 @@ def upgrade_cri(self, group: NodeGroup, workers: bool) -> None: drain_cmd = kubernetes.prepare_drain_command( cluster, node_name, disable_eviction=disable_eviction, drain_timeout=drain_timeout, grace_period=grace_period) - control_plane.sudo(drain_cmd, hide=False) - - kubernetes.upgrade_cri_if_required(node) - node.sudo('systemctl restart kubelet') + control_plane.sudo(drain_cmd, hide=False, dry_run=dry_run) + kubernetes.upgrade_cri_if_required(node, dry_run=dry_run) + node.sudo('systemctl restart kubelet', dry_run=dry_run) + if dry_run: +
cluster.log.debug("[dry-run] Upgraded Cri...") + return if workers: control_plane.sudo(f"kubectl uncordon {node_name}", hide=False) else: @@ -204,10 +206,11 @@ def run(self, res: DynamicResources) -> None: def _run(self, group: NodeGroup) -> None: cluster: KubernetesCluster = group.cluster packages.install(group, include=cluster.get_package_association(self.package_name, 'package_name')) + dry_run = utils.check_dry_run_status_active(cluster) if self.package_name == 'haproxy': - haproxy.restart(group) + haproxy.restart(group, dry_run) else: - keepalived.restart(group) + keepalived.restart(group, dry_run) def associations_changed(self, res: DynamicResources) -> bool: """