diff --git a/kubemarine/admission.py b/kubemarine/admission.py index ef3f7a478..63c36165d 100644 --- a/kubemarine/admission.py +++ b/kubemarine/admission.py @@ -235,7 +235,9 @@ def install_psp_task(cluster: KubernetesCluster): if not is_security_enabled(cluster.inventory): cluster.log.debug("Pod security disabled, skipping policies installation...") return - + if utils.check_dry_run_status_active(cluster): + cluster.log.debug("[dry-run] Installing Pod security policies...") + return first_control_plane = cluster.nodes["control-plane"].get_first_member() cluster.log.debug("Installing OOB policies...") @@ -327,7 +329,7 @@ def restart_pods_task(cluster: KubernetesCluster): if not is_restart: cluster.log.debug("'restart-pods' is disabled, pods won't be restarted") return - + dry_run = utils.check_dry_run_status_active(cluster) first_control_plane = cluster.nodes["control-plane"].get_first_member() cluster.log.debug("Drain-Uncordon all nodes to restart pods") @@ -335,20 +337,20 @@ def restart_pods_task(cluster: KubernetesCluster): for node in kube_nodes.get_ordered_members_list(): first_control_plane.sudo( kubernetes.prepare_drain_command(cluster, node.get_node_name(), disable_eviction=False), - hide=False) - first_control_plane.sudo("kubectl uncordon %s" % node.get_node_name(), hide=False) + hide=False, dry_run=dry_run) + first_control_plane.sudo("kubectl uncordon %s" % node.get_node_name(), hide=False, dry_run=dry_run) cluster.log.debug("Restarting daemon-sets...") daemon_sets = ruamel.yaml.YAML().load(list(first_control_plane.sudo("kubectl get ds -A -o yaml").values())[0].stdout) for ds in daemon_sets["items"]: - first_control_plane.sudo("kubectl rollout restart ds %s -n %s" % (ds["metadata"]["name"], ds["metadata"]["namespace"])) + first_control_plane.sudo("kubectl rollout restart ds %s -n %s" % (ds["metadata"]["name"], ds["metadata"]["namespace"]),dry_run=dry_run) # we do not know to wait for, only for system pods maybe cluster.log.debug("Waiting for system pods...") kubernetes.wait_for_any_pods(cluster, first_control_plane) -def update_kubeadm_configmap_psp(first_control_plane: NodeGroup, target_state: str) -> str: +def update_kubeadm_configmap_psp(first_control_plane: NodeGroup, target_state: str, dry_run=False) -> str: yaml = ruamel.yaml.YAML() # load kubeadm config map and retrieve cluster config @@ -369,22 +371,23 @@ def update_kubeadm_configmap_psp(first_control_plane: NodeGroup, target_state: s buf = io.StringIO() yaml.dump(kubeadm_cm, buf) filename = uuid.uuid4().hex - first_control_plane.put(buf, "/tmp/%s.yaml" % filename) - first_control_plane.sudo("kubectl apply -f /tmp/%s.yaml" % filename) - first_control_plane.sudo("rm -f /tmp/%s.yaml" % filename) + first_control_plane.put(buf, "/tmp/%s.yaml" % filename, dry_run=dry_run) + first_control_plane.sudo("kubectl apply -f /tmp/%s.yaml" % filename, dry_run=dry_run) + first_control_plane.sudo("rm -f /tmp/%s.yaml" % filename, dry_run=dry_run) return final_plugins_string def update_kubeadm_configmap(first_control_plane: NodeGroup, target_state: str) -> str: admission_impl = first_control_plane.cluster.inventory['rbac']['admission'] + dry_run = utils.check_dry_run_status_active(first_control_plane.cluster) if admission_impl == "psp": - return update_kubeadm_configmap_psp(first_control_plane, target_state) + return update_kubeadm_configmap_psp(first_control_plane, target_state, dry_run) else: # admission_impl == "pss": - return update_kubeadm_configmap_pss(first_control_plane, target_state) + return 
update_kubeadm_configmap_pss(first_control_plane, target_state, dry_run) -def update_kubeapi_config_psp(control_planes: NodeGroup, plugins_list: str): +def update_kubeapi_config_psp(control_planes: NodeGroup, plugins_list: str, dry_run=False): yaml = ruamel.yaml.YAML() for control_plane in control_planes.get_ordered_members_list(): @@ -399,7 +402,7 @@ def update_kubeapi_config_psp(control_planes: NodeGroup, plugins_list: str): # place updated config on control-plane buf = io.StringIO() yaml.dump(conf, buf) - control_plane.put(buf, "/etc/kubernetes/manifests/kube-apiserver.yaml", sudo=True) + control_plane.put(buf, "/etc/kubernetes/manifests/kube-apiserver.yaml", sudo=True, dry_run=dry_run) # force kube-apiserver pod restart, then wait for api to become available if control_planes.cluster.inventory['services']['cri']['containerRuntime'] == 'containerd': @@ -414,10 +417,11 @@ def update_kubeapi_config_psp(control_planes: NodeGroup, plugins_list: str): def update_kubeapi_config(control_planes: NodeGroup, options_list: str): admission_impl = control_planes.cluster.inventory['rbac']['admission'] + dry_run = utils.check_dry_run_status_active(control_planes.cluster) if admission_impl == "psp": - return update_kubeapi_config_psp(control_planes, options_list) + return update_kubeapi_config_psp(control_planes, options_list, dry_run) elif admission_impl == "pss": - return update_kubeapi_config_pss(control_planes, options_list) + return update_kubeapi_config_pss(control_planes, options_list, dry_run) def is_security_enabled(inventory: dict): admission_impl = inventory['rbac']['admission'] @@ -455,7 +459,7 @@ def apply_default_pss(cluster: KubernetesCluster): elif procedure_config["pod-security"] == "enabled" and current_config["pod-security"] == "disabled": return manage_pss(cluster, "install") else: - return manage_pss(cluster, "init") + return manage_pss(cluster, "init") def delete_default_pss(cluster: KubernetesCluster): @@ -466,13 +470,14 @@ def delete_default_pss(cluster: KubernetesCluster): def manage_privileged_from_file(group: NodeGroup, filename, manage_type): + dry_run = utils.check_dry_run_status_active(group.cluster) if manage_type not in ["apply", "delete"]: raise Exception("unexpected manage type for privileged policy") privileged_policy = utils.read_internal(os.path.join(policies_file_path, filename)) remote_path = tmp_filepath_pattern % filename - group.put(io.StringIO(privileged_policy), remote_path, backup=True, sudo=True) + group.put(io.StringIO(privileged_policy), remote_path, backup=True, sudo=True, dry_run=dry_run) - return group.sudo("kubectl %s -f %s" % (manage_type, remote_path), warn=True) + return group.sudo("kubectl %s -f %s" % (manage_type, remote_path), warn=True, dry_run=dry_run) def resolve_oob_scope(oob_policies_conf: Dict[str, Any], selector: str): @@ -509,6 +514,7 @@ def manage_policies(group: NodeGroup, manage_type, manage_scope): psp_to_manage = manage_scope.get(psp_list_option, None) roles_to_manage = manage_scope.get(roles_list_option, None) bindings_to_manage = manage_scope.get(bindings_list_option, None) + dry_run = utils.check_dry_run_status_active(group.cluster) if not psp_to_manage and not roles_to_manage and not bindings_to_manage: group.cluster.log.verbose("No policies to %s" % manage_type) @@ -517,9 +523,9 @@ def manage_policies(group: NodeGroup, manage_type, manage_scope): template = collect_policies_template(psp_to_manage, roles_to_manage, bindings_to_manage) filename = uuid.uuid4().hex remote_path = tmp_filepath_pattern % filename - 
group.put(io.StringIO(template), remote_path, backup=True, sudo=True) - result = group.sudo("kubectl %s -f %s" % (manage_type, remote_path), warn=True) - group.sudo("rm -f %s" % remote_path) + group.put(io.StringIO(template), remote_path, backup=True, sudo=True, dry_run=dry_run) + result = group.sudo("kubectl %s -f %s" % (manage_type, remote_path), warn=True, dry_run=dry_run) + group.sudo("rm -f %s" % remote_path, dry_run=dry_run) return result @@ -626,57 +632,58 @@ def manage_enrichment(inventory: dict, cluster: KubernetesCluster): def manage_pss(cluster: KubernetesCluster, manage_type: str): first_control_plane = cluster.nodes["control-plane"].get_first_member() control_planes = cluster.nodes["control-plane"] + dry_run = utils.check_dry_run_status_active(cluster) # 'apply' - change options in admission.yaml, PSS is enabled if manage_type == "apply": # set labels for predifined plugins namespaces and namespaces defined in procedure config - label_namespace_pss(cluster, manage_type) + label_namespace_pss(cluster, manage_type, dry_run) # copy admission config on control-planes copy_pss(control_planes) for control_plane in control_planes.get_ordered_members_list(): # force kube-apiserver pod restart, then wait for api to become available if control_plane.cluster.inventory['services']['cri']['containerRuntime'] == 'containerd': control_plane.call(utils.wait_command_successful, command="crictl rm -f " - "$(sudo crictl ps --name kube-apiserver -q)") + "$(sudo crictl ps --name kube-apiserver -q)", dry_run=dry_run) else: control_plane.call(utils.wait_command_successful, command="docker stop " "$(sudo docker ps -f 'name=k8s_kube-apiserver'" - " | awk '{print $1}')") - control_plane.call(utils.wait_command_successful, command="kubectl get pod -n kube-system") + " | awk '{print $1}')", dry_run=dry_run) + control_plane.call(utils.wait_command_successful, command="kubectl get pod -n kube-system", dry_run=dry_run) # 'install' - enable PSS elif manage_type == "install": # set labels for predifined plugins namespaces and namespaces defined in procedure config - label_namespace_pss(cluster, manage_type) + label_namespace_pss(cluster, manage_type, dry_run) # copy admission config on control-planes copy_pss(cluster.nodes["control-plane"]) cluster.log.debug("Updating kubeadm config map") - final_features_list = first_control_plane.call(update_kubeadm_configmap_pss, target_state="enabled") + final_features_list = first_control_plane.call(update_kubeadm_configmap_pss, target_state="enabled", dry_run=dry_run) # update api-server config on all control-planes cluster.log.debug("Updating kube-apiserver configs on control-planes") - cluster.nodes["control-plane"].call(update_kubeapi_config_pss, features_list=final_features_list) + cluster.nodes["control-plane"].call(update_kubeapi_config_pss, features_list=final_features_list, dry_run=dry_run) # 'init' make changes during init Kubernetes cluster elif manage_type == "init": cluster.log.debug("Updating kubeadm config map") - first_control_plane.call(update_kubeadm_configmap_pss, target_state="enabled") + first_control_plane.call(update_kubeadm_configmap_pss, target_state="enabled", dry_run=dry_run) # 'delete' - disable PSS elif manage_type == "delete": # set labels for predifined plugins namespaces and namespaces defined in procedure config - label_namespace_pss(cluster, manage_type) + label_namespace_pss(cluster, manage_type, dry_run) final_features_list = first_control_plane.call(update_kubeadm_configmap, target_state="disabled") # update api-server config on all 
control-planes cluster.log.debug("Updating kube-apiserver configs on control-planes") - cluster.nodes["control-plane"].call(update_kubeapi_config_pss, features_list=final_features_list) + cluster.nodes["control-plane"].call(update_kubeapi_config_pss, features_list=final_features_list, dry_run=dry_run) # erase PSS admission config cluster.log.debug("Erase admission configuration... %s" % admission_path) group = cluster.nodes["control-plane"] - group.sudo("rm -f %s" % admission_path, warn=True) + group.sudo("rm -f %s" % admission_path, warn=True, dry_run=dry_run) -def update_kubeapi_config_pss(control_planes: NodeGroup, features_list: str): +def update_kubeapi_config_pss(control_planes: NodeGroup, features_list: str, dry_run=False): yaml = ruamel.yaml.YAML() for control_plane in control_planes.get_ordered_members_list(): @@ -710,15 +717,17 @@ def update_kubeapi_config_pss(control_planes: NodeGroup, features_list: str): # force kube-apiserver pod restart, then wait for api to become available if control_plane.cluster.inventory['services']['cri']['containerRuntime'] == 'containerd': control_plane.call(utils.wait_command_successful, command="crictl rm -f " - "$(sudo crictl ps --name kube-apiserver -q)") + "$(sudo crictl ps --name kube-apiserver -q)", + dry_run=dry_run) else: control_plane.call(utils.wait_command_successful, command="docker stop " "$(sudo docker ps -f 'name=k8s_kube-apiserver'" - " | awk '{print $1}')") - control_plane.call(utils.wait_command_successful, command="kubectl get pod -n kube-system") + " | awk '{print $1}')", + dry_run=dry_run) + control_plane.call(utils.wait_command_successful, command="kubectl get pod -n kube-system", dry_run=dry_run) -def update_kubeadm_configmap_pss(first_control_plane: NodeGroup, target_state: str) -> str: +def update_kubeadm_configmap_pss(first_control_plane: NodeGroup, target_state: str, dry_run=False) -> str: yaml = ruamel.yaml.YAML() final_feature_list = "" @@ -727,7 +736,7 @@ def update_kubeadm_configmap_pss(first_control_plane: NodeGroup, target_state: s result = first_control_plane.sudo("kubectl get cm kubeadm-config -n kube-system -o yaml") kubeadm_cm = yaml.load(list(result.values())[0].stdout) cluster_config = yaml.load(kubeadm_cm["data"]["ClusterConfiguration"]) - + # update kubeadm config map with feature list if target_state == "enabled": if "feature-gates" in cluster_config["apiServer"]["extraArgs"]: @@ -762,9 +771,9 @@ def update_kubeadm_configmap_pss(first_control_plane: NodeGroup, target_state: s buf = io.StringIO() yaml.dump(kubeadm_cm, buf) filename = uuid.uuid4().hex - first_control_plane.put(buf, "/tmp/%s.yaml" % filename) - first_control_plane.sudo("kubectl apply -f /tmp/%s.yaml" % filename) - first_control_plane.sudo("rm -f /tmp/%s.yaml" % filename) + first_control_plane.put(buf, "/tmp/%s.yaml" % filename, dry_run=dry_run) + first_control_plane.sudo("kubectl apply -f /tmp/%s.yaml" % filename, dry_run=dry_run) + first_control_plane.sudo("rm -f /tmp/%s.yaml" % filename, dry_run=dry_run) return final_feature_list @@ -810,25 +819,31 @@ def update_finalized_inventory(cluster: KubernetesCluster, inventory_to_finalize return inventory_to_finalize + def copy_pss(group: NodeGroup): - if group.cluster.inventory['rbac']['admission'] != "pss": + if group.cluster.inventory['rbac']['admission'] != "pss": return - if group.cluster.context.get('initial_procedure') == 'manage_pss': - if not is_security_enabled(group.cluster.inventory) and \ - group.cluster.procedure_inventory["pss"]["pod-security"] != "enabled": - 
group.cluster.log.debug("Pod security disabled, skipping pod admission installation...") + + cluster = group.cluster + if cluster.context.get('initial_procedure') == 'manage_pss': + if not is_security_enabled(cluster.inventory) and \ + cluster.procedure_inventory["pss"]["pod-security"] != "enabled": + cluster.log.debug("Pod security disabled, skipping pod admission installation...") return - if group.cluster.context.get('initial_procedure') == 'install': + if cluster.context.get('initial_procedure') == 'install': if not is_security_enabled(group.cluster.inventory): - group.cluster.log.debug("Pod security disabled, skipping pod admission installation...") + cluster.log.debug("Pod security disabled, skipping pod admission installation...") return - defaults = group.cluster.inventory["rbac"]["pss"]["defaults"] - exemptions = group.cluster.inventory["rbac"]["pss"]["exemptions"] + defaults = cluster.inventory["rbac"]["pss"]["defaults"] + exemptions = cluster.inventory["rbac"]["pss"]["exemptions"] # create admission config from template and cluster.yaml admission_config = Template(utils.read_internal(admission_template))\ - .render(defaults=defaults,exemptions=exemptions) + .render(defaults=defaults, exemptions=exemptions) + if utils.check_dry_run_status_active(cluster): + cluster.log.debug("Copying Admission config files") + return None # put admission config on every control-planes filename = uuid.uuid4().hex remote_path = tmp_filepath_pattern % filename @@ -841,7 +856,7 @@ def copy_pss(group: NodeGroup): return result -def label_namespace_pss(cluster: KubernetesCluster, manage_type: str): +def label_namespace_pss(cluster: KubernetesCluster, manage_type: str, dry_run=False): first_control_plane = cluster.nodes["control-plane"].get_first_member() # set/delete labels on predifined plugins namsespaces for plugin in cluster.inventory["plugins"]: @@ -852,34 +867,34 @@ def label_namespace_pss(cluster: KubernetesCluster, manage_type: str): cluster.log.debug(f"Set PSS labels on namespace {privileged_plugins[plugin]}") for mode in valid_modes: first_control_plane.sudo(f"kubectl label ns {privileged_plugins[plugin]} " - f"pod-security.kubernetes.io/{mode}=privileged --overwrite") + f"pod-security.kubernetes.io/{mode}=privileged --overwrite", dry_run=dry_run) first_control_plane.sudo(f"kubectl label ns {privileged_plugins[plugin]} " - f"pod-security.kubernetes.io/{mode}-version=latest --overwrite") + f"pod-security.kubernetes.io/{mode}-version=latest --overwrite", dry_run=dry_run) elif is_install and plugin in baseline_plugins.keys(): # set label 'pod-security.kubernetes.io/enforce: baseline' for kubernetes dashboard cluster.log.debug(f"Set PSS labels on namespace {baseline_plugins[plugin]}") for mode in valid_modes: first_control_plane.sudo(f"kubectl label ns {baseline_plugins[plugin]} " - f"pod-security.kubernetes.io/{mode}=baseline --overwrite") + f"pod-security.kubernetes.io/{mode}=baseline --overwrite", dry_run=dry_run) first_control_plane.sudo(f"kubectl label ns {baseline_plugins[plugin]} " - f"pod-security.kubernetes.io/{mode}-version=latest --overwrite") + f"pod-security.kubernetes.io/{mode}-version=latest --overwrite", dry_run=dry_run) elif manage_type == "delete": if is_install and plugin in privileged_plugins.keys(): # delete label 'pod-security.kubernetes.io/enforce: privileged' for local provisioner and ingress namespaces cluster.log.debug(f"Delete PSS labels from namespace {privileged_plugins[plugin]}") for mode in valid_modes: first_control_plane.sudo(f"kubectl label ns 
{privileged_plugins[plugin]} " - f"pod-security.kubernetes.io/{mode}- || true") + f"pod-security.kubernetes.io/{mode}- || true", dry_run=dry_run) first_control_plane.sudo(f"kubectl label ns {privileged_plugins[plugin]} " - f"pod-security.kubernetes.io/{mode}-version- || true") + f"pod-security.kubernetes.io/{mode}-version- || true", dry_run=dry_run) elif is_install and plugin in baseline_plugins.keys(): # delete 'pod-security.kubernetes.io/enforce: baseline' for kubernetes dashboard cluster.log.debug(f"Delete PSS labels from namespace {baseline_plugins[plugin]}") for mode in valid_modes: first_control_plane.sudo(f"kubectl label ns {baseline_plugins[plugin]} " - f"pod-security.kubernetes.io/{mode}- || true") + f"pod-security.kubernetes.io/{mode}- || true", dry_run=dry_run) first_control_plane.sudo(f"kubectl label ns {baseline_plugins[plugin]} " - f"pod-security.kubernetes.io/{mode}-version- || true") + f"pod-security.kubernetes.io/{mode}-version- || true", dry_run=dry_run) procedure_config = cluster.procedure_inventory["pss"] namespaces = procedure_config.get("namespaces") @@ -903,25 +918,25 @@ def label_namespace_pss(cluster: KubernetesCluster, manage_type: str): cluster.log.debug(f"Set PSS labels on {ns_name} namespace from defaults") for mode in default_modes: first_control_plane.sudo(f"kubectl label ns {ns_name} " - f"pod-security.kubernetes.io/{mode}={default_modes[mode]} --overwrite") + f"pod-security.kubernetes.io/{mode}={default_modes[mode]} --overwrite", dry_run=dry_run) if isinstance(namespace, dict): # set labels that are set in namespaces section cluster.log.debug(f"Set PSS labels on {ns_name} namespace") for item in list(namespace[ns_name]): first_control_plane.sudo(f"kubectl label ns {ns_name} " - f"pod-security.kubernetes.io/{item}={namespace[ns_name][item]} --overwrite") + f"pod-security.kubernetes.io/{item}={namespace[ns_name][item]} --overwrite", dry_run=dry_run) elif manage_type == "delete": # delete labels that are set in default section if default_modes: cluster.log.debug(f"Delete PSS labels on {ns_name} namespace from defaults") for mode in default_modes: - first_control_plane.sudo(f"kubectl label ns {ns_name} pod-security.kubernetes.io/{mode}-") + first_control_plane.sudo(f"kubectl label ns {ns_name} pod-security.kubernetes.io/{mode}-", dry_run=dry_run) # delete labels that are set in namespaces section cluster.log.debug(f"Delete PSS labels on {ns_name} namespace") if isinstance(namespace, dict): for item in list(namespace[ns_name]): first_control_plane.sudo(f"kubectl label ns {ns_name} " - f"pod-security.kubernetes.io/{item}-") + f"pod-security.kubernetes.io/{item}-", dry_run=dry_run) def check_inventory(cluster: KubernetesCluster): diff --git a/kubemarine/apt.py b/kubemarine/apt.py index 83c4fc6f7..f0a7eeb68 100644 --- a/kubemarine/apt.py +++ b/kubemarine/apt.py @@ -32,15 +32,17 @@ def backup_repo(group: NodeGroup) -> Optional[RunnersGroupResult]: if not group.cluster.inventory['services']['packages']['package_manager']['replace-repositories']: group.cluster.log.debug("Skipped - repos replacement disabled in configuration") return None + dry_run = utils.check_dry_run_status_active(group.cluster) # all files in directory will be renamed: xxx.repo -> xxx.repo.bak # if there already any files with ".bak" extension, they should not be renamed to ".bak.bak"! 
return group.sudo("find /etc/apt/ -type f -name '*.list' | " - "sudo xargs -t -iNAME mv -bf NAME NAME.bak") + "sudo xargs -t -iNAME mv -bf NAME NAME.bak", dry_run=dry_run) def add_repo(group: NodeGroup, repo_data: Union[List[str], Dict[str, dict], str]) -> RunnersGroupResult: - create_repo_file(group, repo_data, get_repo_file_name()) - return group.sudo(DEBIAN_HEADERS + 'apt clean && sudo apt update') + dry_run = utils.check_dry_run_status_active(group.cluster) + create_repo_file(group, repo_data, get_repo_file_name(), dry_run) + return group.sudo(DEBIAN_HEADERS + 'apt clean && sudo apt update', dry_run=dry_run) def get_repo_file_name() -> str: @@ -49,7 +51,7 @@ def get_repo_file_name() -> str: def create_repo_file(group: AbstractGroup[RunResult], repo_data: Union[List[str], Dict[str, dict], str], - repo_file: str) -> None: + repo_file: str, dry_run=False) -> None: # if repo_data is list, then convert it to string using join if isinstance(repo_data, list): repo_data_str = "\n".join(repo_data) + "\n" @@ -57,9 +59,8 @@ def create_repo_file(group: AbstractGroup[RunResult], raise Exception("Not supported repositories format for apt package manager") else: repo_data_str = utils.read_external(repo_data) - repo_data_stream = io.StringIO(repo_data_str) - group.put(repo_data_stream, repo_file, sudo=True) + group.put(repo_data_stream, repo_file, sudo=True, dry_run=dry_run) def clean(group: NodeGroup) -> RunnersGroupResult: @@ -90,7 +91,7 @@ def install(group: AbstractGroup[GROUP_RUN_TYPE], include: Union[str, List[str]] command = get_install_cmd(include, exclude) - return group.sudo(command, callback=callback) + return group.sudo(command, callback=callback, dry_run=utils.check_dry_run_status_active(group.cluster)) def remove(group: AbstractGroup[GROUP_RUN_TYPE], include: Union[str, List[str]] = None, exclude: Union[str, List[str]] = None, @@ -107,7 +108,7 @@ def remove(group: AbstractGroup[GROUP_RUN_TYPE], include: Union[str, List[str]] exclude = ','.join(exclude) command += ' --exclude=%s' % exclude - return group.sudo(command, warn=warn, hide=hide) + return group.sudo(command, warn=warn, hide=hide, dry_run=utils.check_dry_run_status_active(group.cluster)) def upgrade(group: AbstractGroup[GROUP_RUN_TYPE], include: Union[str, List[str]] = None, @@ -125,7 +126,7 @@ def upgrade(group: AbstractGroup[GROUP_RUN_TYPE], include: Union[str, List[str]] exclude = ','.join(exclude) command += ' --exclude=%s' % exclude - return group.sudo(command) + return group.sudo(command, dry_run=utils.check_dry_run_status_active(group.cluster)) def no_changes_found(action: str, result: RunnersResult) -> bool: diff --git a/kubemarine/audit.py b/kubemarine/audit.py index 66f9638dc..eb9003738 100644 --- a/kubemarine/audit.py +++ b/kubemarine/audit.py @@ -68,6 +68,9 @@ def install(group: NodeGroup) -> Optional[RunnersGroupResult]: else: log.debug(f'Auditd package is not installed on {not_installed_hosts}, installing...') + if utils.check_dry_run_status_active(cluster): + return None + collector = CollectorCallback(cluster) with cluster.make_group(not_installed_hosts).new_executor() as exe: for node in exe.group.get_ordered_members_list(): @@ -93,7 +96,10 @@ def apply_audit_rules(group: NodeGroup) -> RunnersGroupResult: rules_content = " \n".join(cluster.inventory['services']['audit']['rules']) utils.dump_file(cluster, rules_content, 'audit.rules') + if utils.check_dry_run_status_active(group.cluster): + return None collector = CollectorCallback(cluster) + with group.new_executor() as exe: for node in 
exe.group.get_ordered_members_list(): host = node.get_host() diff --git a/kubemarine/core/cluster.py b/kubemarine/core/cluster.py index 01d230104..206f3d711 100755 --- a/kubemarine/core/cluster.py +++ b/kubemarine/core/cluster.py @@ -341,7 +341,8 @@ def dump_finalized_inventory(self) -> None: data = yaml.dump(inventory_for_dump) finalized_filename = "cluster_finalized.yaml" utils.dump_file(self, data, finalized_filename) - utils.dump_file(self, data, finalized_filename, dump_location=False) + if not utils.check_dry_run_status_active(self): + utils.dump_file(self, data, finalized_filename, dump_location=False) def preserve_inventory(self) -> None: self.log.debug("Start preserving of the information about the procedure.") diff --git a/kubemarine/core/executor.py b/kubemarine/core/executor.py index 626316ded..4d712ea67 100644 --- a/kubemarine/core/executor.py +++ b/kubemarine/core/executor.py @@ -388,7 +388,6 @@ def get_last_results(self) -> Dict[str, TokenizedResult]: def flush(self) -> None: """ Flushes the connections' queue and returns grouped result - :return: grouped tokenized results per connection. """ self._check_closed() diff --git a/kubemarine/core/flow.py b/kubemarine/core/flow.py index 3d4f8ea9e..a28b7f8dc 100755 --- a/kubemarine/core/flow.py +++ b/kubemarine/core/flow.py @@ -133,8 +133,8 @@ def run_actions(resources: res.DynamicResources, actions: Sequence[action.Action timestamp = utils.get_current_timestamp_formatted() inventory_file_basename = os.path.basename(resources.inventory_filepath) utils.dump_file(context, stream, "%s_%s" % (inventory_file_basename, str(timestamp))) - - resources.recreate_inventory() + if not utils.check_dry_run_status_active(last_cluster): + resources.recreate_inventory() _post_process_actions_group(last_cluster, context, successfully_performed) successfully_performed = [] last_cluster = None @@ -186,9 +186,8 @@ def run_tasks(resources: res.DynamicResources, tasks, cumulative_points=None, ta cluster = resources.cluster() if args.get('without_act', False): + cluster.context["dry_run"] = True resources.context['preserve_inventory'] = False - cluster.log.debug('\nFurther acting manually disabled') - return init_tasks_flow(cluster) run_tasks_recursive(tasks, final_list, cluster, cumulative_points, []) @@ -320,7 +319,7 @@ def new_common_parser(cli_help: str) -> argparse.ArgumentParser: help='define main cluster configuration file') parser.add_argument('--ansible-inventory-location', - default='./ansible-inventory.ini', + default='ansible-inventory.ini', help='auto-generated ansible-compatible inventory file location') parser.add_argument('--dump-location', diff --git a/kubemarine/core/group.py b/kubemarine/core/group.py index c245471d6..ab72b5a88 100755 --- a/kubemarine/core/group.py +++ b/kubemarine/core/group.py @@ -24,6 +24,8 @@ Callable, Dict, List, Union, Any, TypeVar, Mapping, Iterator, Optional, Iterable, Generic, Set, cast ) +import fabric + from kubemarine.core import utils, log, errors from kubemarine.core.executor import ( RawExecutor, Token, GenericResult, RunnersResult, HostToResult, Callback, TokenizedResult, @@ -266,6 +268,36 @@ def result(self) -> RunnersGroupResult: GROUP_SELF = TypeVar('GROUP_SELF', bound='AbstractGroup[Union[RunnersGroupResult, Token]]') +def _handle_dry_run(fn: Callable) -> Callable: + """ + Method is a decorator that handles internal streaming of output (hide=False) of fabric (invoke). + Note! This decorator should be the outermost. 
+ :param fn: Origin function to apply annotation to + :return: Validation wrapper function + """ + def do_dry_run(self: NodeGroup, *args, **kwargs) -> NodeGroupResult: + results: Dict[str, str] = {} + + if kwargs.get("dry_run"): + if fn.__name__ == "put": + self.cluster.log.verbose("Local file \"%s\" is being transferred to remote file \"%s\" on nodes %s with options %s" + % (args[0], args[1], list(self.nodes), kwargs)) + else: + self.cluster.log.verbose('Performing %s %s on nodes %s with options: %s' % ( + fn.__name__, args[0], list(self.nodes), kwargs)) + return NodeGroupResult(self.cluster, results) + elif "dry_run" in kwargs.keys(): + del kwargs["dry_run"] + try: + results = fn(self, *args, **kwargs) + return results + except fabric.group.GroupException as e: + results = e.result + raise + + return do_dry_run + + class AbstractGroup(Generic[GROUP_RUN_TYPE], ABC): def __init__(self, ips: Iterable[Union[str, GROUP_SELF]], cluster: object): from kubemarine.core.cluster import KubernetesCluster @@ -297,6 +329,7 @@ def __eq__(self, other: object) -> bool: def __ne__(self, other: object) -> bool: return not self == other + @_handle_dry_run def run(self, command: str, warn: bool = False, hide: bool = True, env: Dict[str, str] = None, timeout: int = None, @@ -308,6 +341,7 @@ def run(self, command: str, return self._run("run", command, caller, warn=warn, hide=hide, env=env, timeout=timeout, callback=callback) + @_handle_dry_run def sudo(self, command: str, warn: bool = False, hide: bool = True, env: Dict[str, str] = None, timeout: int = None, @@ -328,9 +362,11 @@ def _run(self, do_type: str, command: str, caller: Optional[Dict[str, object]], def get(self, remote_file: str, local_file: str) -> None: pass + @_handle_dry_run def put(self, local_file: Union[io.StringIO, str], remote_file: str, backup: bool = False, sudo: bool = False, mkdir: bool = False, immutable: bool = False) -> None: + if isinstance(local_file, io.StringIO): self.cluster.log.verbose("Text is being transferred to remote file \"%s\" on nodes %s" % (remote_file, list(self.nodes))) diff --git a/kubemarine/core/utils.py b/kubemarine/core/utils.py index 4d1688a2d..055c3c197 100755 --- a/kubemarine/core/utils.py +++ b/kubemarine/core/utils.py @@ -147,7 +147,11 @@ def make_ansible_inventory(location, c): config_compiled += '\n' + string config_compiled += '\n\n' - dump_file({}, config_compiled, location, dump_location=False) + dump_location = False + args = cluster.context.get('execution_arguments') + if args.get('without_act', None): + dump_location = True + dump_file(cluster, config_compiled, location, dump_location=dump_location) def get_current_timestamp_formatted(): @@ -203,7 +207,7 @@ def dump_file(context, data: object, filename: str, cluster = context context = cluster.context - args = context['execution_arguments'] + args = context.get('execution_arguments', {}) if args.get('disable_dump', True) \ and not (filename in ClusterStorage.PRESERVED_DUMP_FILES and context['preserve_inventory']): return @@ -229,11 +233,14 @@ def get_dump_filepath(context, filename): return get_external_resource_path(os.path.join(context['execution_arguments']['dump_location'], 'dump', filename)) -def wait_command_successful(g, command, retries=15, timeout=5, warn=True, hide=False): +def wait_command_successful(g, command, retries=15, timeout=5, warn=True, hide=False, dry_run=False): from kubemarine.core.group import NodeGroup group: NodeGroup = g log = group.cluster.log + if dry_run: + log.debug("[dry-run] Command succeeded") + return while retries > 
0: log.debug("Waiting for command to succeed, %s retries left" % retries) @@ -470,6 +477,10 @@ def _test_version(version: str, numbers_amount: int) -> List[int]: raise ValueError(f'Incorrect version \"{version}\" format, expected version pattern is \"{expected_pattern}\"') +def check_dry_run_status_active(cluster): + return cluster.context.get("dry_run") + + class ClusterStorage: """ File preservation: diff --git a/kubemarine/coredns.py b/kubemarine/coredns.py index e45d5dad0..ea0fc7c77 100644 --- a/kubemarine/coredns.py +++ b/kubemarine/coredns.py @@ -148,18 +148,18 @@ def generate_configmap(inventory: dict) -> str: return config + '\n' -def apply_configmap(cluster: KubernetesCluster, config: str) -> RunnersGroupResult: +def apply_configmap(cluster: KubernetesCluster, config: str, dry_run=False) -> RunnersGroupResult: utils.dump_file(cluster, config, 'coredns-configmap.yaml') group = cluster.make_group_from_roles(['control-plane', 'worker']).get_final_nodes() - group.put(io.StringIO(config), '/etc/kubernetes/coredns-configmap.yaml', backup=True, sudo=True) + group.put(io.StringIO(config), '/etc/kubernetes/coredns-configmap.yaml', backup=True, sudo=True, dry_run=dry_run) return cluster.nodes['control-plane'].get_final_nodes().get_first_member()\ .sudo('kubectl apply -f /etc/kubernetes/coredns-configmap.yaml && ' - 'sudo kubectl rollout restart -n kube-system deployment/coredns') + 'sudo kubectl rollout restart -n kube-system deployment/coredns', dry_run=dry_run) -def apply_patch(cluster: KubernetesCluster) -> Union[RunnersGroupResult, str]: +def apply_patch(cluster: KubernetesCluster, dry_run=False) -> Union[RunnersGroupResult, str]: apply_command = '' for config_type in ['deployment']: @@ -177,11 +177,11 @@ def apply_patch(cluster: KubernetesCluster) -> Union[RunnersGroupResult, str]: utils.dump_file(cluster, config, filename) group = cluster.make_group_from_roles(['control-plane', 'worker']).get_final_nodes() - group.put(io.StringIO(config), filepath, backup=True, sudo=True) + group.put(io.StringIO(config), filepath, backup=True, sudo=True, dry_run=dry_run) apply_command = 'kubectl patch %s coredns -n kube-system --type merge -p \"$(sudo cat %s)\"' % (config_type, filepath) if apply_command == '': return 'Nothing to patch' - return cluster.nodes['control-plane'].get_final_nodes().get_first_member().sudo(apply_command) + return cluster.nodes['control-plane'].get_final_nodes().get_first_member().sudo(apply_command, dry_run=dry_run) diff --git a/kubemarine/cri/containerd.py b/kubemarine/cri/containerd.py index c111012c3..2388b9a3f 100755 --- a/kubemarine/cri/containerd.py +++ b/kubemarine/cri/containerd.py @@ -26,11 +26,15 @@ def install(group: NodeGroup) -> RunnersGroupResult: + + if utils.check_dry_run_status_active(group.cluster): + group.cluster.log.debug("[dry-run] Installing Containerd") + return None collector = CollectorCallback(group.cluster) + with group.new_executor() as exe: for node in exe.group.get_ordered_members_list(): os_specific_associations = exe.cluster.get_associations_for_node(node.get_host(), 'containerd') - exe.cluster.log.debug("Installing latest containerd and podman on %s node" % node.get_node_name()) # always install latest available containerd and podman packages.install(node, include=os_specific_associations['package_name'], callback=collector) @@ -52,8 +56,10 @@ def configure(group: NodeGroup) -> RunnersGroupResult: log.debug("Uploading crictl configuration for containerd...") crictl_config = yaml.dump({"runtime-endpoint": 
"unix:///run/containerd/containerd.sock"}) + utils.dump_file(cluster, crictl_config, 'crictl.yaml') - group.put(StringIO(crictl_config), '/etc/crictl.yaml', backup=True, sudo=True) + dry_run = utils.check_dry_run_status_active(cluster) + group.put(StringIO(crictl_config), '/etc/crictl.yaml', backup=True, sudo=True, dry_run=dry_run) config_string = "" # double loop is used to make sure that no "simple" `key: value` pairs are accidentally assigned to sections @@ -93,20 +99,23 @@ def configure(group: NodeGroup) -> RunnersGroupResult: if registry_configs[auth_registry].get('auth', {}).get('auth', ''): auth_registries['auths'][auth_registry]['auth'] = registry_configs[auth_registry]['auth']['auth'] auth_json = json.dumps(auth_registries) - group.put(StringIO(auth_json), "/etc/containers/auth.json", backup=True, sudo=True) - group.sudo("chmod 600 /etc/containers/auth.json") + group.put(StringIO(auth_json), "/etc/containers/auth.json", backup=True, sudo=True, dry_run=dry_run) + group.sudo("chmod 600 /etc/containers/auth.json", dry_run=dry_run) if insecure_registries: log.debug("Uploading podman configuration...") podman_registries = f"[registries.insecure]\nregistries = {insecure_registries}\n" - utils.dump_file(cluster, podman_registries, 'podman_registries.conf') - group.sudo("mkdir -p /etc/containers/") - group.put(StringIO(podman_registries), "/etc/containers/registries.conf", backup=True, sudo=True) + utils.dump_file(group.cluster, podman_registries, 'podman_registries.conf') + group.sudo("mkdir -p /etc/containers/", dry_run=dry_run) + group.put(StringIO(podman_registries), "/etc/containers/registries.conf", backup=True, sudo=True, dry_run=dry_run) else: log.debug("Removing old podman configuration...") - group.sudo("rm -f /etc/containers/registries.conf") + group.sudo("rm -f /etc/containers/registries.conf", dry_run=dry_run) utils.dump_file(cluster, config_string, 'containerd-config.toml') + if dry_run: + return None collector = CollectorCallback(cluster) + with group.new_executor() as exe: for node in exe.group.get_ordered_members_list(): os_specific_associations = exe.cluster.get_associations_for_node(node.get_host(), 'containerd') diff --git a/kubemarine/cri/docker.py b/kubemarine/cri/docker.py index 00da8e2b3..b11ed8c9e 100755 --- a/kubemarine/cri/docker.py +++ b/kubemarine/cri/docker.py @@ -22,6 +22,9 @@ def install(group: NodeGroup) -> RunnersGroupResult: cluster = group.cluster + if utils.check_dry_run_status_active(cluster): + cluster.log.debug("[dry-run] Installing Docker") + return None collector = CollectorCallback(cluster) with group.new_executor() as exe: for node in exe.group.get_ordered_members_list(): @@ -58,6 +61,9 @@ def configure(group: NodeGroup) -> RunnersGroupResult: settings_json = json.dumps(cluster.inventory["services"]['cri']['dockerConfig'], sort_keys=True, indent=4) utils.dump_file(cluster, settings_json, 'docker-daemon.json') + if utils.check_dry_run_status_active(cluster): + group.cluster.log.debug("[dry-run] Configuring Docker") + return collector = CollectorCallback(cluster) with group.new_executor() as exe: @@ -76,9 +82,10 @@ def configure(group: NodeGroup) -> RunnersGroupResult: return collector.result -def prune(group: NodeGroup) -> RunnersGroupResult: +def prune(group: NodeGroup, dry_run=False) -> RunnersGroupResult: return group.sudo('docker container stop $(sudo docker container ls -aq); ' 'sudo docker container rm $(sudo docker container ls -aq); ' 'sudo docker system prune -a -f; ' - # kill all containerd-shim processes, so that no orphan containers 
remain - 'sudo pkill -9 -f "^containerd-shim"', warn=True) + # kill all containerd-shim processes, so that no orphan containers remain + 'sudo pkill -9 -f "^containerd-shim"', warn=True, + dry_run=dry_run) diff --git a/kubemarine/haproxy.py b/kubemarine/haproxy.py index ab8fd025e..37a986511 100644 --- a/kubemarine/haproxy.py +++ b/kubemarine/haproxy.py @@ -159,9 +159,11 @@ def uninstall(group: NodeGroup) -> RunnersGroupResult: return packages.remove(group, include=['haproxy', 'rh-haproxy18']) -def restart(group: NodeGroup) -> None: +def restart(group: NodeGroup, dry_run: bool = False) -> None: cluster: KubernetesCluster = group.cluster cluster.log.debug("Restarting haproxy in all group...") + if dry_run: + return with group.new_executor() as exe: for node in exe.group.get_ordered_members_list(): service_name = _get_associations_for_node(node)['service_name'] diff --git a/kubemarine/k8s_certs.py b/kubemarine/k8s_certs.py index 2b41cd48a..3d5762a94 100644 --- a/kubemarine/k8s_certs.py +++ b/kubemarine/k8s_certs.py @@ -16,12 +16,14 @@ from kubemarine import kubernetes from kubemarine.core.cluster import KubernetesCluster from kubemarine.core.group import NodeGroup +from kubemarine.core import utils def k8s_certs_overview(control_planes: NodeGroup) -> None: + dry_run = utils.check_dry_run_status_active(control_planes.cluster) for control_plane in control_planes.get_ordered_members_list(): control_planes.cluster.log.debug(f"Checking certs expiration for control_plane {control_plane.get_node_name()}") - control_plane.sudo("kubeadm certs check-expiration", hide=False) + control_plane.sudo("kubeadm certs check-expiration", hide=False, dry_run=dry_run) def renew_verify(inventory: dict, cluster: KubernetesCluster) -> dict: @@ -36,16 +38,16 @@ def renew_verify(inventory: dict, cluster: KubernetesCluster) -> dict: def renew_apply(control_planes: NodeGroup) -> None: log = control_planes.cluster.log - + dry_run = utils.check_dry_run_status_active(control_planes.cluster) procedure = control_planes.cluster.procedure_inventory["kubernetes"] cert_list = procedure["cert-list"] for cert in cert_list: - control_planes.sudo(f"kubeadm certs renew {cert}") + control_planes.sudo(f"kubeadm certs renew {cert}", dry_run=dry_run) if "all" in cert_list or "admin.conf" in cert_list: # need to update cluster-admin config - kubernetes.copy_admin_config(log, control_planes) + kubernetes.copy_admin_config(log, control_planes, dry_run=dry_run) control_planes.call(force_renew_kubelet_serving_certs) @@ -60,19 +62,20 @@ def force_restart_control_plane(control_planes: NodeGroup) -> None: cri_impl = control_planes.cluster.inventory['services']['cri']['containerRuntime'] restart_containers = ["etcd", "kube-scheduler", "kube-apiserver", "kube-controller-manager"] c_filter = "grep -e %s" % " -e ".join(restart_containers) - + dry_run = utils.check_dry_run_status_active(control_planes.cluster) if cri_impl == "docker": - control_planes.sudo("sudo docker container rm -f $(sudo docker ps -a | %s | awk '{ print $1 }')" % c_filter, warn=True) + control_planes.sudo("sudo docker container rm -f $(sudo docker ps -a | %s | awk '{ print $1 }')" % c_filter, warn=True, dry_run=dry_run) else: - control_planes.sudo("sudo crictl rm -f $(sudo crictl ps -a | %s | awk '{ print $1 }')" % c_filter, warn=True) + control_planes.sudo("sudo crictl rm -f $(sudo crictl ps -a | %s | awk '{ print $1 }')" % c_filter, warn=True, dry_run=dry_run) def force_renew_kubelet_serving_certs(control_planes: NodeGroup) -> None: # Delete *serving* kubelet cert (kubelet.crt) 
and restart kubelet to create new up-to-date cert. # Client kubelet cert (kubelet.conf) is assumed to be updated automatically by kubelet. + dry_run = utils.check_dry_run_status_active(control_planes.cluster) for control_plane in control_planes.get_ordered_members_list(): - control_plane.sudo(f"rm -f /var/lib/kubelet/pki/kubelet.crt /var/lib/kubelet/pki/kubelet.key") - control_planes.sudo("systemctl restart kubelet") + control_plane.sudo(f"rm -f /var/lib/kubelet/pki/kubelet.crt /var/lib/kubelet/pki/kubelet.key", dry_run=dry_run) + control_planes.sudo("systemctl restart kubelet", dry_run=dry_run) def verify_all_is_absent_or_single(cert_list: List[str]) -> None: diff --git a/kubemarine/keepalived.py b/kubemarine/keepalived.py index 98dab6427..b388f001e 100644 --- a/kubemarine/keepalived.py +++ b/kubemarine/keepalived.py @@ -190,9 +190,11 @@ def uninstall(group: NodeGroup) -> RunnersGroupResult: return packages.remove(group, include='keepalived') -def restart(group: NodeGroup) -> None: +def restart(group: NodeGroup, dry_run=False) -> None: cluster: KubernetesCluster = group.cluster cluster.log.debug("Restarting keepalived in all group...") + if dry_run: + return with group.new_executor() as exe: for node in exe.group.get_ordered_members_list(): service_name = cluster.get_package_association_for_node( @@ -258,6 +260,7 @@ def generate_config(inventory: dict, node: NodeConfig) -> str: def configure(group: NodeGroup) -> RunnersGroupResult: cluster: KubernetesCluster = group.cluster log = cluster.log + dry_run = utils.check_dry_run_status_active(group.cluster) with group.new_executor() as exe: for node in exe.group.get_ordered_members_list(): @@ -267,15 +270,15 @@ def configure(group: NodeGroup) -> RunnersGroupResult: package_associations = cluster.get_associations_for_node(node.get_host(), 'keepalived') configs_directory = '/'.join(package_associations['config_location'].split('/')[:-1]) - exe.group.sudo('mkdir -p %s' % configs_directory) + exe.group.sudo('mkdir -p %s' % configs_directory, dry_run=dry_run) config = generate_config(cluster.inventory, node.get_config()) utils.dump_file(cluster, config, 'keepalived_%s.conf' % node_name) - node.put(io.StringIO(config), package_associations['config_location'], sudo=True) + node.put(io.StringIO(config), package_associations['config_location'], sudo=True, dry_run=dry_run) - log.debug(group.sudo('ls -la %s' % package_associations['config_location'])) + log.debug(group.sudo('ls -la %s' % package_associations['config_location'], dry_run=dry_run)) - restart(group) + restart(group, dry_run) - return group.sudo('systemctl status %s' % package_associations['service_name'], warn=True) + return group.sudo('systemctl status %s' % package_associations['service_name'], warn=True, dry_run=dry_run) diff --git a/kubemarine/kubernetes/__init__.py b/kubemarine/kubernetes/__init__.py index b552bd2e1..dd464bc7b 100644 --- a/kubemarine/kubernetes/__init__.py +++ b/kubemarine/kubernetes/__init__.py @@ -207,6 +207,9 @@ def reset_installation_env(group: NodeGroup): cluster: KubernetesCluster = group.cluster + if utils.check_dry_run_status_active(cluster): + return + drain_timeout = cluster.procedure_inventory.get('drain_timeout') grace_period = cluster.procedure_inventory.get('grace_period') @@ -324,40 +327,48 @@ def delete_nodes(group: NodeGroup): def install(group: NodeGroup) -> RunnersGroupResult: cluster: KubernetesCluster = group.cluster log = cluster.log - + dry_run = utils.check_dry_run_status_active(cluster) with group.new_executor() as exe: log.debug("Making systemd 
unit...") for node in exe.group.get_ordered_members_list(): - node.sudo('rm -rf /etc/systemd/system/kubelet*') + node.sudo('rm -rf /etc/systemd/system/kubelet*', dry_run=dry_run) node.get_node_name() template = Template(utils.read_internal('templates/kubelet.service.j2')).render( hostname=node.get_node_name()) log.debug("Uploading to '%s'..." % node.get_host()) - node.put(io.StringIO(template + "\n"), '/etc/systemd/system/kubelet.service', sudo=True) - node.sudo("chmod 600 /etc/systemd/system/kubelet.service") + node.put(io.StringIO(template + "\n"), '/etc/systemd/system/kubelet.service', sudo=True, dry_run=dry_run) + node.sudo("chmod 644 /etc/systemd/system/kubelet.service", dry_run=dry_run) log.debug("\nReloading systemd daemon...") system.reload_systemctl(exe.group) - exe.group.sudo('systemctl enable kubelet') - - return group.sudo('systemctl status kubelet', warn=True) + exe.group.sudo('systemctl enable kubelet', dry_run=dry_run) + return group.sudo('systemctl status kubelet', warn=True, dry_run=dry_run) def join_other_control_planes(group: NodeGroup) -> RunnersGroupResult: other_control_planes_group = group.get_ordered_members_list()[1:] - - join_dict = group.cluster.context["join_dict"] + dry_run = utils.check_dry_run_status_active(group.cluster) + log = group.cluster.log + join_dict = group.cluster.context.get("join_dict") for node in other_control_planes_group: + if dry_run: + log.debug('[dry-run]Joining control-plane \'%s\'...' % node['name']) + continue join_control_plane(group.cluster, node, join_dict) - group.cluster.log.debug("Verifying installation...") + log.debug("Verifying installation...") first_control_plane = group.get_first_member() - return first_control_plane.sudo("kubectl get pods --all-namespaces -o=wide") + return first_control_plane.sudo("kubectl get pods --all-namespaces -o=wide", dry_run=dry_run) def join_new_control_plane(group: NodeGroup): - join_dict = get_join_dict(group) + dry_run = utils.check_dry_run_status_active(group.cluster) + log = group.cluster.log + join_dict = get_join_dict(group, dry_run=dry_run) for node in group.get_ordered_members_list(): + if dry_run: + log.debug('[dry-run]Joining control-plane \'%s\'...' 
% node['name']) + continue join_control_plane(group.cluster, node, join_dict) @@ -469,10 +480,10 @@ def local_admin_config(node: NodeGroup) -> Iterator[str]: node.sudo(f'rm -f {temp_filepath}') -def copy_admin_config(logger: log.EnhancedLogger, nodes: AbstractGroup[RunResult]) -> None: +def copy_admin_config(logger: log.EnhancedLogger, nodes: AbstractGroup[RunResult], dry_run=False) -> None: logger.debug("Setting up admin-config...") command = "mkdir -p /root/.kube && sudo cp -f /etc/kubernetes/admin.conf /root/.kube/config" - nodes.sudo(command) + nodes.sudo(command, dry_run=dry_run) def fetch_admin_config(cluster: KubernetesCluster) -> str: @@ -498,10 +509,16 @@ def fetch_admin_config(cluster: KubernetesCluster) -> str: return kubeconfig_filename -def get_join_dict(group: NodeGroup) -> dict: +def get_join_dict(group: NodeGroup, dry_run=False) -> dict: + join_dict = {} + if dry_run: + join_dict['discovery-token-ca-cert-hash'] = None + join_dict['token'] = None + return join_dict cluster: KubernetesCluster = group.cluster first_control_plane = cluster.nodes["control-plane"].get_first_member() token_result = first_control_plane.sudo("kubeadm token create --print-join-command", hide=False) + join_strings = list(token_result.values())[0].stdout.rstrip("\n") join_dict = {"worker_join_command": join_strings} @@ -520,6 +537,7 @@ def get_join_dict(group: NodeGroup) -> dict: def init_first_control_plane(group: NodeGroup) -> None: cluster: KubernetesCluster = group.cluster log = cluster.log + dry_run = utils.check_dry_run_status_active(cluster) first_control_plane = group.get_first_member() node_config = first_control_plane.get_config() @@ -555,8 +573,9 @@ def init_first_control_plane(group: NodeGroup) -> None: utils.dump_file(cluster, config, 'init-config_%s.yaml' % node_name) log.debug("Uploading init config to initial control_plane...") - first_control_plane.sudo("mkdir -p /etc/kubernetes") - first_control_plane.put(io.StringIO(config), '/etc/kubernetes/init-config.yaml', sudo=True) + + first_control_plane.sudo("mkdir -p /etc/kubernetes", dry_run=dry_run) + first_control_plane.put(io.StringIO(config), '/etc/kubernetes/init-config.yaml', sudo=True, dry_run=dry_run) # put control-plane patches create_kubeadm_patches_for_node(cluster, first_control_plane) @@ -565,13 +584,16 @@ def init_first_control_plane(group: NodeGroup) -> None: first_control_plane.call(admission.copy_pss) log.debug("Initializing first control_plane...") + result = first_control_plane.sudo( "kubeadm init" " --upload-certs" " --config=/etc/kubernetes/init-config.yaml" " --ignore-preflight-errors='" + cluster.inventory['services']['kubeadm_flags']['ignorePreflightErrors'] + "'" " --v=5", - hide=False) + hide=False, dry_run=dry_run) + if dry_run: + return copy_admin_config(log, first_control_plane) @@ -630,6 +652,9 @@ def wait_uncordon(node: NodeGroup): def wait_for_nodes(group: NodeGroup): cluster: KubernetesCluster = group.cluster log = cluster.log + if utils.check_dry_run_status_active(group.cluster): + log.debug("[dry-run] All nodes are ready!") + return first_control_plane = cluster.nodes["control-plane"].get_first_member() node_names = group.get_nodes_names() @@ -676,8 +701,8 @@ def wait_for_nodes(group: NodeGroup): def init_workers(group: NodeGroup) -> None: cluster: KubernetesCluster = group.cluster - join_dict = cluster.context.get("join_dict", get_join_dict(group)) - + dry_run = utils.check_dry_run_status_active(cluster) + join_dict = cluster.context.get("join_dict", get_join_dict(group, dry_run)) join_config = { 
'apiVersion': group.cluster.inventory["services"]["kubeadm"]['apiVersion'], 'kind': 'JoinConfiguration', @@ -710,8 +735,8 @@ def init_workers(group: NodeGroup) -> None: utils.dump_file(cluster, config, 'join-config-workers.yaml') - group.sudo("mkdir -p /etc/kubernetes") - group.put(io.StringIO(config), '/etc/kubernetes/join-config.yaml', sudo=True) + group.sudo("mkdir -p /etc/kubernetes", dry_run=dry_run) + group.put(io.StringIO(config), '/etc/kubernetes/join-config.yaml', sudo=True, dry_run=dry_run) # put control-plane patches for node in group.get_ordered_members_list(): @@ -724,7 +749,7 @@ def init_workers(group: NodeGroup) -> None: "kubeadm join --config=/etc/kubernetes/join-config.yaml" " --ignore-preflight-errors='" + cluster.inventory['services']['kubeadm_flags']['ignorePreflightErrors'] + "'" " --v=5", - hide=False) + hide=False, dry_run=dry_run) def apply_labels(group: NodeGroup) -> RunnersGroupResult: @@ -732,6 +757,9 @@ def apply_labels(group: NodeGroup) -> RunnersGroupResult: log = cluster.log log.debug("Applying additional labels for nodes") + if utils.check_dry_run_status_active(group.cluster): + log.debug("[dry-run]Successfully applied additional labels") + return None # TODO: Add "--overwrite-labels" switch # TODO: Add labels validation after applying control_plane = cluster.nodes["control-plane"].get_first_member() @@ -755,6 +783,10 @@ def apply_taints(group: NodeGroup) -> RunnersGroupResult: log = cluster.log log.debug("Applying additional taints for nodes") + + if utils.check_dry_run_status_active(group.cluster): + log.debug("[dry-run]Successfully applied additional taints") + return None control_plane = cluster.nodes["control-plane"].get_first_member() with control_plane.new_executor() as exe: for node in group.get_ordered_members_configs_list(): @@ -796,7 +828,7 @@ def get_kubeadm_config(inventory: dict): return f'{kubeadm_kubelet}---\n{kubeadm}' -def upgrade_first_control_plane(upgrade_group: NodeGroup, cluster: KubernetesCluster, **drain_kwargs): +def upgrade_first_control_plane(upgrade_group: NodeGroup, cluster: KubernetesCluster, dry_run=False, **drain_kwargs): version = cluster.inventory["services"]["kubeadm"]["kubernetesVersion"] first_control_plane = cluster.nodes['control-plane'].get_first_member() node_name = first_control_plane.get_node_name() @@ -808,8 +840,8 @@ def upgrade_first_control_plane(upgrade_group: NodeGroup, cluster: KubernetesClu cluster.log.debug("Upgrading first control-plane \"%s\"" % node_name) # put control-plane patches - create_kubeadm_patches_for_node(cluster, first_control_plane) - + create_kubeadm_patches_for_node(cluster, first_control_plane, dry_run=dry_run) + # TODO: when k8s v1.21 is excluded from Kubemarine, this condition should be removed # and only "else" branch remains if "v1.21" in cluster.inventory["services"]["kubeadm"]["kubernetesVersion"]: @@ -821,17 +853,21 @@ def upgrade_first_control_plane(upgrade_group: NodeGroup, cluster: KubernetesClu flags += " --config /tmp/kubeadm_config.yaml" drain_cmd = prepare_drain_command(cluster, node_name, **drain_kwargs) - first_control_plane.sudo(drain_cmd, hide=False) + first_control_plane.sudo(drain_cmd, hide=False, dry_run=dry_run) - upgrade_cri_if_required(first_control_plane) + upgrade_cri_if_required(first_control_plane, dry_run=dry_run) # The procedure for removing the deprecated kubelet flag for versions older than 1.27.0 - fix_flag_kubelet(cluster, first_control_plane) + fix_flag_kubelet(cluster, first_control_plane, dry_run=dry_run) first_control_plane.sudo( f"sudo kubeadm 
upgrade apply {version} {flags} && " f"sudo kubectl uncordon {node_name} && " - f"sudo systemctl restart kubelet", hide=False) + f"sudo systemctl restart kubelet", hide=False, dry_run=dry_run) + if dry_run: + cluster.log.debug("[dry-run]Nodes have correct Kubernetes version...") + cluster.log.debug("[dry-run] Pods are ready!") + return copy_admin_config(cluster.log, first_control_plane) @@ -840,7 +876,7 @@ def upgrade_first_control_plane(upgrade_group: NodeGroup, cluster: KubernetesClu exclude_node_from_upgrade_list(first_control_plane, node_name) -def upgrade_other_control_planes(upgrade_group: NodeGroup, cluster: KubernetesCluster, **drain_kwargs): +def upgrade_other_control_planes(upgrade_group: NodeGroup, cluster: KubernetesCluster, dry_run=False, **drain_kwargs): version = cluster.inventory["services"]["kubeadm"]["kubernetesVersion"] first_control_plane = cluster.nodes['control-plane'].get_first_member() @@ -855,15 +891,14 @@ def upgrade_other_control_planes(upgrade_group: NodeGroup, cluster: KubernetesCl cluster.log.debug("Upgrading control-plane \"%s\"" % node_name) # put control-plane patches - create_kubeadm_patches_for_node(cluster, node) - + create_kubeadm_patches_for_node(cluster, node, dry_run=dry_run) drain_cmd = prepare_drain_command(cluster, node_name, **drain_kwargs) - node.sudo(drain_cmd, hide=False) + node.sudo(drain_cmd, hide=False, dry_run=dry_run) - upgrade_cri_if_required(node) + upgrade_cri_if_required(node, dry_run=dry_run) # The procedure for removing the deprecated kubelet flag for versions older than 1.27.0 - fix_flag_kubelet(cluster, node) + fix_flag_kubelet(cluster, node, dry_run=dry_run) # TODO: when k8s v1.21 is excluded from Kubemarine, this condition should be removed # and only "else" branch remains @@ -874,21 +909,24 @@ def upgrade_other_control_planes(upgrade_group: NodeGroup, cluster: KubernetesCl f"/etc/kubernetes/manifests/kube-apiserver.yaml && " f"sudo kubectl uncordon {node_name} && " f"sudo systemctl restart kubelet", - hide=False) + hide=False, dry_run=dry_run) else: node.sudo( f"sudo kubeadm upgrade node --certificate-renewal=true --patches=/etc/kubernetes/patches && " f"sudo kubectl uncordon {node_name} && " f"sudo systemctl restart kubelet", - hide=False) - + hide=False, dry_run=dry_run) + if dry_run: + cluster.log.debug("[dry-run]Nodes have correct Kubernetes version...") + cluster.log.debug("[dry-run] Pods are ready!") + continue expect_kubernetes_version(cluster, version, apply_filter=node_name) copy_admin_config(cluster.log, node) wait_for_any_pods(cluster, node, apply_filter=node_name) exclude_node_from_upgrade_list(first_control_plane, node_name) -def patch_kubeadm_configmap(first_control_plane: NodeGroup, cluster: KubernetesCluster): +def patch_kubeadm_configmap(first_control_plane: NodeGroup, cluster: KubernetesCluster, dry_run=False): ''' Checks and patches the Kubeadm configuration for compliance with the current imageRepository, audit log path and the corresponding version of the CoreDNS path to the image. 
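
For orientation while reading these hunks: nearly every helper touched by this patch follows the same dry-run convention, resolving the flag once from the cluster context and passing it down to put()/sudo(), which the new _handle_dry_run decorator short-circuits. A minimal sketch of that convention (illustrative only: example_task, the command and the paths are made up; utils.check_dry_run_status_active and the dry_run keyword are the ones this diff introduces):

    import io

    from kubemarine.core import utils
    from kubemarine.core.group import NodeGroup


    def example_task(group: NodeGroup):
        # "dry_run" is set in cluster.context by run_tasks() when --without-act is passed.
        dry_run = utils.check_dry_run_status_active(group.cluster)

        # With dry_run=True the decorated put()/sudo() only log the intended action and
        # return an empty result; with dry_run falsy they behave exactly as before.
        group.put(io.StringIO("example payload\n"), "/tmp/example.conf", sudo=True, dry_run=dry_run)
        return group.sudo("cat /tmp/example.conf", warn=True, dry_run=dry_run)
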
@@ -925,12 +963,12 @@ def patch_kubeadm_configmap(first_control_plane: NodeGroup, cluster: KubernetesC kubelet_config = first_control_plane.sudo("cat /var/lib/kubelet/config.yaml").get_simple_out() ryaml.dump(cluster_config, updated_config) result_config = kubelet_config + "---\n" + updated_config.getvalue() - first_control_plane.put(io.StringIO(result_config), "/tmp/kubeadm_config.yaml", sudo=True) + first_control_plane.put(io.StringIO(result_config), "/tmp/kubeadm_config.yaml", sudo=True, dry_run=dry_run) return True -def upgrade_workers(upgrade_group: NodeGroup, cluster: KubernetesCluster, **drain_kwargs): +def upgrade_workers(upgrade_group: NodeGroup, cluster: KubernetesCluster, dry_run=False, **drain_kwargs): version = cluster.inventory["services"]["kubeadm"]["kubernetesVersion"] first_control_plane = cluster.nodes['control-plane'].get_first_member() @@ -945,28 +983,31 @@ def upgrade_workers(upgrade_group: NodeGroup, cluster: KubernetesCluster, **drai cluster.log.debug("Upgrading worker \"%s\"" % node_name) # put control-plane patches - create_kubeadm_patches_for_node(cluster, node) - + create_kubeadm_patches_for_node(cluster, node, dry_run=dry_run) drain_cmd = prepare_drain_command(cluster, node_name, **drain_kwargs) - first_control_plane.sudo(drain_cmd, hide=False) + first_control_plane.sudo(drain_cmd, hide=False, dry_run=dry_run) - upgrade_cri_if_required(node) + upgrade_cri_if_required(node, dry_run=dry_run) # The procedure for removing the deprecated kubelet flag for versions older than 1.27.0 - fix_flag_kubelet(cluster, node) + fix_flag_kubelet(cluster, node, dry_run=dry_run) # TODO: when k8s v1.21 is excluded from Kubemarine, this condition should be removed # and only "else" branch remains if "v1.21" in cluster.inventory["services"]["kubeadm"]["kubernetesVersion"]: node.sudo( "kubeadm upgrade node --certificate-renewal=true && " - "sudo systemctl restart kubelet") + "sudo systemctl restart kubelet", dry_run=dry_run) else: node.sudo( "kubeadm upgrade node --certificate-renewal=true --patches=/etc/kubernetes/patches && " - "sudo systemctl restart kubelet") + "sudo systemctl restart kubelet", dry_run=dry_run) - first_control_plane.sudo("kubectl uncordon %s" % node_name, hide=False) + first_control_plane.sudo("kubectl uncordon %s" % node_name, hide=False, dry_run=dry_run) + if dry_run: + cluster.log.debug("[dry-run]Nodes have correct Kubernetes version...") + cluster.log.debug("[dry-run] Pods are ready!") + continue expect_kubernetes_version(cluster, version, apply_filter=node_name) # workers do not have system pods to wait for their start @@ -991,7 +1032,7 @@ def prepare_drain_command(cluster: KubernetesCluster, node_name: str, return drain_cmd -def upgrade_cri_if_required(group: NodeGroup): +def upgrade_cri_if_required(group: NodeGroup, dry_run=False): # currently it is invoked only for single node cluster: KubernetesCluster = group.cluster log = cluster.log @@ -1004,9 +1045,9 @@ def upgrade_cri_if_required(group: NodeGroup): packages.install(group, include=cri_packages) log.debug(f"Restarting all containers on node: {group.get_node_name()}") if cri_impl == "docker": - group.sudo("docker container rm -f $(sudo docker container ls -q)", warn=True) + group.sudo("docker container rm -f $(sudo docker container ls -q)", warn=True, dry_run=dry_run) else: - group.sudo("crictl rm -fa", warn=True) + group.sudo("crictl rm -fa", warn=True, dry_run=dry_run) else: log.debug(f"{cri_impl} upgrade is not required") @@ -1259,7 +1300,7 @@ def images_grouped_prepull(group: NodeGroup, group_size: 
int = None): for group_i in range(groups_amount): log.verbose('Prepulling images for group #%s...' % group_i) # RemoteExecutor used for future cases, when some nodes will require another/additional actions for prepull - for node_i in range(group_i*group_size, (group_i*group_size)+group_size): + for node_i in range(group_i*group_size, (group_i*group_size) + group_size): if node_i < nodes_amount: images_prepull(nodes[node_i], collector=collector) @@ -1272,7 +1313,7 @@ def images_prepull(group: DeferredGroup, collector: CollectorCallback) -> Token: :param group: NodeGroup where prepull should be performed. :return: NodeGroupResult from all nodes in presented group. """ - + dry_run = utils.check_dry_run_status_active(group.cluster) config = get_kubeadm_config(group.cluster.inventory) kubeadm_init: dict = { 'apiVersion': group.cluster.inventory["services"]["kubeadm"]['apiVersion'], @@ -1282,13 +1323,14 @@ def images_prepull(group: DeferredGroup, collector: CollectorCallback) -> Token: configure_container_runtime(group.cluster, kubeadm_init) config = f'{config}---\n{yaml.dump(kubeadm_init, default_flow_style=False)}' - group.put(io.StringIO(config), '/etc/kubernetes/prepull-config.yaml', sudo=True) - - return group.sudo("kubeadm config images pull --config=/etc/kubernetes/prepull-config.yaml", - callback=collector) + group.put(io.StringIO(config), '/etc/kubernetes/prepull-config.yaml', sudo=True, dry_run=dry_run) + return group.sudo("kubeadm config images pull --config=/etc/kubernetes/prepull-config.yaml", callback=collector, dry_run=dry_run) def schedule_running_nodes_report(cluster: KubernetesCluster): + if utils.check_dry_run_status_active(cluster): + cluster.log.verbose("[dry-run] Scheduling running nodes report") + return summary.schedule_delayed_report(cluster, exec_running_nodes_report) @@ -1372,8 +1414,11 @@ def get_patched_flags_for_control_plane_item(inventory: dict, control_plane_item # function to create kubeadm patches and put them to a node -def create_kubeadm_patches_for_node(cluster: KubernetesCluster, node: NodeGroup): + +def create_kubeadm_patches_for_node(cluster: KubernetesCluster, node: NodeGroup, dry_run=False): cluster.log.verbose(f"Create and upload kubeadm patches to %s..." 
% node.get_node_name()) + if dry_run: + return node.sudo('sudo rm -rf /etc/kubernetes/patches ; sudo mkdir -p /etc/kubernetes/patches', warn=True) # TODO: when k8s v1.21 is excluded from Kubemarine, this condition should be removed @@ -1407,7 +1452,7 @@ def create_kubeadm_patches_for_node(cluster: KubernetesCluster, node: NodeGroup) return -def fix_flag_kubelet(cluster: KubernetesCluster, node: NodeGroup): +def fix_flag_kubelet(cluster: KubernetesCluster, node: NodeGroup, dry_run=False): #Deprecated flag removal function for kubelet kubeadm_file = "/var/lib/kubelet/kubeadm-flags.env" version = cluster.inventory["services"]["kubeadm"]["kubernetesVersion"] @@ -1419,7 +1464,7 @@ def fix_flag_kubelet(cluster: KubernetesCluster, node: NodeGroup): kubeadm_flags = node.sudo(f"cat {kubeadm_file}").get_simple_out() if kubeadm_flags.find('--container-runtime=remote') != -1: kubeadm_flags = kubeadm_flags.replace('--container-runtime=remote', '') - node.put(io.StringIO(kubeadm_flags), kubeadm_file, backup=True, sudo=True) + node.put(io.StringIO(kubeadm_flags), kubeadm_file, backup=True, sudo=True, dry_run=dry_run) def _config_changer(config: str, word: str): @@ -1433,4 +1478,3 @@ def _config_changer(config: str, word: str): else: param_end_pos = config.rfind("\"") return config[:param_end_pos] + " " + word[:] + "\"" - diff --git a/kubemarine/kubernetes_accounts.py b/kubemarine/kubernetes_accounts.py index 782b3ca01..9ad77025c 100644 --- a/kubemarine/kubernetes_accounts.py +++ b/kubemarine/kubernetes_accounts.py @@ -66,6 +66,7 @@ def enrich_inventory(inventory: dict, _: KubernetesCluster) -> dict: def install(cluster: KubernetesCluster) -> None: rbac = cluster.inventory['rbac'] + dry_run = utils.check_dry_run_status_active(cluster) if not rbac.get("accounts"): cluster.log.debug("No accounts specified to install, skipping...") return @@ -86,10 +87,10 @@ def install(cluster: KubernetesCluster) -> None: cluster.log.debug("Uploading template...") cluster.log.debug("\tDestination: %s" % destination_path) - cluster.nodes['control-plane'].put(io.StringIO(dump), destination_path, sudo=True) + cluster.nodes['control-plane'].put(io.StringIO(dump), destination_path, sudo=True, dry_run=dry_run) cluster.log.debug("Applying yaml...") - cluster.nodes['control-plane'].get_first_member().sudo('kubectl apply -f %s' % destination_path, hide=False) + cluster.nodes['control-plane'].get_first_member().sudo('kubectl apply -f %s' % destination_path, hide=False, dry_run=dry_run) cluster.log.debug('Loading token...') load_tokens_cmd = 'kubectl -n %s get secret ' \ @@ -98,14 +99,17 @@ def install(cluster: KubernetesCluster) -> None: token: Optional[str] = None retries = cluster.globals['accounts']['retries'] - # Token creation in Kubernetes 1.24 is not syncronus, therefore retries are necessary - while retries > 0: - result = cluster.nodes['control-plane'].get_first_member().sudo(load_tokens_cmd) - token = list(result.values())[0].stdout - if not token: - retries -= 1 - else: - break + if dry_run: + token = "dry-run" + else: + # Token creation in Kubernetes 1.24 is not synchronous, therefore retries are necessary + while retries > 0: + result = cluster.nodes['control-plane'].get_first_member().sudo(load_tokens_cmd) + token = list(result.values())[0].stdout + if not token: + retries -= 1 + else: + break if not token: raise Exception(f"The token loading for {account['name']} 'ServiceAccount' failed") diff --git a/kubemarine/plugins/__init__.py b/kubemarine/plugins/__init__.py index dbc69f546..d525c28e0 100755 --- 
a/kubemarine/plugins/__init__.py +++ b/kubemarine/plugins/__init__.py @@ -198,8 +198,7 @@ def install(cluster: KubernetesCluster, plugins_: Dict[str, dict] = None): def install_plugin(cluster: KubernetesCluster, plugin_name: str, installation_procedure: List[dict]): cluster.log.debug("**** INSTALLING PLUGIN %s ****" % plugin_name) - - for current_step_i, step in enumerate(installation_procedure): + for _, step in enumerate(installation_procedure): for apply_type, configs in step.items(): procedure_types()[apply_type]['apply'](cluster, configs, plugin_name) @@ -414,7 +413,9 @@ def expect_deployment(cluster: KubernetesCluster, def expect_pods(cluster: KubernetesCluster, pods: List[str], namespace=None, timeout=None, retries=None, node: NodeGroup = None, apply_filter: str = None): - + if utils.check_dry_run_status_active(cluster): + cluster.log.debug("Pods are ready!") + return None if timeout is None: timeout = cluster.inventory['globals']['expect']['pods']['plugins']['timeout'] if retries is None: @@ -545,6 +546,9 @@ def apply_expect(cluster: KubernetesCluster, config: dict, plugin_name=None): # TODO: Add support for expect services and expect nodes for expect_type, expect_conf in config.items(): + if utils.check_dry_run_status_active(cluster): + cluster.log.debug(f"[dry-run] {expect_type} are up to date...") + return None if expect_type == 'daemonsets': expect_daemonset(cluster, config['daemonsets']['list'], timeout=config['daemonsets'].get('timeout'), @@ -712,10 +716,14 @@ def apply_shell(cluster: KubernetesCluster, step: dict, plugin_name=None): in_vars_dict[var_name] = f"'{var_value}'" cluster.log.debug('Running shell command...') + dry_run = utils.check_dry_run_status_active(cluster) if sudo: - result = common_group.sudo(commands, env=in_vars_dict) + result = common_group.sudo(commands, env=in_vars_dict, dry_run=dry_run) else: - result = common_group.run(commands, env=in_vars_dict) + result = common_group.run(commands, env=in_vars_dict, dry_run=dry_run) + + if dry_run: + return None if out_vars: stdout = list(result.values())[0].stdout @@ -778,6 +786,9 @@ def apply_ansible(cluster: KubernetesCluster, step: dict, plugin_name=None): cluster.log.verbose("Running shell \"%s\"" % command) result = subprocess.run(command, stdout=sys.stdout, stderr=sys.stderr, shell=True) + if utils.check_dry_run_status_active(cluster): + cluster.log.debug("[dry-run] Successfully applied ansible plugin...") + return None if result.returncode != 0: raise Exception("Failed to apply ansible plugin, see error above") @@ -821,6 +832,9 @@ def apply_helm(cluster: KubernetesCluster, config: dict, plugin_name=None): command = prepare_for_helm_command + f'{deployment_mode} {release} {chart_path} --create-namespace --debug' output = subprocess.check_output(command, shell=True) + if utils.check_dry_run_status_active(cluster): + cluster.log.debug("[dry-run] Successfully applied helm plugin...") + return None cluster.log.debug(output.decode('utf-8')) return output @@ -1008,7 +1022,7 @@ def apply_source(cluster: KubernetesCluster, config: dict) -> None: source = config['source'] destination_path = config['destination'] apply_command = config.get('apply_command', 'kubectl apply -f %s' % destination_path) - + dry_run = utils.check_dry_run_status_active(cluster) if not destination_groups and not destination_nodes: destination_common_group = cluster.nodes['control-plane'] else: @@ -1019,14 +1033,14 @@ def apply_source(cluster: KubernetesCluster, config: dict) -> None: else: apply_common_group = 
cluster.create_group_from_groups_nodes_names(apply_groups, apply_nodes) - destination_common_group.put(source, destination_path, backup=True, sudo=use_sudo) + destination_common_group.put(source, destination_path, backup=True, sudo=use_sudo, dry_run=dry_run) if apply_required: cluster.log.debug("Applying yaml...") if use_sudo: - apply_common_group.sudo(apply_command, hide=False) + apply_common_group.sudo(apply_command, hide=False, dry_run=dry_run) else: - apply_common_group.run(apply_command, hide=False) + apply_common_group.run(apply_command, hide=False, dry_run=dry_run) else: cluster.log.debug('Apply is not required') diff --git a/kubemarine/plugins/nginx_ingress.py b/kubemarine/plugins/nginx_ingress.py index 06cc76fdd..e2289bbe7 100644 --- a/kubemarine/plugins/nginx_ingress.py +++ b/kubemarine/plugins/nginx_ingress.py @@ -28,6 +28,9 @@ def check_job_for_nginx(cluster: KubernetesCluster) -> None: major_version = int(version[1]) minor_version = int(version[2]) + if utils.check_dry_run_status_active(cluster): + cluster.log.debug('[dry-run] There are no jobs to delete') + return check_jobs = first_control_plane.sudo(f"kubectl get jobs -n ingress-nginx") if list(check_jobs.values())[0].stderr == "" and major_version >= 1 and minor_version >= 4: @@ -103,6 +106,10 @@ def manage_custom_certificate(cluster: KubernetesCluster) -> None: first_control_plane = cluster.nodes["control-plane"].get_first_member() default_cert = cluster.inventory["plugins"]["nginx-ingress-controller"]["controller"]["ssl"]["default-certificate"] + if utils.check_dry_run_status_active(cluster): + cluster.log.debug("[dry-run] Successfully applied custom certificates") + return + # first, we need to load cert and key files to first control-plane to known locations first_control_plane.sudo(f"mkdir -p {base_path}") try: diff --git a/kubemarine/procedures/add_node.py b/kubemarine/procedures/add_node.py index e9795a69b..d4f1cb6f9 100755 --- a/kubemarine/procedures/add_node.py +++ b/kubemarine/procedures/add_node.py @@ -102,12 +102,24 @@ def cache_installed_packages(cluster: KubernetesCluster): packages.cache_package_versions(cluster, cluster.inventory, by_initial_nodes=True) +def validate_new_nodes(cluster: KubernetesCluster): + old_nodes = set([node['connect_to'] for node in cluster.inventory['nodes'] + if 'add_node' not in node['roles']]) + new_nodes = set(cluster.nodes['add_node'].get_hosts()) + for host in new_nodes: + if host in old_nodes: + raise Exception(f"Node {host} is already present in the cluster") + return None + + tasks: typing.OrderedDict[str, Any] = OrderedDict(copy.deepcopy(install.tasks)) del tasks["deploy"]["plugins"] del tasks["deploy"]["accounts"] tasks["deploy"]["kubernetes"]["init"] = deploy_kubernetes_join tasks["cache_packages"] = cache_installed_packages +tasks["validate_nodes"] = validate_new_nodes tasks.move_to_end("cache_packages", last=False) +tasks.move_to_end("validate_nodes", last=False) class AddNodeAction(Action): diff --git a/kubemarine/procedures/backup.py b/kubemarine/procedures/backup.py index 08e80782f..1e332beab 100755 --- a/kubemarine/procedures/backup.py +++ b/kubemarine/procedures/backup.py @@ -404,6 +404,8 @@ def pack_data(cluster: KubernetesCluster): target = os.path.join(target, backup_filename) cluster.log.debug('Packing all data...') + if utils.check_dry_run_status_active(cluster): + return None with tarfile.open(target, "w:gz") as tar_handle: for root, dirs, files in os.walk(backup_directory): for file in files: diff --git a/kubemarine/procedures/do.py b/kubemarine/procedures/do.py 
index 0f1a8ff6a..31ba81200 100755 --- a/kubemarine/procedures/do.py +++ b/kubemarine/procedures/do.py @@ -18,6 +18,7 @@ import sys from typing import Callable, List, Dict +from kubemarine.core import utils from kubemarine.core import flow, resources from kubemarine.core.action import Action from kubemarine.core.cluster import KubernetesCluster @@ -31,6 +32,7 @@ shell_command command to execute on nodes """ + class CLIAction(Action): def __init__(self, node_group_provider: Callable[[KubernetesCluster], NodeGroup], remote_args: List[str], no_stream: bool) -> None: @@ -45,7 +47,9 @@ def run(self, res: DynamicResources) -> None: if executors_group.is_empty(): print('Failed to find any of specified nodes or groups') sys.exit(1) - + if utils.check_dry_run_status_active(cluster): + result = executors_group.sudo(" ".join(self.remote_args), hide=self.no_stream, warn=True, dry_run=True) + sys.exit(0) result = executors_group.sudo(" ".join(self.remote_args), hide=self.no_stream, warn=True) if self.no_stream: cluster.log.debug(result) diff --git a/kubemarine/procedures/install.py b/kubemarine/procedures/install.py index 894109f20..3638508e8 100755 --- a/kubemarine/procedures/install.py +++ b/kubemarine/procedures/install.py @@ -147,6 +147,9 @@ def system_prepare_system_setup_selinux(group: NodeGroup): @_applicable_for_new_nodes_with_roles('all') def system_prepare_system_setup_apparmor(group: NodeGroup): + if utils.check_dry_run_status_active(group.cluster): + group.cluster.log.debug("[dry-run] Setting up apparmor") + return group.call(apparmor.setup_apparmor) @@ -181,26 +184,30 @@ def system_prepare_policy(group: NodeGroup): Task generates rules for logging kubernetes and on audit """ cluster: KubernetesCluster = group.cluster + dry_run = utils.check_dry_run_status_active(cluster) api_server_extra_args = cluster.inventory['services']['kubeadm']['apiServer']['extraArgs'] audit_log_dir = os.path.dirname(api_server_extra_args['audit-log-path']) audit_file_name = api_server_extra_args['audit-policy-file'] audit_policy_dir = os.path.dirname(audit_file_name) - group.sudo(f"mkdir -p {audit_log_dir} && sudo mkdir -p {audit_policy_dir}") + group.sudo(f"mkdir -p {audit_log_dir} && sudo mkdir -p {audit_policy_dir}", dry_run=dry_run) policy_config = cluster.inventory['services']['audit'].get('cluster_policy') collect_node = group.get_ordered_members_list() if policy_config: policy_config_file = yaml.dump(policy_config) utils.dump_file(cluster, policy_config_file, 'audit-policy.yaml') - #download rules in cluster + # download rules in cluster for node in collect_node: - node.put(io.StringIO(policy_config_file), audit_file_name, sudo=True, backup=True) + node.put(io.StringIO(policy_config_file), audit_file_name, sudo=True, backup=True, dry_run=dry_run) audit_config = True cluster.log.debug("Audit cluster policy config") else: audit_config = False cluster.log.debug("Audit cluster policy config is empty, nothing will be configured ") + if dry_run: + return + if kubernetes.is_cluster_installed(cluster) and audit_config is True and cluster.context['initial_procedure'] != 'add_node': for control_plane in collect_node: node_config = control_plane.get_config() @@ -254,10 +261,11 @@ def system_prepare_policy(group: NodeGroup): @_applicable_for_new_nodes_with_roles('all') def system_prepare_dns_hostname(group: NodeGroup): cluster: KubernetesCluster = group.cluster + dry_run = utils.check_dry_run_status_active(cluster) with group.new_executor() as exe: for node in exe.group.get_ordered_members_list(): 
cluster.log.debug("Changing hostname '%s' = '%s'" % (node.get_host(), node.get_node_name())) - node.sudo("hostnamectl set-hostname %s" % node.get_node_name()) + node.sudo("hostnamectl set-hostname %s" % node.get_node_name(), dry_run=dry_run) @_applicable_for_new_nodes_with_roles('all') @@ -266,9 +274,9 @@ def system_prepare_dns_resolv_conf(group: NodeGroup): if cluster.inventory["services"].get("resolv.conf") is None: cluster.log.debug("Skipped - resolv.conf section not defined in config file") return - - system.update_resolv_conf(group, config=cluster.inventory["services"].get("resolv.conf")) - cluster.log.debug(group.sudo("ls -la /etc/resolv.conf; sudo lsattr /etc/resolv.conf")) + dry_run = utils.check_dry_run_status_active(cluster) + system.update_resolv_conf(group, config=cluster.inventory["services"].get("resolv.conf"), dry_run=dry_run) + cluster.log.debug(group.sudo("ls -la /etc/resolv.conf; sudo lsattr /etc/resolv.conf", dry_run=dry_run)) def system_prepare_dns_etc_hosts(cluster: KubernetesCluster): @@ -279,9 +287,10 @@ def system_prepare_dns_etc_hosts(cluster: KubernetesCluster): cluster.log.debug("\nUploading...") group = cluster.nodes['all'].get_final_nodes() + dry_run = utils.check_dry_run_status_active(cluster) - system.update_etc_hosts(group, config=config) - cluster.log.debug(group.sudo("ls -la /etc/hosts")) + system.update_etc_hosts(group, config=config, dry_run=dry_run) + cluster.log.debug(group.sudo("ls -la /etc/hosts", dry_run=dry_run)) @_applicable_for_new_nodes_with_roles('all') @@ -310,6 +319,7 @@ def system_prepare_package_manager_manage_packages(group: NodeGroup): def manage_mandatory_packages(group: NodeGroup) -> RunnersGroupResult: cluster: KubernetesCluster = group.cluster + dry_run = utils.check_dry_run_status_active(cluster) collector = CollectorCallback(cluster) with group.new_executor() as exe: for node in exe.group.get_ordered_members_list(): @@ -320,7 +330,8 @@ def manage_mandatory_packages(group: NodeGroup) -> RunnersGroupResult: if pkgs: cluster.log.debug(f"Installing {pkgs} on {node.get_node_name()!r}") - packages.install(node, include=pkgs, callback=collector) + if not dry_run: + packages.install(node, include=pkgs, callback=collector) return collector.result @@ -355,6 +366,10 @@ def manage_custom_packages(group: NodeGroup) -> None: cluster.log.debug("Skipped - no packages configuration defined in config file") return None + if utils.check_dry_run_status_active(cluster): + cluster.log.verbose("[dry-run] Managing Custom Packages...") + return None + any_changes_found = False for action, results in batch_results.items(): cluster.log.verbose('Verifying packages changes after \'%s\' action...' 
% action) @@ -401,10 +416,16 @@ def system_prepare_thirdparties(group: NodeGroup): @_applicable_for_new_nodes_with_roles('balancer') def deploy_loadbalancer_haproxy_install(group: NodeGroup): + if utils.check_dry_run_status_active(group.cluster): + group.cluster.log.debug("[dry-run] Installing load-balancer HA-PROXY") + return group.call(haproxy.install) def deploy_loadbalancer_haproxy_configure(cluster: KubernetesCluster): + if utils.check_dry_run_status_active(cluster): + cluster.log.debug("[dry-run] Configuring load-balancer HA-PROXY") + return if not cluster.inventory['services'].get('loadbalancer', {}) \ .get('haproxy', {}).get('keep_configs_updated', True): @@ -454,6 +475,9 @@ def deploy_loadbalancer_keepalived_install(cluster: KubernetesCluster): cluster.log.debug('Skipped - no VRRP IPs to perform') return + if utils.check_dry_run_status_active(cluster): + cluster.log.debug('Installing load-balancer Keepalived') + return # add_node will impact all keepalived group.call(keepalived.install) @@ -516,13 +540,14 @@ def deploy_kubernetes_init(cluster: KubernetesCluster): def deploy_coredns(cluster: KubernetesCluster): + dry_run = utils.check_dry_run_status_active(cluster) config = coredns.generate_configmap(cluster.inventory) cluster.log.debug('Applying patch...') - cluster.log.debug(coredns.apply_patch(cluster)) + cluster.log.debug(coredns.apply_patch(cluster, dry_run)) cluster.log.debug('Applying configmap...') - cluster.log.debug(coredns.apply_configmap(cluster, config)) + cluster.log.debug(coredns.apply_configmap(cluster, config, dry_run)) def deploy_plugins(cluster: KubernetesCluster): @@ -536,6 +561,9 @@ def deploy_accounts(cluster: KubernetesCluster): def overview(cluster: KubernetesCluster): cluster.log.debug("Retrieving cluster status...") control_plane = cluster.nodes["control-plane"].get_final_nodes().get_first_member() + if utils.check_dry_run_status_active(cluster): + cluster.log.debug("[dry-run] Cluster is healthy...") + return cluster.log.debug("\nNAMESPACES:") control_plane.sudo("kubectl get namespaces", hide=False) cluster.log.debug("\nNODES:") diff --git a/kubemarine/procedures/migrate_cri.py b/kubemarine/procedures/migrate_cri.py index ff5641efb..05f7781f8 100755 --- a/kubemarine/procedures/migrate_cri.py +++ b/kubemarine/procedures/migrate_cri.py @@ -22,6 +22,7 @@ from kubemarine import kubernetes, etcd, thirdparties, cri from kubemarine.core import flow +from kubemarine.core import utils from kubemarine.core.action import Action from kubemarine.core.cluster import KubernetesCluster from kubemarine.core.group import NodeGroup @@ -147,7 +148,7 @@ def _migrate_cri(cluster: KubernetesCluster, node_group: List[NodeGroup]): :param cluster: main object describing a cluster :param node_group: group of nodes to migrate """ - + dry_run = utils.check_dry_run_status_active(cluster) for node in node_group: node_config = node.get_config() node_name = node.get_node_name() @@ -163,7 +164,7 @@ def _migrate_cri(cluster: KubernetesCluster, node_group: List[NodeGroup]): version = cluster.inventory["services"]["kubeadm"]["kubernetesVersion"] cluster.log.debug("Migrating \"%s\"..." 
% node_name) drain_cmd = kubernetes.prepare_drain_command(cluster, node_name, disable_eviction=True) - control_plane.sudo(drain_cmd, hide=False) + control_plane.sudo(drain_cmd, hide=False, dry_run=dry_run) kubeadm_flags_file = "/var/lib/kubelet/kubeadm-flags.env" kubeadm_flags = node.sudo(f"cat {kubeadm_flags_file}").get_simple_out() @@ -175,14 +176,14 @@ def _migrate_cri(cluster: KubernetesCluster, node_group: List[NodeGroup]): kubeadm_flags = edit_config(kubeadm_flags) - node.put(io.StringIO(kubeadm_flags), kubeadm_flags_file, backup=True, sudo=True) + node.put(io.StringIO(kubeadm_flags), kubeadm_flags_file, backup=True, sudo=True, dry_run=dry_run) - node.sudo("systemctl stop kubelet") - docker.prune(node) + node.sudo("systemctl stop kubelet", dry_run=dry_run) + docker.prune(node, dry_run=dry_run) docker_associations = cluster.get_associations_for_node(node.get_host(), 'docker') node.sudo(f"systemctl disable {docker_associations['service_name']} --now; " - "sudo sh -c 'rm -rf /var/lib/docker/*'") + "sudo sh -c 'rm -rf /var/lib/docker/*'", dry_run=dry_run) cluster.log.debug('Reinstalling CRI...') cri.install(node) @@ -190,6 +191,8 @@ def _migrate_cri(cluster: KubernetesCluster, node_group: List[NodeGroup]): cluster.log.debug(f'CRI configured! Restoring pods on node "{node_name}"') + if dry_run: + return # if there is a disk for docker in "/etc/fstab", then use this disk for containerd docker_disk_result = node.sudo("cat /etc/fstab | grep ' /var/lib/docker '", warn=True) docker_disk = list(docker_disk_result.values())[0].stdout.strip() @@ -253,6 +256,7 @@ def release_calico_leaked_ips(cluster: KubernetesCluster): Those ips are cleaned by calico garbage collector, but it can take about 20 minutes. This task releases problem ips with force. """ + dry_run = utils.check_dry_run_status_active(cluster) first_control_plane = cluster.nodes['control-plane'].get_first_member() cluster.log.debug("Getting leaked ips...") random_report_name = "/tmp/%s.json" % uuid.uuid4().hex @@ -261,9 +265,9 @@ def release_calico_leaked_ips(cluster: KubernetesCluster): leaked_ips_count = leaked_ips.count('leaked') cluster.log.debug(f"Found {leaked_ips_count} leaked ips") if leaked_ips_count != 0: - first_control_plane.sudo(f"calicoctl ipam release --from-report={random_report_name} --force", hide=False) + first_control_plane.sudo(f"calicoctl ipam release --from-report={random_report_name} --force", hide=False, dry_run=dry_run) cluster.log.debug("Leaked ips was released") - first_control_plane.sudo(f"rm {random_report_name}", hide=False) + first_control_plane.sudo(f"rm {random_report_name}", hide=False, dry_run=dry_run) def edit_config(kubeadm_flags: str): diff --git a/kubemarine/procedures/migrate_kubemarine.py b/kubemarine/procedures/migrate_kubemarine.py index 815bee793..5e2a7c076 100644 --- a/kubemarine/procedures/migrate_kubemarine.py +++ b/kubemarine/procedures/migrate_kubemarine.py @@ -119,7 +119,7 @@ def upgrade_cri(self, group: NodeGroup, workers: bool) -> None: drain_timeout = cluster.procedure_inventory.get('drain_timeout') grace_period = cluster.procedure_inventory.get('grace_period') disable_eviction = cluster.procedure_inventory.get("disable-eviction", True) - + dry_run = utils.check_dry_run_status_active(cluster) for node in group.get_ordered_members_list(): node_name = node.get_node_name() control_plane = node @@ -129,11 +129,13 @@ def upgrade_cri(self, group: NodeGroup, workers: bool) -> None: drain_cmd = kubernetes.prepare_drain_command( cluster, node_name, disable_eviction=disable_eviction, 
drain_timeout=drain_timeout, grace_period=grace_period) - control_plane.sudo(drain_cmd, hide=False) - - kubernetes.upgrade_cri_if_required(node) - node.sudo('systemctl restart kubelet') + control_plane.sudo(drain_cmd, hide=False, dry_run=dry_run) + kubernetes.upgrade_cri_if_required(node, dry_run=dry_run) + node.sudo('systemctl restart kubelet', dry_run=dry_run) + if dry_run: + cluster.log.debug("[dry-run] Upgraded Cri...") + return if workers: control_plane.sudo(f"kubectl uncordon {node_name}", hide=False) else: @@ -204,10 +206,11 @@ def run(self, res: DynamicResources) -> None: def _run(self, group: NodeGroup) -> None: cluster: KubernetesCluster = group.cluster packages.install(group, include=cluster.get_package_association(self.package_name, 'package_name')) + dry_run = utils.check_dry_run_status_active(cluster) if self.package_name == 'haproxy': - haproxy.restart(group) + haproxy.restart(group, dry_run) else: - keepalived.restart(group) + keepalived.restart(group, dry_run) def associations_changed(self, res: DynamicResources) -> bool: """ diff --git a/kubemarine/procedures/restore.py b/kubemarine/procedures/restore.py index f049c9f16..05edff84b 100755 --- a/kubemarine/procedures/restore.py +++ b/kubemarine/procedures/restore.py @@ -108,19 +108,24 @@ def verify_backup_data(cluster: KubernetesCluster): def stop_cluster(cluster: KubernetesCluster): cluster.log.debug('Stopping the existing cluster...') cri_impl = cluster.inventory['services']['cri']['containerRuntime'] + dry_run = utils.check_dry_run_status_active(cluster) if cri_impl == "docker": - result = cluster.nodes['control-plane'].sudo('systemctl stop kubelet; ' - 'sudo docker kill $(sudo docker ps -q); ' - 'sudo docker rm -f $(sudo docker ps -a -q); ' - 'sudo docker ps -a; ' - 'sudo rm -rf /var/lib/etcd; ' - 'sudo mkdir -p /var/lib/etcd', warn=True) + result = cluster.nodes['control-plane'].sudo( + 'systemctl stop kubelet; ' + 'sudo docker kill $(sudo docker ps -q); ' + 'sudo docker rm -f $(sudo docker ps -a -q); ' + 'sudo docker ps -a; ' + 'sudo rm -rf /var/lib/etcd; ' + 'sudo mkdir -p /var/lib/etcd', warn=True, + dry_run=dry_run) else: - result = cluster.nodes['control-plane'].sudo('systemctl stop kubelet; ' - 'sudo crictl rm -fa; ' - 'sudo crictl ps -a; ' - 'sudo rm -rf /var/lib/etcd; ' - 'sudo mkdir -p /var/lib/etcd', warn=True) + result = cluster.nodes['control-plane'].sudo( + 'systemctl stop kubelet; ' + 'sudo crictl rm -fa; ' + 'sudo crictl ps -a; ' + 'sudo rm -rf /var/lib/etcd; ' + 'sudo mkdir -p /var/lib/etcd', warn=True, + dry_run=dry_run) cluster.log.verbose(result) @@ -136,12 +141,13 @@ def restore_thirdparties(cluster: KubernetesCluster): def import_nodes(cluster: KubernetesCluster): + dry_run = utils.check_dry_run_status_active(cluster) with cluster.nodes['all'].new_executor() as exe: for node in exe.group.get_ordered_members_list(): node_name = node.get_node_name() cluster.log.debug('Uploading backup for \'%s\'' % node_name) node.put(os.path.join(cluster.context['backup_tmpdir'], 'nodes_data', '%s.tar.gz' % node_name), - '/tmp/kubemarine-backup.tar.gz') + '/tmp/kubemarine-backup.tar.gz', dry_run=dry_run) cluster.log.debug('Unpacking backup...') @@ -149,7 +155,7 @@ def import_nodes(cluster: KubernetesCluster): result = cluster.nodes['all'].sudo( f"readlink /etc/resolv.conf ; " f"if [ $? 
-ne 0 ]; then sudo chattr -i /etc/resolv.conf; {unpack_cmd} && sudo chattr +i /etc/resolv.conf; " - f"else {unpack_cmd}; fi ") + f"else {unpack_cmd}; fi ", dry_run=dry_run) cluster.log.debug(result) @@ -163,6 +169,7 @@ def import_etcd(cluster: KubernetesCluster): etcd_peer_key = etcd_all_certificates.get('peer_key', cluster.globals['etcd']['default_arguments']['peer_key']) etcd_peer_cacert = etcd_all_certificates.get('peer_cacert', cluster.globals['etcd']['default_arguments']['peer_cacert']) + dry_run = utils.check_dry_run_status_active(cluster) etcd_image = cluster.procedure_inventory.get('restore_plan', {}).get('etcd', {}).get('image') if not etcd_image: @@ -173,8 +180,12 @@ def import_etcd(cluster: KubernetesCluster): cluster.log.debug('Uploading ETCD snapshot...') snap_name = '/var/lib/etcd/etcd-snapshot%s.db' % int(round(time.time() * 1000)) - cluster.nodes['control-plane'].put(os.path.join(cluster.context['backup_tmpdir'], 'etcd.db'), snap_name, sudo=True) - + cluster.nodes['control-plane'].put(os.path.join(cluster.context['backup_tmpdir'], 'etcd.db'), + snap_name, sudo=True, + dry_run=dry_run) + if dry_run: + cluster.log.verbose('ETCD cluster is healthy!') + return initial_cluster_list = [] initial_cluster_list_without_names = [] for control_plane in cluster.nodes['control-plane'].get_ordered_members_configs_list(): diff --git a/kubemarine/procedures/upgrade.py b/kubemarine/procedures/upgrade.py index c397b4e82..a5bcf4d7e 100755 --- a/kubemarine/procedures/upgrade.py +++ b/kubemarine/procedures/upgrade.py @@ -54,29 +54,32 @@ def kubernetes_upgrade(cluster: KubernetesCluster): drain_timeout = cluster.procedure_inventory.get('drain_timeout') grace_period = cluster.procedure_inventory.get('grace_period') disable_eviction = cluster.procedure_inventory.get("disable-eviction", True) + dry_run = utils.check_dry_run_status_active(cluster) drain_kwargs = { 'disable_eviction': disable_eviction, 'drain_timeout': drain_timeout, 'grace_period': grace_period } - kubernetes.upgrade_first_control_plane(upgrade_group, cluster, **drain_kwargs) + kubernetes.upgrade_first_control_plane(upgrade_group, cluster, dry_run=dry_run, **drain_kwargs) # After first control-plane upgrade is finished we may loose our CoreDNS changes. # Thus, we need to re-apply our CoreDNS changes immediately after first control-plane upgrade. 
install.deploy_coredns(cluster) - kubernetes.upgrade_other_control_planes(upgrade_group, cluster, **drain_kwargs) + kubernetes.upgrade_other_control_planes(upgrade_group, cluster, dry_run=dry_run, **drain_kwargs) if cluster.nodes.get('worker', []): - kubernetes.upgrade_workers(upgrade_group, cluster, **drain_kwargs) + kubernetes.upgrade_workers(upgrade_group, cluster, dry_run=dry_run, **drain_kwargs) - cluster.nodes['control-plane'].get_first_member().sudo('rm -f /etc/kubernetes/nodes-k8s-versions.txt') + cluster.nodes['control-plane'].get_first_member().sudo('rm -f /etc/kubernetes/nodes-k8s-versions.txt', dry_run=dry_run) cluster.context['cached_nodes_versions_cleaned'] = True def kubernetes_cleanup_nodes_versions(cluster: KubernetesCluster): + dry_run = utils.check_dry_run_status_active(cluster) + if not cluster.context.get('cached_nodes_versions_cleaned', False): cluster.log.verbose('Cached nodes versions required') - cluster.nodes['control-plane'].get_first_member().sudo('rm -f /etc/kubernetes/nodes-k8s-versions.txt') + cluster.nodes['control-plane'].get_first_member().sudo('rm -f /etc/kubernetes/nodes-k8s-versions.txt', dry_run=dry_run) else: cluster.log.verbose('Cached nodes versions already cleaned') kubernetes_apply_taints(cluster) @@ -139,6 +142,9 @@ def upgrade_containerd(cluster: KubernetesCluster): config_string += f"\n[{key}]\n{toml.dumps(value)}" utils.dump_file(cluster, config_string, 'containerd-config.toml') + if utils.check_dry_run_status_active(cluster): + return + kubernetes_nodes = cluster.make_group_from_roles(['control-plane', 'worker']) collector = CollectorCallback(cluster) for member_node in kubernetes_nodes.get_ordered_members_list(): diff --git a/kubemarine/selinux.py b/kubemarine/selinux.py index 084115296..301156a73 100644 --- a/kubemarine/selinux.py +++ b/kubemarine/selinux.py @@ -177,6 +177,7 @@ def is_config_valid(group: NodeGroup, state: str = None, policy: str = None, per def setup_selinux(group: NodeGroup) -> Optional[RunnersGroupResult]: log = group.cluster.log + dry_run = utils.check_dry_run_status_active(group.cluster) # this method handles cluster with multiple os, suppressing should be enabled if group.get_nodes_os() not in ['rhel', 'rhel8']: @@ -200,7 +201,7 @@ def setup_selinux(group: NodeGroup) -> Optional[RunnersGroupResult]: log.debug("Uploading selinux config...") utils.dump_file(group.cluster, config, 'selinux_config') - group.put(config, '/etc/selinux/config', backup=True, sudo=True) + group.put(config, '/etc/selinux/config', backup=True, sudo=True, dry_run=dry_run) semanage_commands = '' for item in expected_permissive: @@ -209,7 +210,7 @@ def setup_selinux(group: NodeGroup) -> Optional[RunnersGroupResult]: semanage_commands = semanage_commands + 'semanage permissive -a %s' % item log.verbose("The following command will be executed to configure permissive:\n%s" % semanage_commands) - group.sudo(semanage_commands) + group.sudo(semanage_commands, dry_run=dry_run) group.cluster.schedule_cumulative_point(system.reboot_nodes) group.cluster.schedule_cumulative_point(system.verify_system) diff --git a/kubemarine/sysctl.py b/kubemarine/sysctl.py index 80f700e94..766465674 100644 --- a/kubemarine/sysctl.py +++ b/kubemarine/sysctl.py @@ -68,17 +68,19 @@ def configure(group: NodeGroup) -> RunnersGroupResult: The configuration will be placed in sysctl daemon directory. 
""" config = make_config(group.cluster) - group.sudo('rm -f /etc/sysctl.d/98-*-sysctl.conf') + dry_run = utils.check_dry_run_status_active(group.cluster) + group.sudo('rm -f /etc/sysctl.d/98-*-sysctl.conf', dry_run=dry_run) utils.dump_file(group.cluster, config, '98-kubemarine-sysctl.conf') - group.put(io.StringIO(config), '/etc/sysctl.d/98-kubemarine-sysctl.conf', backup=True, sudo=True) - return group.sudo('ls -la /etc/sysctl.d/98-kubemarine-sysctl.conf') + group.put(io.StringIO(config), '/etc/sysctl.d/98-kubemarine-sysctl.conf', backup=True, sudo=True, dry_run=dry_run) + return group.sudo('ls -la /etc/sysctl.d/98-kubemarine-sysctl.conf', dry_run=dry_run) def reload(group: NodeGroup) -> RunnersGroupResult: """ Reloads sysctl configuration in the specified group. """ - return group.sudo('sysctl -p /etc/sysctl.d/98-*-sysctl.conf') + return group.sudo('sysctl -p /etc/sysctl.d/98-*-sysctl.conf', + dry_run=utils.check_dry_run_status_active(group.cluster)) def get_pid_max(inventory: dict) -> int: diff --git a/kubemarine/system.py b/kubemarine/system.py index 0e8e4bc3b..be6abb09e 100644 --- a/kubemarine/system.py +++ b/kubemarine/system.py @@ -173,11 +173,11 @@ def detect_of_family_by_name_version(name: str, version: str) -> str: return os_family -def update_resolv_conf(group: NodeGroup, config: dict) -> None: +def update_resolv_conf(group: NodeGroup, config: dict, dry_run=False) -> None: # TODO: Use Jinja template buffer = get_resolv_conf_buffer(config) utils.dump_file(group.cluster, buffer, 'resolv.conf') - group.put(buffer, "/etc/resolv.conf", backup=True, immutable=True, sudo=True) + group.put(buffer, "/etc/resolv.conf", backup=True, immutable=True, sudo=True, dry_run=dry_run) def get_resolv_conf_buffer(config: dict) -> io.StringIO: @@ -214,9 +214,9 @@ def generate_etc_hosts_config(inventory: dict, etc_hosts_part: str = 'etc_hosts_ return result -def update_etc_hosts(group: NodeGroup, config: str) -> None: +def update_etc_hosts(group: NodeGroup, config: str, dry_run=False) -> None: utils.dump_file(group.cluster, config, 'etc_hosts') - group.put(io.StringIO(config), "/etc/hosts", backup=True, sudo=True) + group.put(io.StringIO(config), "/etc/hosts", backup=True, sudo=True, dry_run=dry_run) def stop_service(group: AbstractGroup[GROUP_RUN_TYPE], name: str, callback: Callback = None) -> GROUP_RUN_TYPE: @@ -253,7 +253,7 @@ def disable_service(group: AbstractGroup[GROUP_RUN_TYPE], name: str = None, cmd = 'systemctl disable %s' % name if now: cmd = cmd + " --now" - return group.sudo(cmd, callback=callback) + return group.sudo(cmd, callback=callback, dry_run=utils.check_dry_run_status_active(group.cluster)) def patch_systemd_service(group: DeferredGroup, service_name: str, patch_source: str) -> None: @@ -321,9 +321,9 @@ def disable_swap(group: NodeGroup) -> Optional[RunnersGroupResult]: return result log.verbose("Switching swap off...") - - group.sudo('swapoff -a', warn=True) - group.sudo('sed -i.bak \'/swap/d\' /etc/fstab', warn=True) + dry_run = utils.check_dry_run_status_active(group.cluster) + group.sudo('swapoff -a', warn=True, dry_run=dry_run) + group.sudo('sed -i.bak \'/swap/d\' /etc/fstab', warn=True, dry_run=dry_run) group.cluster.schedule_cumulative_point(reboot_nodes) group.cluster.schedule_cumulative_point(verify_system) @@ -332,12 +332,18 @@ def disable_swap(group: NodeGroup) -> Optional[RunnersGroupResult]: def reboot_nodes(cluster: KubernetesCluster) -> None: + if utils.check_dry_run_status_active(cluster): + cluster.log.debug("[dry-run] Performing reboot") + return 
cluster.nodes["all"].get_new_nodes_or_self().call(reboot_group) def reboot_group(group: NodeGroup, try_graceful: bool = None) -> RunnersGroupResult: cluster: KubernetesCluster = group.cluster log = cluster.log + if utils.check_dry_run_status_active(cluster): + log.debug("[dry-run] Rebooting Nodes...") + return if try_graceful is None: if 'controlplain_uri' not in cluster.context.keys(): @@ -388,8 +394,8 @@ def perform_group_reboot(group: NodeGroup) -> RunnersGroupResult: return result -def reload_systemctl(group: AbstractGroup[GROUP_RUN_TYPE]) -> GROUP_RUN_TYPE: - return group.sudo('systemctl daemon-reload') +def reload_systemctl(group: AbstractGroup[GROUP_RUN_TYPE], dry_run=False) -> GROUP_RUN_TYPE: + return group.sudo('systemctl daemon-reload', dry_run=dry_run) def configure_chronyd(group: NodeGroup, retries: int = 60) -> RunnersGroupResult: @@ -409,6 +415,11 @@ def configure_chronyd(group: NodeGroup, retries: int = 60) -> RunnersGroupResult utils.dump_file(cluster, chronyd_config, 'chrony.conf') group.put(io.StringIO(chronyd_config), '/etc/chrony.conf', backup=True, sudo=True) group.sudo('systemctl restart chronyd') + + if utils.check_dry_run_status_active(group.cluster): + log.debug("[dry-run] Successfully configured chronyd") + return + while retries > 0: log.debug("Waiting for time sync, retries left: %s" % retries) results = group.sudo('chronyc tracking && sudo chronyc sources') @@ -454,6 +465,9 @@ def configure_timesyncd(group: NodeGroup, retries: int = 120) -> RunnersGroupRes '&& sudo systemctl restart systemd-timesyncd.service ' '&& sudo systemctl status systemd-timesyncd.service') log.verbose(res) + if utils.check_dry_run_status_active(group.cluster): + log.debug("[dry-run] Successfully configured timesyncd") + return while retries > 0: log.debug("Waiting for time sync, retries left: %s" % retries) results = group.sudo('timedatectl timesync-status && sudo timedatectl status') @@ -502,8 +516,10 @@ def setup_modprobe(group: NodeGroup) -> Optional[RunnersGroupResult]: log.debug("Uploading config...") utils.dump_file(group.cluster, config, 'modprobe_predefined.conf') - group.put(io.StringIO(config), "/etc/modules-load.d/predefined.conf", backup=True, sudo=True) - group.sudo("modprobe -a %s" % raw_config) + + dry_run = utils.check_dry_run_status_active(group.cluster) + group.put(io.StringIO(config), "/etc/modules-load.d/predefined.conf", backup=True, sudo=True, dry_run=dry_run) + group.sudo("modprobe -a %s" % raw_config, dry_run=dry_run) group.cluster.schedule_cumulative_point(reboot_nodes) group.cluster.schedule_cumulative_point(verify_system) @@ -528,6 +544,9 @@ def is_modprobe_valid(group: NodeGroup) -> Tuple[bool, RunnersGroupResult]: def verify_system(cluster: KubernetesCluster) -> None: + if utils.check_dry_run_status_active(cluster): + cluster.log.debug("[dry-run] verifying system") + return None group = cluster.nodes["all"].get_new_nodes_or_self() log = cluster.log # this method handles clusters with multiple OS diff --git a/kubemarine/thirdparties.py b/kubemarine/thirdparties.py index 9ec72bc63..dba097688 100644 --- a/kubemarine/thirdparties.py +++ b/kubemarine/thirdparties.py @@ -264,7 +264,7 @@ def enrich_inventory_apply_defaults(inventory: dict, cluster: KubernetesCluster) crictl_key = '/usr/bin/crictl.tar.gz' if cri_name == "docker" and \ crictl_key not in cluster.raw_inventory.get('services', {}).get('thirdparties', {}): - del(thirdparties[crictl_key]) + del thirdparties[crictl_key] return inventory @@ -291,6 +291,7 @@ def get_group_require_unzip(cluster: 
KubernetesCluster, inventory: dict) -> Node def install_thirdparty(filter_group: NodeGroup, destination: str) -> Optional[RunnersGroupResult]: cluster = filter_group.cluster + dry_run = utils.check_dry_run_status_active(cluster) config = cluster.inventory['services'].get('thirdparties', {}).get(destination) if config is None: @@ -331,11 +332,11 @@ def install_thirdparty(filter_group: NodeGroup, destination: str) -> Optional[Ru remote_commands += ' && sudo rm -f %s && sudo curl --max-time %d -f -g -L %s -o %s && ' % (destination, cluster.inventory['globals']['timeout_download'], config['source'], destination) else: cluster.log.verbose('Installation via sftp upload detected') - cluster.log.debug(common_group.sudo(remote_commands)) + cluster.log.debug(common_group.sudo(remote_commands, dry_run=dry_run)) remote_commands = '' # TODO: Possible use SHA1 from inventory instead of calculating if provided? script = utils.read_internal(config['source']) - common_group.put(io.StringIO(script), destination, sudo=True) + common_group.put(io.StringIO(script), destination, sudo=True, dry_run=dry_run) # TODO: Do not upload local files if they already exists on remote machines @@ -352,25 +353,24 @@ def install_thirdparty(filter_group: NodeGroup, destination: str) -> Optional[Ru if extension == 'zip': cluster.log.verbose('Unzip will be used for unpacking') remote_commands += ' && sudo unzip -o %s -d %s' % (destination, config['unpack']) - + remote_commands += ' && sudo unzip -qq -l %s | awk \'NF > 3 { print $4 }\'| xargs -I FILE sudo chmod %s %s/FILE' \ % (destination, config['mode'], config['unpack']) remote_commands += ' && sudo unzip -qq -l %s | awk \'NF > 3 { print $4 }\'| xargs -I FILE sudo chown -R %s %s/FILE' \ % (destination, config['owner'], config['unpack']) remote_commands += ' && sudo unzip -qq -l %s | awk \'NF > 3 { print $4 }\'| xargs -I FILE sudo ls -la %s/FILE' % (destination, config['unpack']) - + else: cluster.log.verbose('Tar will be used for unpacking') remote_commands += ' && sudo tar -zxf %s -C %s' % (destination, config['unpack']) - + remote_commands += ' && sudo tar -tf %s | xargs -I FILE sudo chmod %s %s/FILE' \ % (destination, config['mode'], config['unpack']) remote_commands += ' && sudo tar -tf %s | xargs -I FILE sudo chown %s %s/FILE' \ % (destination, config['owner'], config['unpack']) remote_commands += ' && sudo tar -tf %s | xargs -I FILE sudo ls -la %s/FILE' % (destination, config['unpack']) - - return common_group.sudo(remote_commands) + return common_group.sudo(remote_commands, dry_run=dry_run) def install_all_thirparties(group: NodeGroup) -> None: diff --git a/kubemarine/yum.py b/kubemarine/yum.py index 0d923891d..aebdb9c05 100644 --- a/kubemarine/yum.py +++ b/kubemarine/yum.py @@ -31,15 +31,17 @@ def backup_repo(group: NodeGroup) -> Optional[RunnersGroupResult]: if not group.cluster.inventory['services']['packages']['package_manager']['replace-repositories']: group.cluster.log.debug("Skipped - repos replacement disabled in configuration") return None + dry_run = utils.check_dry_run_status_active(group.cluster) # all files in directory will be renamed: xxx.repo -> xxx.repo.bak # if there already any files with ".bak" extension, they should not be renamed to ".bak.bak"! 
return group.sudo("find /etc/yum.repos.d/ -type f -name '*.repo' | " - "sudo xargs -t -iNAME mv -bf NAME NAME.bak") + "sudo xargs -t -iNAME mv -bf NAME NAME.bak", dry_run=dry_run) def add_repo(group: NodeGroup, repo_data: Union[List[str], Dict[str, dict], str]) -> RunnersGroupResult: + dry_run = utils.check_dry_run_status_active(group.cluster) create_repo_file(group, repo_data, get_repo_file_name()) - return group.sudo('yum clean all && sudo yum updateinfo') + return group.sudo('yum clean all && sudo yum updateinfo', dry_run=dry_run) def get_repo_file_name() -> str: @@ -48,7 +50,7 @@ def get_repo_file_name() -> str: def create_repo_file(group: AbstractGroup[RunResult], repo_data: Union[List[str], Dict[str, dict], str], - repo_file: str) -> None: + repo_file: str, dry_run=False) -> None: # if repo_data is dict, then convert it to string with config inside if isinstance(repo_data, dict): config = configparser.ConfigParser() @@ -61,7 +63,7 @@ def create_repo_file(group: AbstractGroup[RunResult], else: repo_data_stream = io.StringIO(utils.read_external(repo_data)) - group.put(repo_data_stream, repo_file, sudo=True) + group.put(repo_data_stream, repo_file, sudo=True, dry_run=dry_run) def clean(group: NodeGroup) -> RunnersGroupResult: @@ -92,7 +94,7 @@ def install(group: AbstractGroup[GROUP_RUN_TYPE], include: Union[str, List[str]] command = get_install_cmd(include, exclude) - return group.sudo(command, callback=callback) + return group.sudo(command, callback=callback, dry_run=utils.check_dry_run_status_active(group.cluster)) def remove(group: AbstractGroup[GROUP_RUN_TYPE], include: Union[str, List[str]] = None, @@ -110,7 +112,7 @@ def remove(group: AbstractGroup[GROUP_RUN_TYPE], include: Union[str, List[str]] exclude = ','.join(exclude) command += ' --exclude=%s' % exclude - return group.sudo(command, warn=warn, hide=hide) + return group.sudo(command, warn=warn, hide=hide, dry_run=utils.check_dry_run_status_active(group.cluster)) def upgrade(group: AbstractGroup[GROUP_RUN_TYPE], include: Union[str, List[str]] = None, @@ -127,7 +129,7 @@ def upgrade(group: AbstractGroup[GROUP_RUN_TYPE], include: Union[str, List[str]] exclude = ','.join(exclude) command += ' --exclude=%s' % exclude - return group.sudo(command) + return group.sudo(command, dry_run=utils.check_dry_run_status_active(group.cluster)) def no_changes_found(action: str, result: RunnersResult) -> bool:
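Besides passing dry_run into individual sudo/put calls, the patch relies on a second pattern for tasks whose effect cannot be usefully simulated (waiting for pods, rebooting nodes, packing backup archives): log a "[dry-run]" message and return early. A hedged sketch of that pattern, using a hypothetical task name, looks like this:

    # Sketch only, not part of the patch; "configure_example_service" is a made-up task.
    from kubemarine.core import utils

    def configure_example_service(cluster) -> None:
        if utils.check_dry_run_status_active(cluster):
            # report what would happen and skip the remote work entirely
            cluster.log.debug("[dry-run] Configuring example service")
            return
        group = cluster.nodes['control-plane']
        group.sudo("systemctl restart example.service")

Keeping the two patterns separate, per-call dry_run for commands that are cheap to echo and an early return for long waits and destructive bulk operations, matches how the change set is structured.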