diff --git a/docs/tutorial/yurtctl.md b/docs/tutorial/yurtctl.md index 4bf54f47f6a..a0c5c2e6688 100644 --- a/docs/tutorial/yurtctl.md +++ b/docs/tutorial/yurtctl.md @@ -188,18 +188,6 @@ I0831 12:36:22.109368 77322 convert.go:292] the yurt-hub is deployed To verify that the yurttunnel works as expected, please refer to the [yurttunnel tutorial](https://github.com/openyurtio/openyurt/blob/master/docs/tutorial/yurt-tunnel.md) -## Set the path of configuration -Sometimes the configuration of the node may be different. Users can set the path of the kubelet service configuration -by the option `--kubeadm-conf-path`, which is used by kubelet component to join the cluster on the edge node. -``` -$ _output/bin/yurtctl convert --kubeadm-conf-path /etc/systemd/system/kubelet.service.d/10-kubeadm.conf -``` -The path of the directory on edge node containing static pod files can be set by the -option `--pod-manifest-path`. -``` -$ _output/bin/yurtctl convert --pod-manifest-path /etc/kubernetes/manifests -``` - ## Revert/Uninstall OpenYurt Using `yurtctl` to revert an OpenYurt cluster can be done by doing the following: @@ -230,6 +218,28 @@ Using `yurtctl` to join a Cloud-Node to OpenYurt cluster can be by doing the fol $ _output/bin/yurtctl join 1.2.3.4:6443 --token=zffaj3.a5vjzf09qn9ft3gt --node-type=cloud-node --discovery-token-unsafe-skip-ca-verification --v=5 ``` +## Note +### Disable the default nodelifecycle controller +`yurtctl convert` will turn off the default nodelifecycle controller to allow the yurt-controller-manager to work properly. +If kube-controller-manager is deployed as a static pod, yurtctl can modify the `kube-controller-manager.yaml` +according to the parameter `--pod-manifest-path` with default value `/etc/kubernetes/manifests`. +It is also suitable for kube-controller-manager high-availability scenarios. + +But for kube-controller-manager deployed in other ways, the user needs to turn off the default nodelifecycle controller manually. 
+Please refer to the [Disable the default nodelifecycle controller](https://github.com/openyurtio/openyurt/blob/master/docs/tutorial/manually-setup.md#disable-the-default-nodelifecycle-controller) section. In addition, when using `yurtctl revert`, if kube-controller-manager is not deployed as a static pod, the user also needs to restore the default nodelifecycle controller manually. + +### Set the path of configuration +Sometimes the configuration of the node may be different. Users can set the path of the kubelet service configuration +by the option `--kubeadm-conf-path`, which is used by kubelet component to join the cluster on the edge node. +``` +$ _output/bin/yurtctl convert --kubeadm-conf-path /etc/systemd/system/kubelet.service.d/10-kubeadm.conf +``` +The path of the directory on edge node containing static pod files can be set by the +option `--pod-manifest-path`. +``` +$ _output/bin/yurtctl convert --pod-manifest-path /etc/kubernetes/manifests +``` + ## Subcommand ### Convert a Kubernetes node to Yurt edge node diff --git a/pkg/yurtctl/cmd/convert/convert.go b/pkg/yurtctl/cmd/convert/convert.go index c1e6bb76907..89005ebcb64 100644 --- a/pkg/yurtctl/cmd/convert/convert.go +++ b/pkg/yurtctl/cmd/convert/convert.go @@ -279,21 +279,27 @@ func (co *ConvertOptions) RunConvert() (err error) { } klog.V(4).Info("the server version is valid") - // 1.1. check the state of worker nodes + // 1.1. get kube-controller-manager HA nodes + kcmNodeNames, err := kubeutil.GetKubeControllerManagerHANodes(co.clientSet) + if err != nil { + return + } + + // 1.2. 
check the state of worker nodes and kcm nodes nodeLst, err := co.clientSet.CoreV1().Nodes().List(context.Background(), metav1.ListOptions{}) if err != nil { return } for _, node := range nodeLst.Items { - if !strutil.IsInStringLst(co.CloudNodes, node.GetName()) { + if !strutil.IsInStringLst(co.CloudNodes, node.GetName()) || strutil.IsInStringLst(kcmNodeNames, node.GetName()) { _, condition := nodeutil.GetNodeCondition(&node.Status, v1.NodeReady) if condition == nil || condition.Status != v1.ConditionTrue { - klog.Errorf("Cannot do the convert, the status of worker node: %s is not 'Ready'.", node.Name) + klog.Errorf("Cannot do the convert, the status of worker node or kube-controller-manager node: %s is not 'Ready'.", node.Name) return } } } - klog.V(4).Info("the status of worker nodes are satisfied") + klog.V(4).Info("the status of worker nodes and kube-controller-manager nodes are satisfied") // 2. label nodes as cloud node or edge node var edgeNodeNames []string @@ -340,13 +346,17 @@ func (co *ConvertOptions) RunConvert() (err error) { return } - // 4. delete the system:controller:node-controller clusterrolebinding to disable node-controller - if err = co.clientSet.RbacV1().ClusterRoleBindings().Delete(context.Background(), "system:controller:node-controller", metav1.DeleteOptions{ - PropagationPolicy: &kubeutil.PropagationPolicy, - }); err != nil && !apierrors.IsNotFound(err) { - klog.Errorf("fail to delete clusterrolebinding system:controller:node-controller: %v", err) + // 4. disable node-controller + ctx := map[string]string{ + "action": "disable", + "yurtctl_servant_image": co.YurctlServantImage, + "pod_manifest_path": co.PodMainfestPath, + } + if err = kubeutil.RunServantJobs(co.clientSet, ctx, kcmNodeNames); err != nil { + klog.Errorf("fail to run DisableNodeControllerJobs: %s", err) return } + klog.Info("complete disabling node-controller") // 5. 
deploy the yurttunnel if required if co.DeployTunnel { @@ -392,7 +402,7 @@ func (co *ConvertOptions) RunConvert() (err error) { return err } - ctx := map[string]string{ + ctx = map[string]string{ "provider": string(co.Provider), "action": "convert", "yurtctl_servant_image": co.YurctlServantImage, @@ -406,11 +416,11 @@ func (co *ConvertOptions) RunConvert() (err error) { ctx["yurthub_healthcheck_timeout"] = co.YurthubHealthCheckTimeout.String() } - if err = kubeutil.RunServantJobs(co.clientSet, ctx, edgeNodeNames, true); err != nil { + if err = kubeutil.RunServantJobs(co.clientSet, ctx, edgeNodeNames); err != nil { klog.Errorf("fail to run ServantJobs: %s", err) return } - klog.Info("the yurt-hub is deployed") + klog.Info("complete deploying yurt-hub") return } diff --git a/pkg/yurtctl/cmd/convert/edgenode.go b/pkg/yurtctl/cmd/convert/edgenode.go index 6a5dd33e619..dc85662fac5 100644 --- a/pkg/yurtctl/cmd/convert/edgenode.go +++ b/pkg/yurtctl/cmd/convert/edgenode.go @@ -237,7 +237,7 @@ func (c *ConvertEdgeNodeOptions) RunConvertEdgeNode() (err error) { ctx["yurthub_healthcheck_timeout"] = c.YurthubHealthCheckTimeout.String() } - if err = kubeutil.RunServantJobs(c.clientSet, ctx, c.EdgeNodes, true); err != nil { + if err = kubeutil.RunServantJobs(c.clientSet, ctx, c.EdgeNodes); err != nil { klog.Errorf("fail to run ServantJobs: %s", err) return err } diff --git a/pkg/yurtctl/cmd/revert/edgenode.go b/pkg/yurtctl/cmd/revert/edgenode.go index d9534f4e500..d4fe6202895 100644 --- a/pkg/yurtctl/cmd/revert/edgenode.go +++ b/pkg/yurtctl/cmd/revert/edgenode.go @@ -189,7 +189,7 @@ func (r *RevertEdgeNodeOptions) RunRevertEdgeNode() (err error) { "pod_manifest_path": r.PodMainfestPath, "kubeadm_conf_path": r.KubeadmConfPath, }, - r.EdgeNodes, false); err != nil { + r.EdgeNodes); err != nil { klog.Errorf("fail to revert edge node: %s", err) return err } diff --git a/pkg/yurtctl/cmd/revert/revert.go b/pkg/yurtctl/cmd/revert/revert.go index 18e22d7a8bc..4146e3507c4 100644 --- 
a/pkg/yurtctl/cmd/revert/revert.go +++ b/pkg/yurtctl/cmd/revert/revert.go @@ -23,17 +23,17 @@ import ( "github.com/spf13/cobra" "github.com/spf13/pflag" v1 "k8s.io/api/core/v1" - rbacv1 "k8s.io/api/rbac/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/kubernetes" "k8s.io/klog" + nodeutil "k8s.io/kubernetes/pkg/controller/util/node" "github.com/openyurtio/openyurt/pkg/projectinfo" "github.com/openyurtio/openyurt/pkg/yurtctl/constants" "github.com/openyurtio/openyurt/pkg/yurtctl/lock" kubeutil "github.com/openyurtio/openyurt/pkg/yurtctl/util/kubernetes" - nodeutil "k8s.io/kubernetes/pkg/controller/util/node" + strutil "github.com/openyurtio/openyurt/pkg/yurtctl/util/strings" ) // RevertOptions has the information required by the revert operation @@ -125,7 +125,13 @@ func (ro *RevertOptions) RunRevert() (err error) { } klog.V(4).Info("the server version is valid") - // 1.1. check the state of worker nodes + // 1.1. get kube-controller-manager HA nodes + kcmNodeNames, err := kubeutil.GetKubeControllerManagerHANodes(ro.clientSet) + if err != nil { + return + } + + // 1.2. 
check the state of worker nodes nodeLst, err := ro.clientSet.CoreV1().Nodes().List(context.Background(), metav1.ListOptions{}) if err != nil { return @@ -133,15 +139,15 @@ func (ro *RevertOptions) RunRevert() (err error) { for _, node := range nodeLst.Items { isEdgeNode, ok := node.Labels[projectinfo.GetEdgeWorkerLabelKey()] - if ok && isEdgeNode == "true" { + if ok && isEdgeNode == "true" || strutil.IsInStringLst(kcmNodeNames, node.GetName()) { _, condition := nodeutil.GetNodeCondition(&node.Status, v1.NodeReady) if condition == nil || condition.Status != v1.ConditionTrue { - klog.Errorf("Cannot do the revert, the status of worker node: %s is not 'Ready'.", node.Name) + klog.Errorf("Cannot do the revert, the status of worker or kube-controller-manager node: %s is not 'Ready'.", node.Name) return } } } - klog.V(4).Info("the status of worker nodes are satisfied") + klog.V(4).Info("the status of worker nodes and kube-controller-manager nodes are satisfied") // 2. remove labels from nodes var edgeNodeNames []string @@ -213,30 +219,18 @@ func (ro *RevertOptions) RunRevert() (err error) { return } - // 6. recreate the system:controller:node-controller clustrrolebinding - ncClusterrolebinding := &rbacv1.ClusterRoleBinding{ - ObjectMeta: metav1.ObjectMeta{ - Name: "system:controller:node-controller", - }, - RoleRef: rbacv1.RoleRef{ - APIGroup: "rbac.authorization.k8s.io", - Kind: "ClusterRole", - Name: "system:controller:node-controller", - }, - Subjects: []rbacv1.Subject{ - { - Kind: "ServiceAccount", - Name: "node-controller", - Namespace: "kube-system", - }, + // 6. 
enable node-controller + if err = kubeutil.RunServantJobs(ro.clientSet, + map[string]string{ + "action": "enable", + "yurtctl_servant_image": ro.YurtctlServantImage, + "pod_manifest_path": ro.PodMainfestPath, }, - } - if _, err = ro.clientSet.RbacV1().ClusterRoleBindings().Create(context.Background(), ncClusterrolebinding, - metav1.CreateOptions{}); err != nil && !apierrors.IsAlreadyExists(err) { - klog.Errorf("fail to create clusterrolebinding system:controller:node-controller: %v", err) + kcmNodeNames); err != nil { + klog.Errorf("fail to run EnableNodeControllerJobs: %s", err) return } - klog.Info("clusterrolebinding system:controller:node-controller is created") + klog.Info("complete enabling node-controller") // 7. remove yurt-hub and revert kubelet service if err = kubeutil.RunServantJobs(ro.clientSet, @@ -246,11 +240,11 @@ func (ro *RevertOptions) RunRevert() (err error) { "pod_manifest_path": ro.PodMainfestPath, "kubeadm_conf_path": ro.KubeadmConfPath, }, - edgeNodeNames, false); err != nil { + edgeNodeNames); err != nil { klog.Errorf("fail to revert edge node: %s", err) return } - klog.Info("yurt-hub is removed, kubelet service is reset") + klog.Info("complete removing yurt-hub and resetting kubelet service") return } diff --git a/pkg/yurtctl/constants/constants.go b/pkg/yurtctl/constants/constants.go index 227d55fffda..fae3378c928 100644 --- a/pkg/yurtctl/constants/constants.go +++ b/pkg/yurtctl/constants/constants.go @@ -184,7 +184,7 @@ spec: containers: - name: yurtctl-servant image: {{.yurtctl_servant_image}} - imagePullPolicy: Always + imagePullPolicy: IfNotPresent command: - /bin/sh - -c @@ -229,7 +229,7 @@ spec: containers: - name: yurtctl-servant image: {{.yurtctl_servant_image}} - imagePullPolicy: Always + imagePullPolicy: IfNotPresent command: - /bin/sh - -c @@ -251,5 +251,57 @@ spec: - name: KUBELET_SVC value: {{.kubeadm_conf_path}} {{end}} +` + // DisableNodeControllerJobTemplate defines the node-controller disable job in yaml format + 
DisableNodeControllerJobTemplate = ` +apiVersion: batch/v1 +kind: Job +metadata: + name: {{.jobName}} + namespace: kube-system +spec: + template: + spec: + hostPID: true + hostNetwork: true + restartPolicy: OnFailure + nodeName: {{.nodeName}} + containers: + - name: yurtctl-disable-node-controller + image: {{.yurtctl_servant_image}} + imagePullPolicy: IfNotPresent + command: + - /bin/sh + - -c + args: + - "nsenter -t 1 -m -u -n -i -- sed -i 's/--controllers=/--controllers=-nodelifecycle,/g' {{.pod_manifest_path}}/kube-controller-manager.yaml" + securityContext: + privileged: true +` + // EnableNodeControllerJobTemplate defines the node-controller enable job in yaml format + EnableNodeControllerJobTemplate = ` +apiVersion: batch/v1 +kind: Job +metadata: + name: {{.jobName}} + namespace: kube-system +spec: + template: + spec: + hostPID: true + hostNetwork: true + restartPolicy: OnFailure + nodeName: {{.nodeName}} + containers: + - name: yurtctl-enable-node-controller + image: {{.yurtctl_servant_image}} + imagePullPolicy: IfNotPresent + command: + - /bin/sh + - -c + args: + - "nsenter -t 1 -m -u -n -i -- sed -i 's/--controllers=-nodelifecycle,/--controllers=/g' {{.pod_manifest_path}}/kube-controller-manager.yaml" + securityContext: + privileged: true ` ) diff --git a/pkg/yurtctl/util/kubernetes/util.go b/pkg/yurtctl/util/kubernetes/util.go index cc84cdc4df5..55cfb40f220 100644 --- a/pkg/yurtctl/util/kubernetes/util.go +++ b/pkg/yurtctl/util/kubernetes/util.go @@ -65,6 +65,10 @@ const ( ConvertJobNameBase = "yurtctl-servant-convert" // RevertJobNameBase is the prefix of the revert ServantJob name RevertJobNameBase = "yurtctl-servant-revert" + // DisableNodeControllerJobNameBase is the prefix of the DisableNodeControllerJob name + DisableNodeControllerJobNameBase = "yurtctl-disable-node-controller" + // EnableNodeControllerJobNameBase is the prefix of the EnableNodeControllerJob name + EnableNodeControllerJobNameBase = "yurtctl-enable-node-controller" ) var ( @@ -425,28 
+429,33 @@ func RunJobAndCleanup(cliSet *kubernetes.Clientset, job *batchv1.Job, timeout, p } // RunServantJobs launch servant jobs on specified edge nodes -func RunServantJobs(cliSet *kubernetes.Clientset, tmplCtx map[string]string, edgeNodeNames []string, convert bool) error { +func RunServantJobs(cliSet *kubernetes.Clientset, tmplCtx map[string]string, edgeNodeNames []string) error { var wg sync.WaitGroup - servantJobTemplate := constants.ConvertServantJobTemplate - if !convert { + var servantJobTemplate, jobBaseName string + action, exist := tmplCtx["action"] + if !exist { + return errors.New("action is not specified") + } + switch action { + case "convert": + servantJobTemplate = constants.ConvertServantJobTemplate + jobBaseName = ConvertJobNameBase + case "revert": servantJobTemplate = constants.RevertServantJobTemplate + jobBaseName = RevertJobNameBase + case "disable": + servantJobTemplate = constants.DisableNodeControllerJobTemplate + jobBaseName = DisableNodeControllerJobNameBase + case "enable": + servantJobTemplate = constants.EnableNodeControllerJobTemplate + jobBaseName = EnableNodeControllerJobNameBase + default: + return fmt.Errorf("unknown action: %s", action) } - for _, nodeName := range edgeNodeNames { - action, exist := tmplCtx["action"] - if !exist { - return errors.New("action is not specified") - } - switch action { - case "convert": - tmplCtx["jobName"] = ConvertJobNameBase + "-" + nodeName - case "revert": - tmplCtx["jobName"] = RevertJobNameBase + "-" + nodeName - default: - return fmt.Errorf("unknown action: %s", action) - } + for _, nodeName := range edgeNodeNames { + tmplCtx["jobName"] = jobBaseName + "-" + nodeName tmplCtx["nodeName"] = nodeName - jobYaml, err := tmplutil.SubsituteTemplate(servantJobTemplate, tmplCtx) if err != nil { return err @@ -591,3 +600,19 @@ func GetOrCreateJoinTokenString(cliSet *kubernetes.Clientset) (string, error) { } return tokenStr, nil } + +// find kube-controller-manager deployed through static file 
+func GetKubeControllerManagerHANodes(cliSet *kubernetes.Clientset) ([]string, error) { + var kcmNodeNames []string + podLst, err := cliSet.CoreV1().Pods("kube-system").List(context.Background(), metav1.ListOptions{}) + if err != nil { + return nil, err + } + for _, pod := range podLst.Items { + kcmPodName := fmt.Sprintf("kube-controller-manager-%s", pod.Spec.NodeName) + if kcmPodName == pod.Name { + kcmNodeNames = append(kcmNodeNames, pod.Spec.NodeName) + } + } + return kcmNodeNames, nil +}