diff --git a/cmd/minikube/cmd/status.go b/cmd/minikube/cmd/status.go index 3ebd1c4221d3..468973a379b6 100644 --- a/cmd/minikube/cmd/status.go +++ b/cmd/minikube/cmd/status.go @@ -345,8 +345,7 @@ func nodeStatus(api libmachine.API, cc config.ClusterConfig, n config.Node) (*St st.Host = codeNames[InsufficientStorage] } - stk := kverify.KubeletStatus(cr) - klog.Infof("%s kubelet status = %s", name, stk) + stk := kverify.ServiceStatus(cr, "kubelet") st.Kubelet = stk.String() // Early exit for worker nodes diff --git a/pkg/drivers/none/none.go b/pkg/drivers/none/none.go index c7164f257980..6ad1dcca48e9 100644 --- a/pkg/drivers/none/none.go +++ b/pkg/drivers/none/none.go @@ -142,7 +142,7 @@ func (d *Driver) GetState() (state.State, error) { return state.Running, nil } - return kverify.KubeletStatus(d.exec), nil + return kverify.ServiceStatus(d.exec, "kubelet"), nil } // Kill stops a host forcefully, including any containers that we are managing. diff --git a/pkg/minikube/bootstrapper/bsutil/kverify/kverify.go b/pkg/minikube/bootstrapper/bsutil/kverify/kverify.go index 1cac68a2cf2d..76a976e36ad3 100644 --- a/pkg/minikube/bootstrapper/bsutil/kverify/kverify.go +++ b/pkg/minikube/bootstrapper/bsutil/kverify/kverify.go @@ -35,6 +35,8 @@ const ( AppsRunningKey = "apps_running" // NodeReadyKey is the name used in the flags for waiting for the node status to be ready NodeReadyKey = "node_ready" + // KubeletKey is the name used in the flags for waiting for the kubelet service to be running + KubeletKey = "kubelet" ) // vars related to the --wait flag @@ -42,13 +44,13 @@ var ( // DefaultComponents is map of the the default components to wait for DefaultComponents = map[string]bool{APIServerWaitKey: true, SystemPodsWaitKey: true} // NoWaitComponents is map of componets to wait for if specified 'none' or 'false' - NoComponents = map[string]bool{APIServerWaitKey: false, SystemPodsWaitKey: false, DefaultSAWaitKey: false, AppsRunningKey: false, NodeReadyKey: false} + NoComponents = 
map[string]bool{APIServerWaitKey: false, SystemPodsWaitKey: false, DefaultSAWaitKey: false, AppsRunningKey: false, NodeReadyKey: false, KubeletKey: false} // AllComponents is map for waiting for all components. - AllComponents = map[string]bool{APIServerWaitKey: true, SystemPodsWaitKey: true, DefaultSAWaitKey: true, AppsRunningKey: true} + AllComponents = map[string]bool{APIServerWaitKey: true, SystemPodsWaitKey: true, DefaultSAWaitKey: true, AppsRunningKey: true, KubeletKey: true} // DefaultWaitList is list of all default components to wait for. only names to be used for start flags. DefaultWaitList = []string{APIServerWaitKey, SystemPodsWaitKey} // AllComponentsList list of all valid components keys to wait for. only names to be used used for start flags. - AllComponentsList = []string{APIServerWaitKey, SystemPodsWaitKey, DefaultSAWaitKey, AppsRunningKey, NodeReadyKey} + AllComponentsList = []string{APIServerWaitKey, SystemPodsWaitKey, DefaultSAWaitKey, AppsRunningKey, NodeReadyKey, KubeletKey} // AppsRunningList running list are valid k8s-app components to wait for them to be running AppsRunningList = []string{ "kube-dns", // coredns diff --git a/pkg/minikube/bootstrapper/bsutil/kverify/system_pods.go b/pkg/minikube/bootstrapper/bsutil/kverify/system_pods.go index a331f35c18a2..740586b80308 100644 --- a/pkg/minikube/bootstrapper/bsutil/kverify/system_pods.go +++ b/pkg/minikube/bootstrapper/bsutil/kverify/system_pods.go @@ -22,7 +22,6 @@ import ( "strings" "time" - "github.com/docker/machine/libmachine/state" "github.com/pkg/errors" core "k8s.io/api/core/v1" meta "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -34,7 +33,6 @@ import ( "k8s.io/minikube/pkg/minikube/config" "k8s.io/minikube/pkg/minikube/cruntime" "k8s.io/minikube/pkg/minikube/logs" - "k8s.io/minikube/pkg/minikube/sysinit" "k8s.io/minikube/pkg/util/retry" ) @@ -155,13 +153,3 @@ func announceProblems(r cruntime.Manager, bs bootstrapper.Bootstrapper, cfg conf time.Sleep(kconst.APICallRetryInterval * 15) } } 
- -// KubeletStatus checks the kubelet status -func KubeletStatus(cr command.Runner) state.State { - klog.Infof("Checking kubelet status ...") - active := sysinit.New(cr).Active("kubelet") - if active { - return state.Running - } - return state.Stopped -} diff --git a/pkg/minikube/bootstrapper/bsutil/kverify/system_svc.go b/pkg/minikube/bootstrapper/bsutil/kverify/system_svc.go new file mode 100644 index 000000000000..352ee7cf9c3e --- /dev/null +++ b/pkg/minikube/bootstrapper/bsutil/kverify/system_svc.go @@ -0,0 +1,60 @@ +/* +Copyright 2020 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package kverify verifies a running Kubernetes cluster is healthy +package kverify + +import ( + "fmt" + "time" + + "github.com/docker/machine/libmachine/state" + "k8s.io/klog/v2" + "k8s.io/minikube/pkg/minikube/command" + "k8s.io/minikube/pkg/minikube/sysinit" + "k8s.io/minikube/pkg/util/retry" +) + +// ServiceStatus returns state.Running when sysinit reports svc as active, and state.Stopped otherwise +func ServiceStatus(cr command.Runner, svc string) state.State { + active := sysinit.New(cr).Active(svc) + if active { + return state.Running + } + return state.Stopped +} + +// WaitForService will wait for a "systemd" or "init.d" service to be running on the node... 
+// not to be confused with Kubernetes Services +func WaitForService(cr command.Runner, svc string, timeout time.Duration) error { + pStart := time.Now() + klog.Infof("waiting for %s service to be running ....", svc) + kr := func() error { + if st := ServiceStatus(cr, svc); st != state.Running { + return fmt.Errorf("status %s", st) + } + return nil + } + + if err := retry.Local(kr, timeout); err != nil { + return fmt.Errorf("not running: %w", err) + } + + klog.Infof("duration metric: took %s WaitForService to wait for %s.", time.Since(pStart), svc) + + return nil + +} diff --git a/pkg/minikube/bootstrapper/kubeadm/kubeadm.go b/pkg/minikube/bootstrapper/kubeadm/kubeadm.go index 72756f1d5419..9bc36222c84d 100644 --- a/pkg/minikube/bootstrapper/kubeadm/kubeadm.go +++ b/pkg/minikube/bootstrapper/kubeadm/kubeadm.go @@ -392,10 +392,13 @@ func (k *Bootstrapper) client(ip string, port int) (*kubernetes.Clientset, error // WaitForNode blocks until the node appears to be healthy func (k *Bootstrapper) WaitForNode(cfg config.ClusterConfig, n config.Node, timeout time.Duration) error { start := time.Now() - register.Reg.SetStep(register.VerifyingKubernetes) out.T(style.HealthCheck, "Verifying Kubernetes components...") - + // regardless if waiting is set or not, we will make sure kubelet is not stopped + // to solve corner cases when a container is hibernated and once coming back kubelet not running. 
+ if err := k.ensureServiceStarted("kubelet"); err != nil { + klog.Warningf("Couldn't ensure kubelet is started this might cause issues: %v", err) + } // TODO: #7706: for better performance we could use k.client inside minikube to avoid asking for external IP:PORT cp, err := config.PrimaryControlPlane(&cfg) if err != nil { @@ -455,6 +458,12 @@ func (k *Bootstrapper) WaitForNode(cfg config.ClusterConfig, n config.Node, time } } } + if cfg.VerifyComponents[kverify.KubeletKey] { + if err := kverify.WaitForService(k.c, "kubelet", timeout); err != nil { + return errors.Wrap(err, "waiting for kubelet") + } + + } if cfg.VerifyComponents[kverify.NodeReadyKey] { if err := kverify.WaitForNodeReady(client, timeout); err != nil { @@ -471,6 +480,15 @@ func (k *Bootstrapper) WaitForNode(cfg config.ClusterConfig, n config.Node, time return nil } +// ensureServiceStarted will start a systemd or init.d service if it is not running. +func (k *Bootstrapper) ensureServiceStarted(svc string) error { + if st := kverify.ServiceStatus(k.c, svc); st != state.Running { + klog.Warningf("surprisingly %q service status was %s!. 
will try to start it, could be related to this issue https://github.com/kubernetes/minikube/issues/9458", svc, st) + return sysinit.New(k.c).Start(svc) + } + return nil +} + // needsReconfigure returns whether or not the cluster needs to be reconfigured func (k *Bootstrapper) needsReconfigure(conf string, hostname string, port int, client *kubernetes.Clientset, version string) bool { if rr, err := k.c.RunCmd(exec.Command("sudo", "diff", "-u", conf, conf+".new")); err != nil { diff --git a/pkg/minikube/node/start.go b/pkg/minikube/node/start.go index b2753f9cf9aa..6df0b0588fcc 100644 --- a/pkg/minikube/node/start.go +++ b/pkg/minikube/node/start.go @@ -196,7 +196,7 @@ func Start(starter Starter, apiServer bool) (*kubeconfig.Settings, error) { } } - klog.Infof("Will wait %s for node ...", waitTimeout) + klog.Infof("Will wait up to %s for node ...", viper.GetDuration(waitTimeout)) if err := bs.WaitForNode(*starter.Cfg, *starter.Node, viper.GetDuration(waitTimeout)); err != nil { return nil, errors.Wrapf(err, "wait %s for node", viper.GetDuration(waitTimeout)) } diff --git a/site/content/en/docs/commands/start.md b/site/content/en/docs/commands/start.md index ec69092d85fd..43c6c5108135 100644 --- a/site/content/en/docs/commands/start.md +++ b/site/content/en/docs/commands/start.md @@ -90,7 +90,7 @@ minikube start [flags] --uuid string Provide VM UUID to restore MAC address (hyperkit driver only) --vm Filter to use only VM Drivers --vm-driver driver DEPRECATED, use driver instead. - --wait strings comma separated list of Kubernetes components to verify and wait for after starting a cluster. defaults to "apiserver,system_pods", available options: "apiserver,system_pods,default_sa,apps_running,node_ready" . other acceptable values are 'all' or 'none', 'true' and 'false' (default [apiserver,system_pods]) + --wait strings comma separated list of Kubernetes components to verify and wait for after starting a cluster. 
defaults to "apiserver,system_pods", available options: "apiserver,system_pods,default_sa,apps_running,node_ready,kubelet" . other acceptable values are 'all' or 'none', 'true' and 'false' (default [apiserver,system_pods]) --wait-timeout duration max time to wait per Kubernetes or host to be healthy. (default 6m0s) ```