Fix multi-node cluster not working after restarting docker
In a multi-node cluster with a single control-plane node, if the
control-plane node's IP changes, kube-controller-manager and
kube-scheduler would fail to connect to kube-apiserver. Updating the
server address to the new IP doesn't work because the API server's
certificate isn't valid for it.

This patch uses the "patches" option of kubeadm to replace the server
address in the kubeconfig files of kube-controller-manager and
kube-scheduler with the loopback address, which is one of the subject
alternative names in the API server's certificate.
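
For example, in an IPv4 cluster with the default API bind port 6443
(both values are illustrative), the generated patch file contains a
single JSON 6902 operation:

  [{"op": "replace", "path": "/clusters/0/cluster/server", "value": "https://127.0.0.1:6443"}]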

Signed-off-by: Quan Tian <[email protected]>
tnqn committed May 7, 2022
1 parent 575a480 commit 1b14fe8
Showing 2 changed files with 63 additions and 7 deletions.
66 changes: 59 additions & 7 deletions pkg/cluster/internal/create/actions/config/config.go
@@ -33,6 +33,7 @@ import (
 	"sigs.k8s.io/kind/pkg/cluster/internal/providers/common"
 	"sigs.k8s.io/kind/pkg/cluster/nodeutils"
 	"sigs.k8s.io/kind/pkg/internal/apis/config"
+	"sigs.k8s.io/kind/pkg/internal/version"
 )
 
 // Action implements action for creating the node config files
@@ -87,6 +88,23 @@ func (a *Action) Execute(ctx *actions.ActionContext) error {
 	kubeadmConfigPlusPatches := func(node nodes.Node, data kubeadm.ConfigData) func() error {
 		return func() error {
 			data.NodeName = node.String()
+			kubeVersion, err := nodeutils.KubeVersion(node)
+			if err != nil {
+				// TODO(bentheelder): logging here
+				return errors.Wrap(err, "failed to get kubernetes version from node")
+			}
+			data.KubernetesVersion = kubeVersion
+
+			patches, err := getKubeadmPatches(data)
+			if err != nil {
+				return errors.Wrap(err, "failed to generate kubeadm patches content")
+			}
+
+			ctx.Logger.V(2).Infof("Using the following kubeadm patches for node %s:\n%s", node.String(), patches)
+			if err := writeKubeadmPatches(patches, node); err != nil {
+				return err
+			}
+
 			kubeadmConfig, err := getKubeadmConfig(ctx.Config, data, node, provider)
 			if err != nil {
 				// TODO(bentheelder): logging here
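
As a side note, kind's internal version package mirrors the API of k8s.io/apimachinery/pkg/util/version; the following standalone sketch reproduces the v1.25 gate introduced below (the apimachinery import is an assumption for illustration, not part of this commit):

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/util/version"
)

func main() {
	// Same gate as getKubeadmPatches below: kubeconfig patching requires v1.25+.
	for _, raw := range []string{"v1.24.7", "v1.25.0"} {
		ver, err := version.ParseGeneric(raw)
		if err != nil {
			fmt.Println("invalid version:", err)
			continue
		}
		fmt.Printf("%s supports kubeconfig patches: %v\n", raw, ver.AtLeast(version.MustParseSemantic("v1.25.0")))
	}
}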
@@ -152,16 +170,34 @@ func (a *Action) Execute(ctx *actions.ActionContext) error {
 	return nil
 }
 
-// getKubeadmConfig generates the kubeadm config contents for the cluster
-// by running data through the template and applying patches as needed.
-func getKubeadmConfig(cfg *config.Cluster, data kubeadm.ConfigData, node nodes.Node, provider string) (path string, err error) {
-	kubeVersion, err := nodeutils.KubeVersion(node)
+// getKubeadmPatches generates the kubeadm patch contents. It returns a map of patch file name to patch content.
+func getKubeadmPatches(data kubeadm.ConfigData) (map[string]string, error) {
+	ver, err := version.ParseGeneric(data.KubernetesVersion)
 	if err != nil {
-		// TODO(bentheelder): logging here
-		return "", errors.Wrap(err, "failed to get kubernetes version from node")
+		return nil, err
 	}
+
+	patches := map[string]string{}
+	// Kubernetes older than v1.25 don't support patching kubeconfig files.
+	if ver.AtLeast(version.MustParseSemantic("v1.25.0")) {
+		// controller-manager and scheduler connect to local API endpoint, which defaults to the advertise address of the
+		// API server. If the Node's IP changes (which could happen after docker restarts), the server address in KubeConfig
+		// should be updated. However, the server certificate isn't valid for the new IP. To resolve it, we update the
+		// address to loopback address which is an alternative address of the certificate.
+		loopbackAddress := "127.0.0.1"
+		if data.IPFamily == config.IPv6Family {
+			loopbackAddress = "[::1]"
+		}
+		jsonPatch := fmt.Sprintf(`[{"op": "replace", "path": "/clusters/0/cluster/server", "value": "https://%s:%d"}]`, loopbackAddress, data.APIBindPort)
+		patches["controller-manager.conf+json.json"] = jsonPatch
+		patches["scheduler.conf+json.json"] = jsonPatch
+	}
-	data.KubernetesVersion = kubeVersion
+	return patches, nil
+}
+
+// getKubeadmConfig generates the kubeadm config contents for the cluster
+// by running data through the template and applying patches as needed.
+func getKubeadmConfig(cfg *config.Cluster, data kubeadm.ConfigData, node nodes.Node, provider string) (path string, err error) {
 	// TODO: gross hack!
 	// identify node in config by matching name (since these are named in order)
 	// we should really just streamline the bootstrap code and maintain
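
The JSON pointer /clusters/0/cluster/server in the patch above addresses the server field of the first (and, in a kubeadm-generated component kubeconfig, only) cluster entry. Against a typical controller-manager.conf the patch has roughly this effect (addresses illustrative):

apiVersion: v1
kind: Config
clusters:
- cluster:
    certificate-authority-data: <elided>
    # before the patch: server: https://172.18.0.2:6443 (the node IP at init time)
    server: https://127.0.0.1:6443
  name: kubernetes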
@@ -258,6 +294,22 @@ func allPatchesFromConfig(cfg *config.Cluster) (patches []string, jsonPatches []
 	return cfg.KubeadmConfigPatches, cfg.KubeadmConfigPatchesJSON6902
 }
 
+// writeKubeadmPatches writes the kubeadm patches in the specified node
+func writeKubeadmPatches(patches map[string]string, node nodes.Node) error {
+	patchesDir := "/kind/patches/"
+	if err := node.Command("mkdir", "-p", patchesDir).Run(); err != nil {
+		return errors.Wrapf(err, "failed to create directory %s", patchesDir)
+	}
+
+	for file, patch := range patches {
+		if err := nodeutils.WriteFile(node, patchesDir+file, patch); err != nil {
+			return errors.Wrapf(err, "failed to copy patch file %s to node", file)
+		}
+	}
+
+	return nil
+}
+
 // writeKubeadmConfig writes the kubeadm configuration in the specified node
 func writeKubeadmConfig(kubeadmConfig string, node nodes.Node) error {
 	// copy the config to the node
4 changes: 4 additions & 0 deletions pkg/cluster/internal/kubeadm/config.go
@@ -477,6 +477,8 @@ apiVersion: kubeadm.k8s.io/v1beta3
 kind: InitConfiguration
 metadata:
   name: config
+patches:
+  directory: /kind/patches
 # we use a well know token for TLS bootstrap
 bootstrapTokens:
 - token: "{{ .Token }}"
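
kubeadm applies every file in patches.directory whose name matches its target[suffix][+patchtype].extension convention (per the kubeadm documentation), so the patch files written by writeKubeadmPatches are picked up automatically. For example (the first entry is hypothetical, shown for contrast):

  kube-apiserver.yaml                # strategic merge patch for the kube-apiserver static pod (default patch type)
  controller-manager.conf+json.json  # JSON 6902 patch for the controller-manager kubeconfig, as generated above
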
@@ -498,6 +500,8 @@ apiVersion: kubeadm.k8s.io/v1beta3
 kind: JoinConfiguration
 metadata:
   name: config
+patches:
+  directory: /kind/patches
 {{ if .ControlPlane -}}
 controlPlane:
   localAPIEndpoint:
