Skip to content

Commit

Permalink
bootstrap: Retry node joins as well
Browse files Browse the repository at this point in the history
Signed-off-by: Naadir Jeewa <[email protected]>
  • Loading branch information
Naadir Jeewa committed Mar 25, 2020
1 parent 2b2bfb9 commit f773fba
Show file tree
Hide file tree
Showing 6 changed files with 88 additions and 72 deletions.
31 changes: 16 additions & 15 deletions bootstrap/kubeadm/controllers/kubeadmconfig_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -422,12 +422,13 @@ func (r *KubeadmConfigReconciler) joinWorker(ctx context.Context, scope *Scope)

cloudJoinData, err := cloudinit.NewNode(&cloudinit.NodeInput{
BaseUserData: cloudinit.BaseUserData{
AdditionalFiles: scope.Config.Spec.Files,
NTP: scope.Config.Spec.NTP,
PreKubeadmCommands: scope.Config.Spec.PreKubeadmCommands,
PostKubeadmCommands: scope.Config.Spec.PostKubeadmCommands,
Users: scope.Config.Spec.Users,
KubeadmVerbosity: verbosityFlag,
AdditionalFiles: scope.Config.Spec.Files,
NTP: scope.Config.Spec.NTP,
PreKubeadmCommands: scope.Config.Spec.PreKubeadmCommands,
PostKubeadmCommands: scope.Config.Spec.PostKubeadmCommands,
Users: scope.Config.Spec.Users,
KubeadmVerbosity: verbosityFlag,
UseExperimentalRetry: scope.Config.Spec.UseExperimentalRetryJoin,
},
JoinConfiguration: joinData,
})
Expand Down Expand Up @@ -489,16 +490,16 @@ func (r *KubeadmConfigReconciler) joinControlplane(ctx context.Context, scope *S
}

cloudJoinData, err := cloudinit.NewJoinControlPlane(&cloudinit.ControlPlaneJoinInput{
JoinConfiguration: joinData,
Certificates: certificates,
UseExperimentalRetry: scope.Config.Spec.UseExperimentalRetryJoin,
JoinConfiguration: joinData,
Certificates: certificates,
BaseUserData: cloudinit.BaseUserData{
AdditionalFiles: scope.Config.Spec.Files,
NTP: scope.Config.Spec.NTP,
PreKubeadmCommands: scope.Config.Spec.PreKubeadmCommands,
PostKubeadmCommands: scope.Config.Spec.PostKubeadmCommands,
Users: scope.Config.Spec.Users,
KubeadmVerbosity: verbosityFlag,
AdditionalFiles: scope.Config.Spec.Files,
NTP: scope.Config.Spec.NTP,
PreKubeadmCommands: scope.Config.Spec.PreKubeadmCommands,
PostKubeadmCommands: scope.Config.Spec.PostKubeadmCommands,
Users: scope.Config.Spec.Users,
KubeadmVerbosity: verbosityFlag,
UseExperimentalRetry: scope.Config.Spec.UseExperimentalRetryJoin,
},
})
if err != nil {
Expand Down
58 changes: 49 additions & 9 deletions bootstrap/kubeadm/internal/cloudinit/cloudinit.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,28 +18,51 @@ package cloudinit

import (
"bytes"
"fmt"
"text/template"

"github.com/pkg/errors"
bootstrapv1 "sigs.k8s.io/cluster-api/bootstrap/kubeadm/api/v1alpha3"
)

const (
cloudConfigHeader = `## template: jinja
standardJoinCommand = "kubeadm join --config /tmp/kubeadm-join-config.yaml %s"
retriableJoinScriptName = "/usr/local/bin/kubeadm-bootstrap-script"
retriableJoinScriptOwner = "root"
retriableJoinScriptPermissions = "0755"
cloudConfigHeader = `## template: jinja
#cloud-config
`
)

// BaseUserData is shared across all the various types of files written to disk.
type BaseUserData struct {
Header string
PreKubeadmCommands []string
PostKubeadmCommands []string
AdditionalFiles []bootstrapv1.File
WriteFiles []bootstrapv1.File
Users []bootstrapv1.User
NTP *bootstrapv1.NTP
KubeadmVerbosity string
Header string
PreKubeadmCommands []string
PostKubeadmCommands []string
AdditionalFiles []bootstrapv1.File
WriteFiles []bootstrapv1.File
Users []bootstrapv1.User
NTP *bootstrapv1.NTP
ControlPlane bool
UseExperimentalRetry bool
KubeadmCommand string
KubeadmVerbosity string
}

// prepare fills in the derived fields of BaseUserData before a cloud-init
// template is rendered. It is shared by the worker and control plane join
// paths.
//
// It sets Header to cloudConfigHeader, appends AdditionalFiles to WriteFiles,
// and sets KubeadmCommand to the standard "kubeadm join" invocation formatted
// with KubeadmVerbosity. When UseExperimentalRetry is set, KubeadmCommand is
// replaced by the path of the retriable join script and the generated script
// is appended to WriteFiles.
//
// It returns an error only when the retriable join script cannot be generated.
func (input *BaseUserData) prepare() error {
	input.Header = cloudConfigHeader
	input.WriteFiles = append(input.WriteFiles, input.AdditionalFiles...)
	input.KubeadmCommand = fmt.Sprintf(standardJoinCommand, input.KubeadmVerbosity)
	if input.UseExperimentalRetry {
		// KubeadmCommand is switched before the script is rendered, matching
		// the original statement order, so the template sees the final value.
		input.KubeadmCommand = retriableJoinScriptName
		joinScriptFile, err := generateBootstrapScript(input)
		if err != nil {
			// This helper serves both node and control plane joins, so the
			// message must not claim a control plane join specifically.
			return errors.Wrap(err, "failed to generate user data for machine join")
		}
		input.WriteFiles = append(input.WriteFiles, *joinScriptFile)
	}
	return nil
}

func generate(kind string, tpl string, data interface{}) ([]byte, error) {
Expand Down Expand Up @@ -72,3 +95,20 @@ func generate(kind string, tpl string, data interface{}) ([]byte, error) {

return out.Bytes(), nil
}

// generateBootstrapScript renders the kubeadm bootstrap script as a template
// against input and wraps the result in a bootstrapv1.File.
//
// The script source is obtained from
// bootstrapKubeadmInternalCloudinitKubeadmBootstrapScriptShBytes and rendered
// through generate under the template name "JoinScript". The returned file is
// placed at retriableJoinScriptName with retriableJoinScriptOwner and
// retriableJoinScriptPermissions.
//
// It returns a nil file and a wrapped error when the script source cannot be
// read or the template cannot be rendered.
func generateBootstrapScript(input interface{}) (*bootstrapv1.File, error) {
	scriptBytes, err := bootstrapKubeadmInternalCloudinitKubeadmBootstrapScriptShBytes()
	if err != nil {
		return nil, errors.Wrap(err, "couldn't read bootstrap script")
	}
	joinScript, err := generate("JoinScript", string(scriptBytes), input)
	if err != nil {
		return nil, errors.Wrap(err, "failed to generate bootstrap script for machine joins")
	}
	return &bootstrapv1.File{
		Path:        retriableJoinScriptName,
		Owner:       retriableJoinScriptOwner,
		Permissions: retriableJoinScriptPermissions,
		Content:     string(joinScript),
	}, nil
}
47 changes: 6 additions & 41 deletions bootstrap/kubeadm/internal/cloudinit/controlplane_join.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,22 +17,14 @@ limitations under the License.
package cloudinit

import (
"fmt"

"github.com/pkg/errors"
bootstrapv1 "sigs.k8s.io/cluster-api/bootstrap/kubeadm/api/v1alpha3"
"sigs.k8s.io/cluster-api/util/secret"
)

const (
standardJoinCommand = "kubeadm join --config /tmp/kubeadm-controlplane-join-config.yaml %s"
retriableJoinScriptName = "/usr/local/bin/kubeadm-bootstrap-script"
retriableJoinScriptOwner = "root"
retriableJoinScriptPermissions = "0755"

controlPlaneJoinCloudInit = `{{.Header}}
{{template "files" .WriteFiles}}
- path: /tmp/kubeadm-controlplane-join-config.yaml
- path: /tmp/kubeadm-join-config.yaml
owner: root:root
permissions: '0640'
content: |
Expand All @@ -50,24 +42,17 @@ runcmd:
type ControlPlaneJoinInput struct {
BaseUserData
secret.Certificates
UseExperimentalRetry bool
KubeadmCommand string
BootstrapToken string
JoinConfiguration string
BootstrapToken string
JoinConfiguration string
}

// NewJoinControlPlane returns the user data string to be used on a new control plane instance.
func NewJoinControlPlane(input *ControlPlaneJoinInput) ([]byte, error) {
input.Header = cloudConfigHeader
// TODO: Consider validating that the correct certificates exist. It is different for external/stacked etcd
input.WriteFiles = input.Certificates.AsFiles()
input.WriteFiles = append(input.WriteFiles, input.AdditionalFiles...)
input.KubeadmCommand = fmt.Sprintf(standardJoinCommand, input.KubeadmVerbosity)
if input.UseExperimentalRetry {
err := input.useBootstrapScript()
if err != nil {
return nil, err
}
input.ControlPlane = true
if err := input.prepare(); err != nil {
return nil, err
}
userData, err := generate("JoinControlplane", controlPlaneJoinCloudInit, input)
if err != nil {
Expand All @@ -76,23 +61,3 @@ func NewJoinControlPlane(input *ControlPlaneJoinInput) ([]byte, error) {

return userData, err
}

// useBootstrapScript makes the control plane join run through the retriable
// bootstrap script instead of a plain kubeadm command.
//
// The script bytes returned by
// bootstrapKubeadmInternalCloudinitKubeadmBootstrapScriptShBytes are rendered
// as a template against input, the result is appended to WriteFiles at
// retriableJoinScriptName, and KubeadmCommand is pointed at that path. Neither
// field is modified when an error is returned.
func (input *ControlPlaneJoinInput) useBootstrapScript() error {
	rawScript, readErr := bootstrapKubeadmInternalCloudinitKubeadmBootstrapScriptShBytes()
	if readErr != nil {
		return errors.Wrap(readErr, "couldn't read bootstrap script")
	}

	rendered, renderErr := generate("JoinControlplaneScript", string(rawScript), input)
	if renderErr != nil {
		return errors.Wrap(renderErr, "failed to generate user data for machine joining control plane")
	}

	// Queue the rendered script for writing, then invoke it by path.
	input.WriteFiles = append(input.WriteFiles, bootstrapv1.File{
		Path:        retriableJoinScriptName,
		Owner:       retriableJoinScriptOwner,
		Permissions: retriableJoinScriptPermissions,
		Content:     string(rendered),
	})
	input.KubeadmCommand = retriableJoinScriptName

	return nil
}
12 changes: 10 additions & 2 deletions bootstrap/kubeadm/internal/cloudinit/kubeadm-bootstrap-script.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,12 @@ log::error_exit() {
local code="${2}"

log::error "${message}"
# {{ if .ControlPlane }}
log::info "Removing member from cluster status"
kubeadm reset -f update-cluster-status || true
log::info "Removing etcd member"
kubeadm reset -f remove-etcd-member || true
# {{ end }}
log::info "Resetting kubeadm"
kubeadm reset -f || true
log::error "cluster.x-k8s.io kubeadm bootstrap script $0 exiting with status ${code}"
Expand Down Expand Up @@ -86,7 +88,7 @@ function retry-command() {
until [ $n -ge 5 ]; do
log::info "running '$*'"
# shellcheck disable=SC1083
"$@" --config /tmp/kubeadm-controlplane-join-config.yaml {{.KubeadmVerbosity}}
"$@" --config /tmp/kubeadm-join-config.yaml {{.KubeadmVerbosity}}
kubeadm_return=$?
check_kubeadm_command "'$*'" "${kubeadm_return}"
if [ ${kubeadm_return} -eq 0 ]; then
Expand All @@ -104,26 +106,32 @@ function retry-command() {
fi
}

# {{ if .ControlPlane }}
function try-or-die-command() {
local kubeadm_return
log::info "running '$*'"
# shellcheck disable=SC1083
"$@" --config /tmp/kubeadm-controlplane-join-config.yaml {{.KubeadmVerbosity}}
"$@" --config /tmp/kubeadm-join-config.yaml {{.KubeadmVerbosity}}
kubeadm_return=$?
check_kubeadm_command "'$*'" "${kubeadm_return}"
if [ ${kubeadm_return} -ne 0 ]; then
log::error_exit "fatal error, exiting"
fi
}
# {{ end }}

retry-command kubeadm join phase preflight
# {{ if .ControlPlane }}
retry-command kubeadm join phase control-plane-prepare download-certs
retry-command kubeadm join phase control-plane-prepare certs
retry-command kubeadm join phase control-plane-prepare kubeconfig
retry-command kubeadm join phase control-plane-prepare control-plane
# {{ end }}
retry-command kubeadm join phase kubelet-start
# {{ if .ControlPlane }}
try-or-die-command kubeadm join phase control-plane-join etcd
retry-command kubeadm join phase control-plane-join update-status
retry-command kubeadm join phase control-plane-join mark-control-plane
# {{ end }}

log::success_exit
8 changes: 5 additions & 3 deletions bootstrap/kubeadm/internal/cloudinit/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,15 @@ package cloudinit
const (
nodeCloudInit = `{{.Header}}
{{template "files" .WriteFiles}}
- path: /tmp/kubeadm-node.yaml
- path: /tmp/kubeadm-join-config.yaml
owner: root:root
permissions: '0640'
content: |
---
{{.JoinConfiguration | Indent 6}}
runcmd:
{{- template "commands" .PreKubeadmCommands }}
- 'kubeadm join --config /tmp/kubeadm-node.yaml {{.KubeadmVerbosity}}'
- {{ .KubeadmCommand }}
{{- template "commands" .PostKubeadmCommands }}
{{- template "ntp" .NTP }}
{{- template "users" .Users }}
Expand All @@ -37,12 +37,14 @@ runcmd:
// NodeInput defines the context to generate a node user data.
type NodeInput struct {
// BaseUserData is embedded, so its fields (header, files, pre/post kubeadm
// commands, users, NTP, kubeadm command and verbosity) are addressable
// directly from the nodeCloudInit template.
BaseUserData

// JoinConfiguration is the text the nodeCloudInit template indents and
// writes as the content of the kubeadm join config file.
JoinConfiguration string
}

// NewNode returns the user data string to be used on a node instance.
func NewNode(input *NodeInput) ([]byte, error) {
if err := input.prepare(); err != nil {
return nil, err
}
input.Header = cloudConfigHeader
input.WriteFiles = append(input.WriteFiles, input.AdditionalFiles...)
return generate("Node", nodeCloudInit, input)
Expand Down
Loading

0 comments on commit f773fba

Please sign in to comment.