Skip to content

Commit

Permalink
:run: cabpk: Add retries to control plane join
Browse files Browse the repository at this point in the history
Signed-off-by: Naadir Jeewa <[email protected]>
  • Loading branch information
Naadir Jeewa committed Mar 25, 2020
1 parent 04ca7d1 commit a6f865d
Show file tree
Hide file tree
Showing 11 changed files with 466 additions and 9 deletions.
10 changes: 9 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@ CONVERSION_GEN := $(abspath $(TOOLS_BIN_DIR)/conversion-gen)
# Bindata.
GOBINDATA := $(abspath $(TOOLS_BIN_DIR)/go-bindata)
GOBINDATA_CLUSTERCTL_DIR := cmd/clusterctl/config
CLOUDINIT_PKG_DIR := bootstrap/kubeadm/internal/cloudinit
CLOUDINIT_GENERATED := $(CLOUDINIT_PKG_DIR)/zz_generated.bindata.go
CLOUDINIT_SCRIPT := $(CLOUDINIT_PKG_DIR)/kubeadm-bootstrap-script.sh
CERTMANAGER_COMPONENTS_GENERATED_FILE := cert-manager.yaml

# Define Docker related variables. Releases should modify and double check these vars.
Expand Down Expand Up @@ -242,7 +245,7 @@ generate-go-kubeadm-control-plane: $(CONTROLLER_GEN) $(CONVERSION_GEN) ## Runs G
paths=./controlplane/kubeadm/api/...

.PHONY: generate-bindata
generate-bindata: $(KUSTOMIZE) $(GOBINDATA) clean-bindata ## Generate code for embedding the clusterctl api manifest
generate-bindata: $(KUSTOMIZE) $(GOBINDATA) clean-bindata $(CLOUDINIT_GENERATED) ## Generate code for embedding the clusterctl api manifest
# Package manifest YAML into a single file.
mkdir -p $(GOBINDATA_CLUSTERCTL_DIR)/manifest/
$(KUSTOMIZE) build $(GOBINDATA_CLUSTERCTL_DIR)/crd > $(GOBINDATA_CLUSTERCTL_DIR)/manifest/clusterctl-api.yaml
Expand All @@ -255,6 +258,11 @@ generate-bindata: $(KUSTOMIZE) $(GOBINDATA) clean-bindata ## Generate code for e
# Cleanup the manifest folder.
$(MAKE) clean-bindata

# Embed the kubeadm bootstrap script into the cloudinit package as Go bindata,
# with the generated-code boilerplate header prepended.
# The output is assembled in temporary files and only moved onto the target
# once it is complete, so a failed or interrupted run never leaves behind a
# truncated file that Make would consider up to date.
$(CLOUDINIT_GENERATED): $(GOBINDATA) $(CLOUDINIT_SCRIPT)
	$(GOBINDATA) -mode=420 -modtime=1 -pkg=cloudinit -o=$@.tmp $(CLOUDINIT_SCRIPT)
	cat ./hack/boilerplate/boilerplate.generatego.txt $@.tmp > $@.new && mv -f $@.new $@; \
		status=$$?; rm -f $@.tmp $@.new; exit $$status

.PHONY: generate-manifests
generate-manifests: ## Generate manifests e.g. CRD, RBAC etc.
$(MAKE) generate-core-manifests
Expand Down
2 changes: 1 addition & 1 deletion bootstrap/kubeadm/api/v1alpha2/conversion.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ func (src *KubeadmConfig) ConvertTo(dstRaw conversion.Hub) error {

dst.Status.DataSecretName = restored.Status.DataSecretName
dst.Spec.Verbosity = restored.Spec.Verbosity
dst.Spec.UseExperimentalRetryJoin = restored.Spec.UseExperimentalRetryJoin

return nil
}
Expand Down Expand Up @@ -119,7 +120,6 @@ func Convert_v1alpha3_KubeadmConfigStatus_To_v1alpha2_KubeadmConfigStatus(in *ku
return nil
}


// Convert_v1alpha2_KubeadmConfigSpec_To_v1alpha3_KubeadmConfigSpec converts this KubeadmConfigSpec to the Hub version (v1alpha3).
func Convert_v1alpha2_KubeadmConfigSpec_To_v1alpha3_KubeadmConfigSpec(in *KubeadmConfigSpec, out *kubeadmbootstrapv1alpha3.KubeadmConfigSpec, s apiconversion.Scope) error {
return autoConvert_v1alpha2_KubeadmConfigSpec_To_v1alpha3_KubeadmConfigSpec(in, out, s)
Expand Down
1 change: 1 addition & 0 deletions bootstrap/kubeadm/api/v1alpha2/zz_generated.conversion.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,11 @@ type KubeadmConfigSpec struct {
// It overrides the `--v` flag in kubeadm commands.
// +optional
Verbosity *int32 `json:"verbosity,omitempty"`

// UseExperimentalRetryJoin replaces a basic kubeadm command with a shell
// script with retries for control plane joins
// +optional
UseExperimentalRetryJoin bool `json:"useExperimentalRetryJoin,omitempty"`
}

// KubeadmConfigStatus defines the observed state of KubeadmConfig
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1554,6 +1554,10 @@ spec:
items:
type: string
type: array
useExperimentalRetryJoin:
description: UseExperimentalRetryJoin replaces a basic kubeadm command
with a shell script with retries for control plane joins
type: boolean
users:
description: Users specifies extra users to add
items:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1633,6 +1633,11 @@ spec:
items:
type: string
type: array
useExperimentalRetryJoin:
description: UseExperimentalRetryJoin replaces a basic kubeadm
command with a shell script with retries for control plane
joins
type: boolean
users:
description: Users specifies extra users to add
items:
Expand Down
5 changes: 3 additions & 2 deletions bootstrap/kubeadm/controllers/kubeadmconfig_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -489,8 +489,9 @@ func (r *KubeadmConfigReconciler) joinControlplane(ctx context.Context, scope *S
}

cloudJoinData, err := cloudinit.NewJoinControlPlane(&cloudinit.ControlPlaneJoinInput{
JoinConfiguration: joinData,
Certificates: certificates,
JoinConfiguration: joinData,
Certificates: certificates,
UseExperimentalRetryJoin: scope.Config.Spec.UseExperimentalRetryJoin,
BaseUserData: cloudinit.BaseUserData{
AdditionalFiles: scope.Config.Spec.Files,
NTP: scope.Config.Spec.NTP,
Expand Down
42 changes: 37 additions & 5 deletions bootstrap/kubeadm/internal/cloudinit/controlplane_join.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,16 @@ limitations under the License.
package cloudinit

import (
"github.com/pkg/errors"
"fmt"

"github.com/pkg/errors"
bootstrapv1 "sigs.k8s.io/cluster-api/bootstrap/kubeadm/api/v1alpha3"
"sigs.k8s.io/cluster-api/util/secret"
)

const (
standardJoinCommand = "kubeadm join --config /tmp/kubeadm-controlplane-join-config.yaml %s"
retriableJoinCommand = "/usr/local/bin/kubeadm-bootstrap-script"
controlPlaneJoinCloudInit = `{{.Header}}
{{template "files" .WriteFiles}}
- path: /tmp/kubeadm-controlplane-join-config.yaml
Expand All @@ -32,7 +36,7 @@ const (
{{.JoinConfiguration | Indent 6}}
runcmd:
{{- template "commands" .PreKubeadmCommands }}
- 'kubeadm join --config /tmp/kubeadm-controlplane-join-config.yaml {{.KubeadmVerbosity}}'
- {{ .KubeadmCommand }}
{{- template "commands" .PostKubeadmCommands }}
{{- template "ntp" .NTP }}
{{- template "users" .Users }}
Expand All @@ -43,9 +47,10 @@ runcmd:
type ControlPlaneJoinInput struct {
BaseUserData
secret.Certificates

BootstrapToken string
JoinConfiguration string
UseExperimentalRetryJoin bool
KubeadmCommand string
BootstrapToken string
JoinConfiguration string
}

// NewJoinControlPlane returns the user data string to be used on a new control plane instance.
Expand All @@ -54,10 +59,37 @@ func NewJoinControlPlane(input *ControlPlaneJoinInput) ([]byte, error) {
// TODO: Consider validating that the correct certificates exist. It is different for external/stacked etcd
input.WriteFiles = input.Certificates.AsFiles()
input.WriteFiles = append(input.WriteFiles, input.AdditionalFiles...)
input.KubeadmCommand = fmt.Sprintf(standardJoinCommand, input.KubeadmVerbosity)
if input.UseExperimentalRetryJoin {
err := input.useBootstrapScript()
if err != nil {
return nil, err
}
}
userData, err := generate("JoinControlplane", controlPlaneJoinCloudInit, input)
if err != nil {
return nil, errors.Wrapf(err, "failed to generate user data for machine joining control plane")
}

return userData, err
}

// useBootstrapScript switches the join from the plain kubeadm command to the
// embedded bootstrap script.
//
// It loads the bundled script asset, passes it through generate with input as
// the data, appends the result to input.WriteFiles as a root-owned 0755 file,
// and sets input.KubeadmCommand to the path of that file. An error is returned
// if the asset cannot be read or generate fails.
func (input *ControlPlaneJoinInput) useBootstrapScript() error {
	scriptBytes, err := bootstrapKubeadmInternalCloudinitKubeadmBootstrapScriptShBytes()
	if err != nil {
		return errors.Wrap(err, "couldn't read bootstrap script")
	}

	joinScript, err := generate("JoinControlplaneScript", string(scriptBytes), input)
	if err != nil {
		// Distinct from the cloud-init user data error so the failing step is identifiable.
		return errors.Wrap(err, "failed to generate bootstrap script for machine joining control plane")
	}

	input.WriteFiles = append(input.WriteFiles, bootstrapv1.File{
		// The path written here must match the command executed by cloud-init,
		// so both are taken from the same constant.
		Path:        retriableJoinCommand,
		Owner:       "root",
		Permissions: "0755",
		Content:     string(joinScript),
	})
	input.KubeadmCommand = retriableJoinCommand
	return nil
}
129 changes: 129 additions & 0 deletions bootstrap/kubeadm/internal/cloudinit/kubeadm-bootstrap-script.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
#!/bin/bash
# Copyright 2020 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Log an error, attempt to undo the partial join, and exit.
# Args:
#   $1 Message to log with the error
#   $2 The exit code to terminate with (optional, defaults to 1)
log::error_exit() {
  local message="${1}"
  # Callers in this script pass only a message; without a default, `exit ""`
  # fails with "numeric argument required" and an unintended status.
  local code="${2:-1}"

  log::error "${message}"
  # Cleanup is best-effort: a failing reset step must not prevent the exit below.
  # NOTE(review): kubeadm documents these steps as `kubeadm reset phase <name>`;
  # confirm the positional form used here is honoured by the targeted versions.
  log::info "Removing member from cluster status"
  kubeadm reset -f update-cluster-status || true
  log::info "Removing etcd member"
  kubeadm reset -f remove-etcd-member || true
  log::info "Resetting kubeadm"
  kubeadm reset -f || true
  log::error "cluster.x-k8s.io kubeadm bootstrap script $0 exiting with status ${code}"
  exit "${code}"
}

# Announce that the bootstrap script completed and terminate with status 0.
log::success_exit() {
  local script="$0"
  log::info "cluster.x-k8s.io kubeadm bootstrap script ${script} finished"
  exit 0
}

# Log an error but keep going.
# Args:
#   $1 Headline message, written to stderr prefixed with "!!!" and a timestamp
#   $@ Any further arguments are written to stderr as extra detail lines
log::error() {
  # Keep both helpers local so logging never clobbers variables of the caller.
  local timestamp message
  timestamp=$(date --iso-8601=seconds)
  echo "!!! [${timestamp}] ${1}" >&2
  shift
  for message; do
    echo " ${message}" >&2
  done
}

# Print a status line. Formatted to show up in a stream of output.
# Args:
#   $1 Headline message, written to stdout prefixed with "+++" and a timestamp
#   $@ Any further arguments are written to stdout as extra detail lines
log::info() {
  # Declared local: under bash dynamic scoping an unscoped `message` loop
  # variable would overwrite a caller's local of the same name.
  local timestamp message
  timestamp=$(date --iso-8601=seconds)
  echo "+++ [${timestamp}] ${1}"
  shift
  for message; do
    echo " ${message}"
  done
}

# Report the outcome of a kubeadm invocation based on its exit status.
# Args:
#   $1 Description of the command that ran (used in the log message)
#   $2 Exit status returned by that command
# Status 0 is logged as info; status 3 (validation error) aborts the script
# through log::error_exit; every other status is logged as an error and
# control returns to the caller.
check_kubeadm_command() {
  local invoked="${1}"
  local status="${2}"
  if [ "${status}" = "0" ]; then
    log::info "kubeadm reported successful execution for ${invoked}"
  elif [ "${status}" = "1" ]; then
    log::error "kubeadm reported failed action(s) for ${invoked}"
  elif [ "${status}" = "2" ]; then
    log::error "kubeadm reported preflight check error during ${invoked}"
  elif [ "${status}" = "3" ]; then
    log::error_exit "kubeadm reported validation error for ${invoked}"
  else
    log::error "kubeadm reported unknown error ${status} for ${invoked}"
  fi
}

# Run a kubeadm command, retrying on failure.
# Args:
#   $@ The command and its arguments; the join config path and the templated
#      verbosity flag are appended to it on every attempt.
# The command is attempted up to 5 times with a 15 second pause after each
# failed attempt. Exit status 0 and 2 end the loop immediately. Any non-zero
# final status terminates the script through log::error_exit.
function retry-command() {
  # Scoped to the function so the counter cannot leak into, or be affected by,
  # a global of the same name.
  local attempt=0
  local kubeadm_return
  until [ "${attempt}" -ge 5 ]; do
    log::info "running '$*'"
    # shellcheck disable=SC1083
    "$@" --config /tmp/kubeadm-controlplane-join-config.yaml {{.KubeadmVerbosity}}
    kubeadm_return=$?
    check_kubeadm_command "'$*'" "${kubeadm_return}"
    # Success (0) and preflight errors (2) are not retried.
    case "${kubeadm_return}" in
      0 | 2) break ;;
    esac
    attempt=$((attempt + 1))
    sleep 15
  done
  # NOTE(review): status 2 leaves the loop early but still lands here and
  # aborts; confirm whether preflight errors were meant to be tolerated.
  if [ "${kubeadm_return}" -ne 0 ]; then
    log::error_exit "too many errors, exiting"
  fi
}

# Run a kubeadm command exactly once and abort the script if it fails.
# Args:
#   $@ The command and its arguments; the join config path and the templated
#      verbosity flag are appended to it.
function try-or-die-command() {
  local rc
  log::info "running '$*'"
  # shellcheck disable=SC1083
  "$@" --config /tmp/kubeadm-controlplane-join-config.yaml {{.KubeadmVerbosity}}
  rc=$?
  check_kubeadm_command "'$*'" "${rc}"
  # Any non-zero status is fatal: there is no retry for this command.
  [ "${rc}" -eq 0 ] || log::error_exit "fatal error, exiting"
}

# Run the control plane join one kubeadm phase at a time, in order.
# Phases invoked through retry-command are attempted up to 5 times each.
retry-command kubeadm join phase preflight
retry-command kubeadm join phase control-plane-prepare download-certs
retry-command kubeadm join phase control-plane-prepare certs
retry-command kubeadm join phase control-plane-prepare kubeconfig
retry-command kubeadm join phase control-plane-prepare control-plane
retry-command kubeadm join phase kubelet-start
# The etcd join is attempted exactly once; any failure aborts the script.
try-or-die-command kubeadm join phase control-plane-join etcd
retry-command kubeadm join phase control-plane-join update-status
retry-command kubeadm join phase control-plane-join mark-control-plane

# Reached only when no phase aborted the script.
log::success_exit
Loading

0 comments on commit a6f865d

Please sign in to comment.