From 54fc4f75229be51955b4c22a87cec5346888352c Mon Sep 17 00:00:00 2001 From: Cecile Robert-Michon Date: Wed, 13 Jan 2021 11:40:00 -0700 Subject: [PATCH] :sparkles: Add sentinel file to signal successful bootstrapping --- .../kubeadm/internal/cloudinit/cloudinit.go | 7 ++++- .../internal/cloudinit/controlplane_init.go | 7 ++++- .../internal/cloudinit/controlplane_join.go | 6 ++++- bootstrap/kubeadm/internal/cloudinit/node.go | 6 ++++- .../book/src/developer/providers/bootstrap.md | 4 +++ .../docker/api/v1alpha4/condition_consts.go | 8 ++++++ .../infrastructure/docker/cloudinit/runcmd.go | 10 ++++--- .../docker/cloudinit/runcmd_test.go | 6 ++--- .../controllers/dockermachine_controller.go | 16 ++++++++--- test/infrastructure/docker/docker/machine.go | 27 +++++++++++++++++++ .../docker/exp/docker/nodepool.go | 8 ++++++ 11 files changed, 91 insertions(+), 14 deletions(-) diff --git a/bootstrap/kubeadm/internal/cloudinit/cloudinit.go b/bootstrap/kubeadm/internal/cloudinit/cloudinit.go index dae81bee6eb3..73923a1e7858 100644 --- a/bootstrap/kubeadm/internal/cloudinit/cloudinit.go +++ b/bootstrap/kubeadm/internal/cloudinit/cloudinit.go @@ -26,7 +26,10 @@ import ( ) const ( - standardJoinCommand = "kubeadm join --config /run/kubeadm/kubeadm-join-config.yaml %s" + standardJoinCommand = "kubeadm join --config /run/kubeadm/kubeadm-join-config.yaml %s" + // sentinelFileCommand writes a file to /run/cluster-api to signal successful Kubernetes bootstrapping in a way that + // works both for Linux and Windows OS. + sentinelFileCommand = "echo success > /run/cluster-api/bootstrap-success.complete" retriableJoinScriptName = "/usr/local/bin/kubeadm-bootstrap-script" retriableJoinScriptOwner = "root" retriableJoinScriptPermissions = "0755" @@ -50,6 +53,7 @@ type BaseUserData struct { UseExperimentalRetry bool KubeadmCommand string KubeadmVerbosity string + SentinelFileCommand string } func (input *BaseUserData) prepare() error { @@ -64,6 +68,7 @@ func (input *BaseUserData) prepare() error { } input.WriteFiles = append(input.WriteFiles, *joinScriptFile) } + input.SentinelFileCommand = sentinelFileCommand return nil } diff --git a/bootstrap/kubeadm/internal/cloudinit/controlplane_init.go b/bootstrap/kubeadm/internal/cloudinit/controlplane_init.go index 35d4d4a30540..c6cd12d5665e 100644 --- a/bootstrap/kubeadm/internal/cloudinit/controlplane_init.go +++ b/bootstrap/kubeadm/internal/cloudinit/controlplane_init.go @@ -31,9 +31,13 @@ const ( {{.ClusterConfiguration | Indent 6}} --- {{.InitConfiguration | Indent 6}} +- path: /run/cluster-api/placeholder + owner: root:root + permissions: '0640' + content: "This placeholder file is used to create the /run/cluster-api sub directory in a way that is compatible with both Linux and Windows (mkdir -p /run/cluster-api does not work with Windows)" runcmd: {{- template "commands" .PreKubeadmCommands }} - - 'kubeadm init --config /run/kubeadm/kubeadm.yaml {{.KubeadmVerbosity}}' + - 'kubeadm init --config /run/kubeadm/kubeadm.yaml {{.KubeadmVerbosity}} && {{ .SentinelFileCommand }}' {{- template "commands" .PostKubeadmCommands }} {{- template "ntp" .NTP }} {{- template "users" .Users }} @@ -57,6 +61,7 @@ func NewInitControlPlane(input *ControlPlaneInput) ([]byte, error) { input.Header = cloudConfigHeader input.WriteFiles = input.Certificates.AsFiles() input.WriteFiles = append(input.WriteFiles, input.AdditionalFiles...) + input.SentinelFileCommand = sentinelFileCommand userData, err := generate("InitControlplane", controlPlaneCloudInit, input) if err != nil { return nil, err diff --git a/bootstrap/kubeadm/internal/cloudinit/controlplane_join.go b/bootstrap/kubeadm/internal/cloudinit/controlplane_join.go index 103ba0d903af..ddb04190c487 100644 --- a/bootstrap/kubeadm/internal/cloudinit/controlplane_join.go +++ b/bootstrap/kubeadm/internal/cloudinit/controlplane_join.go @@ -29,9 +29,13 @@ const ( permissions: '0640' content: | {{.JoinConfiguration | Indent 6}} +- path: /run/cluster-api/placeholder + owner: root:root + permissions: '0640' + content: "This placeholder file is used to create the /run/cluster-api sub directory in a way that is compatible with both Linux and Windows (mkdir -p /run/cluster-api does not work with Windows)" runcmd: {{- template "commands" .PreKubeadmCommands }} - - {{ .KubeadmCommand }} + - {{ .KubeadmCommand }} && {{ .SentinelFileCommand }} {{- template "commands" .PostKubeadmCommands }} {{- template "ntp" .NTP }} {{- template "users" .Users }} diff --git a/bootstrap/kubeadm/internal/cloudinit/node.go b/bootstrap/kubeadm/internal/cloudinit/node.go index 5da4cc8fa41a..5bb466e608dd 100644 --- a/bootstrap/kubeadm/internal/cloudinit/node.go +++ b/bootstrap/kubeadm/internal/cloudinit/node.go @@ -25,9 +25,13 @@ const ( content: | --- {{.JoinConfiguration | Indent 6}} +- path: /run/cluster-api/placeholder + owner: root:root + permissions: '0640' + content: "This placeholder file is used to create the /run/cluster-api sub directory in a way that is compatible with both Linux and Windows (mkdir -p /run/cluster-api does not work with Windows)" runcmd: {{- template "commands" .PreKubeadmCommands }} - - {{ .KubeadmCommand }} + - {{ .KubeadmCommand }} && {{ .SentinelFileCommand }} {{- template "commands" .PostKubeadmCommands }} {{- template "ntp" .NTP }} {{- template "users" .Users }} diff --git a/docs/book/src/developer/providers/bootstrap.md b/docs/book/src/developer/providers/bootstrap.md index 2a7a72f33041..79d39835fae6 100644 --- a/docs/book/src/developer/providers/bootstrap.md +++ b/docs/book/src/developer/providers/bootstrap.md @@ -59,6 +59,10 @@ The following diagram shows the typical logic for a bootstrap provider: 1. Set `status.ready` to true 1. Patch the resource to persist changes +## Sentinel File + +A bootstrap provider's bootstrap data must create `/run/cluster-api/bootstrap-success.complete` (or `C:\run\cluster-api\bootstrap-success.complete` for Windows machines) upon successful bootstrapping of a Kubernetes node. This allows infrastructure providers to detect and act on bootstrap failures. + ## RBAC ### Provider controller diff --git a/test/infrastructure/docker/api/v1alpha4/condition_consts.go b/test/infrastructure/docker/api/v1alpha4/condition_consts.go index 1d12658acedf..ce0d1ddc7969 100644 --- a/test/infrastructure/docker/api/v1alpha4/condition_consts.go +++ b/test/infrastructure/docker/api/v1alpha4/condition_consts.go @@ -52,6 +52,14 @@ const ( // by the DockerMachine controller (not by cloud-init). BootstrapExecSucceededCondition clusterv1.ConditionType = "BootstrapExecSucceeded" + // BootstrapSucceededCondition provide an observation of the DockerMachine bootstrap outcome. + // This is done by checking for the existence of the /run/cluster-api/bootstrap-success.complete file. + // The condition gets generated after BootstrapExecSucceededCondition is True. + // + // NOTE as a difference from other providers, container provisioning and bootstrap are directly managed + // by the DockerMachine controller (not by cloud-init). + BootstrapSucceededCondition clusterv1.ConditionType = "BootstrapSucceeded" + // BootstrappingReason documents (Severity=Info) a DockerMachine currently executing the bootstrap // script that creates the Kubernetes node on the newly provisioned machine infrastructure. BootstrappingReason = "Bootstrapping" diff --git a/test/infrastructure/docker/cloudinit/runcmd.go b/test/infrastructure/docker/cloudinit/runcmd.go index 104e9c34f0df..e7f1a0571e2f 100644 --- a/test/infrastructure/docker/cloudinit/runcmd.go +++ b/test/infrastructure/docker/cloudinit/runcmd.go @@ -18,7 +18,6 @@ package cloudinit import ( "encoding/json" - "fmt" "strings" "github.com/pkg/errors" @@ -92,15 +91,18 @@ func (a *runCmd) Commands() ([]Cmd, error) { func hackKubeadmIgnoreErrors(c Cmd) Cmd { // case kubeadm commands are defined as a string if c.Cmd == "/bin/sh" && len(c.Args) >= 2 { - if c.Args[0] == "-c" && (strings.Contains(c.Args[1], "kubeadm init") || strings.Contains(c.Args[1], "kubeadm join")) { - c.Args[1] = fmt.Sprintf("%s %s", c.Args[1], "--ignore-preflight-errors=all") + if c.Args[0] == "-c" { + c.Args[1] = strings.Replace(c.Args[1], "kubeadm init", "kubeadm init --ignore-preflight-errors=all", 1) + c.Args[1] = strings.Replace(c.Args[1], "kubeadm join", "kubeadm join --ignore-preflight-errors=all", 1) } } // case kubeadm commands are defined as a list if c.Cmd == "kubeadm" && len(c.Args) >= 1 { if c.Args[0] == "init" || c.Args[0] == "join" { - c.Args = append(c.Args, "--ignore-preflight-errors=all") + c.Args = append(c.Args, "") // make space + copy(c.Args[2:], c.Args[1:]) // shift elements + c.Args[1] = "--ignore-preflight-errors=all" // insert the additional arg } } diff --git a/test/infrastructure/docker/cloudinit/runcmd_test.go b/test/infrastructure/docker/cloudinit/runcmd_test.go index 7d5b4b1de5e4..0de69b08141f 100644 --- a/test/infrastructure/docker/cloudinit/runcmd_test.go +++ b/test/infrastructure/docker/cloudinit/runcmd_test.go @@ -68,7 +68,7 @@ func TestRunCmdRun(t *testing.T) { }, }, expectedCmds: []Cmd{ - {Cmd: "/bin/sh", Args: []string{"-c", "kubeadm init --config /run/kubeadm/kubeadm.yaml --ignore-preflight-errors=all"}}, + {Cmd: "/bin/sh", Args: []string{"-c", "kubeadm init --ignore-preflight-errors=all --config /run/kubeadm/kubeadm.yaml"}}, }, }, } @@ -98,11 +98,11 @@ runcmd: r.Cmds[0] = hackKubeadmIgnoreErrors(r.Cmds[0]) - expected0 := Cmd{Cmd: "/bin/sh", Args: []string{"-c", "kubeadm init --config=/run/kubeadm/kubeadm.yaml --ignore-preflight-errors=all"}} + expected0 := Cmd{Cmd: "/bin/sh", Args: []string{"-c", "kubeadm init --ignore-preflight-errors=all --config=/run/kubeadm/kubeadm.yaml"}} g.Expect(r.Cmds[0]).To(Equal(expected0)) r.Cmds[1] = hackKubeadmIgnoreErrors(r.Cmds[1]) - expected1 := Cmd{Cmd: "kubeadm", Args: []string{"join", "--config=/run/kubeadm/kubeadm-controlplane-join-config.yaml", "--ignore-preflight-errors=all"}} + expected1 := Cmd{Cmd: "kubeadm", Args: []string{"join", "--ignore-preflight-errors=all", "--config=/run/kubeadm/kubeadm-controlplane-join-config.yaml"}} g.Expect(r.Cmds[1]).To(Equal(expected1)) } diff --git a/test/infrastructure/docker/controllers/dockermachine_controller.go b/test/infrastructure/docker/controllers/dockermachine_controller.go index 6fa210e02d27..c16d7b04fd7c 100644 --- a/test/infrastructure/docker/controllers/dockermachine_controller.go +++ b/test/infrastructure/docker/controllers/dockermachine_controller.go @@ -267,12 +267,22 @@ func (r *DockerMachineReconciler) reconcileNormal(ctx context.Context, cluster * conditions.MarkFalse(dockerMachine, infrav1.BootstrapExecSucceededCondition, infrav1.BootstrapFailedReason, clusterv1.ConditionSeverityWarning, "Repeating bootstrap") return ctrl.Result{}, errors.Wrap(err, "failed to exec DockerMachine bootstrap") } + // Update the BootstrapExecSucceededCondition condition + conditions.MarkTrue(dockerMachine, infrav1.BootstrapExecSucceededCondition) + + timeoutctx, cancel = context.WithTimeout(ctx, 1*time.Minute) + defer cancel() + // Check for bootstrap success + if err := externalMachine.CheckForBootstrapSuccess(timeoutctx); err != nil { + conditions.MarkFalse(dockerMachine, infrav1.BootstrapSucceededCondition, infrav1.BootstrapFailedReason, clusterv1.ConditionSeverityWarning, "") + return ctrl.Result{}, errors.Wrap(err, "failed to check for existence of bootstrap success file at /run/cluster-api/bootstrap-success.complete") + } + // Update the BootstrapSucceededCondition condition + conditions.MarkTrue(dockerMachine, infrav1.BootstrapSucceededCondition) + dockerMachine.Spec.Bootstrapped = true } - // Update the BootstrapExecSucceededCondition condition - conditions.MarkTrue(dockerMachine, infrav1.BootstrapExecSucceededCondition) - // set address in machine status machineAddress, err := externalMachine.Address(ctx) if err != nil { diff --git a/test/infrastructure/docker/docker/machine.go b/test/infrastructure/docker/docker/machine.go index 51deb7a1fb15..bbb8a3c7b5b0 100644 --- a/test/infrastructure/docker/docker/machine.go +++ b/test/infrastructure/docker/docker/machine.go @@ -317,6 +317,33 @@ func (m *Machine) ExecBootstrap(ctx context.Context, data string) error { return nil } +// CheckForBootstrapSuccess checks if bootstrap was successful by checking for existence of the sentinel file. +func (m *Machine) CheckForBootstrapSuccess(ctx context.Context) error { + log := ctrl.LoggerFrom(ctx) + + if m.container == nil { + return errors.New("unable to set CheckForBootstrapSuccess. the container hosting this machine does not exists") + } + + command := cloudinit.Cmd{ + Cmd: "test", + Args: []string{"-f", "/run/cluster-api/bootstrap-success.complete"}, + } + + var outErr bytes.Buffer + var outStd bytes.Buffer + cmd := m.container.Commander.Command(command.Cmd, command.Args...) + cmd.SetStderr(&outErr) + cmd.SetStdout(&outStd) + err := cmd.Run(ctx) + if err != nil { + log.Info("Failed running command", "command", command, "stdout", outStd.String(), "stderr", outErr.String()) + return errors.Wrap(errors.WithStack(err), "failed to run bootstrap check") + } + + return nil +} + // SetNodeProviderID sets the docker provider ID for the kubernetes node func (m *Machine) SetNodeProviderID(ctx context.Context) error { log := ctrl.LoggerFrom(ctx) diff --git a/test/infrastructure/docker/exp/docker/nodepool.go b/test/infrastructure/docker/exp/docker/nodepool.go index cdf68a76b47b..eaa55aec00e7 100644 --- a/test/infrastructure/docker/exp/docker/nodepool.go +++ b/test/infrastructure/docker/exp/docker/nodepool.go @@ -249,6 +249,14 @@ func (np *NodePool) reconcileMachine(ctx context.Context, machine *docker.Machin if err := externalMachine.ExecBootstrap(timeoutctx, bootstrapData); err != nil { return ctrl.Result{}, errors.Wrapf(err, "failed to exec DockerMachinePool instance bootstrap for instance named %s", machine.Name()) } + + timeoutctx, cancel = context.WithTimeout(ctx, 1*time.Minute) + defer cancel() + // Check for bootstrap success + if err := externalMachine.CheckForBootstrapSuccess(timeoutctx); err != nil { + return ctrl.Result{}, errors.Wrap(err, "failed to check for existence of bootstrap success file at /run/cluster-api/bootstrap-success.complete") + } + machineStatus.Bootstrapped = true // return to surface the machine has been bootstrapped. return ctrl.Result{Requeue: true}, nil