Skip to content

Commit

Permalink
✨ Add sentinel file to signal successful bootstrapping
Browse files Browse the repository at this point in the history
  • Loading branch information
Cecile Robert-Michon committed Jan 21, 2021
1 parent f92d4ba commit 5ee066a
Show file tree
Hide file tree
Showing 11 changed files with 73 additions and 15 deletions.
7 changes: 6 additions & 1 deletion bootstrap/kubeadm/internal/cloudinit/cloudinit.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,10 @@ import (
)

const (
standardJoinCommand = "kubeadm join --config /run/kubeadm/kubeadm-join-config.yaml %s"
standardJoinCommand = "kubeadm join --config /run/kubeadm/kubeadm-join-config.yaml %s"
// sentinelFileCommand writes a file to /run/cluster-api to signal successful Kubernetes bootstrapping in a way that
// works both for Linux and Windows OS.
sentinelFileCommand = "echo success > /run/cluster-api/bootstrap-success.complete"
retriableJoinScriptName = "/usr/local/bin/kubeadm-bootstrap-script"
retriableJoinScriptOwner = "root"
retriableJoinScriptPermissions = "0755"
Expand All @@ -50,6 +53,7 @@ type BaseUserData struct {
UseExperimentalRetry bool
KubeadmCommand string
KubeadmVerbosity string
SentinelFileCommand string
}

func (input *BaseUserData) prepare() error {
Expand All @@ -64,6 +68,7 @@ func (input *BaseUserData) prepare() error {
}
input.WriteFiles = append(input.WriteFiles, *joinScriptFile)
}
input.SentinelFileCommand = sentinelFileCommand
return nil
}

Expand Down
7 changes: 6 additions & 1 deletion bootstrap/kubeadm/internal/cloudinit/controlplane_init.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,13 @@ const (
{{.ClusterConfiguration | Indent 6}}
---
{{.InitConfiguration | Indent 6}}
- path: /run/cluster-api/placeholder
owner: root:root
permissions: '0640'
content: "This placeholder file is used to create the /run/cluster-api sub directory in a way that is compatible with both Linux and Windows (mkdir -p /run/cluster-api does not work with Windows)"
runcmd:
{{- template "commands" .PreKubeadmCommands }}
- 'kubeadm init --config /run/kubeadm/kubeadm.yaml {{.KubeadmVerbosity}}'
- 'kubeadm init --config /run/kubeadm/kubeadm.yaml {{.KubeadmVerbosity}} && {{ .SentinelFileCommand }}'
{{- template "commands" .PostKubeadmCommands }}
{{- template "ntp" .NTP }}
{{- template "users" .Users }}
Expand All @@ -57,6 +61,7 @@ func NewInitControlPlane(input *ControlPlaneInput) ([]byte, error) {
input.Header = cloudConfigHeader
input.WriteFiles = input.Certificates.AsFiles()
input.WriteFiles = append(input.WriteFiles, input.AdditionalFiles...)
input.SentinelFileCommand = sentinelFileCommand
userData, err := generate("InitControlplane", controlPlaneCloudInit, input)
if err != nil {
return nil, err
Expand Down
6 changes: 5 additions & 1 deletion bootstrap/kubeadm/internal/cloudinit/controlplane_join.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,13 @@ const (
permissions: '0640'
content: |
{{.JoinConfiguration | Indent 6}}
- path: /run/cluster-api/placeholder
owner: root:root
permissions: '0640'
content: "This placeholder file is used to create the /run/cluster-api sub directory in a way that is compatible with both Linux and Windows (mkdir -p /run/cluster-api does not work with Windows)"
runcmd:
{{- template "commands" .PreKubeadmCommands }}
- {{ .KubeadmCommand }}
- {{ .KubeadmCommand }} && {{ .SentinelFileCommand }}
{{- template "commands" .PostKubeadmCommands }}
{{- template "ntp" .NTP }}
{{- template "users" .Users }}
Expand Down
6 changes: 5 additions & 1 deletion bootstrap/kubeadm/internal/cloudinit/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,13 @@ const (
content: |
---
{{.JoinConfiguration | Indent 6}}
- path: /run/cluster-api/placeholder
owner: root:root
permissions: '0640'
content: "This placeholder file is used to create the /run/cluster-api sub directory in a way that is compatible with both Linux and Windows (mkdir -p /run/cluster-api does not work with Windows)"
runcmd:
{{- template "commands" .PreKubeadmCommands }}
- {{ .KubeadmCommand }}
- {{ .KubeadmCommand }} && {{ .SentinelFileCommand }}
{{- template "commands" .PostKubeadmCommands }}
{{- template "ntp" .NTP }}
{{- template "users" .Users }}
Expand Down
4 changes: 4 additions & 0 deletions docs/book/src/developer/providers/bootstrap.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,10 @@ The following diagram shows the typical logic for a bootstrap provider:
1. Set `status.ready` to true
1. Patch the resource to persist changes

## Sentinel File

A bootstrap provider's bootstrap data must create `/run/cluster-api/bootstrap-success.complete` (or `C:\run\cluster-api\bootstrap-success.complete` for Windows machines) upon successful bootstrapping of a Kubernetes node. This allows infrastructure providers to detect and act on bootstrap failures: if the file is absent after the bootstrap commands have run, bootstrapping did not succeed.

## RBAC

### Provider controller
Expand Down
4 changes: 3 additions & 1 deletion test/infrastructure/docker/api/v1alpha4/condition_consts.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,9 @@ const (
)

const (
// BootstrapExecSucceededCondition provide an observation of the DockerMachine bootstrap process.
// BootstrapExecSucceededCondition provides an observation of the DockerMachine bootstrap process.
// It is set based on successful execution of bootstrap commands and on the existence of
// the /run/cluster-api/bootstrap-success.complete file.
// The condition gets generated after ContainerProvisionedCondition is True.
//
// NOTE as a difference from other providers, container provisioning and bootstrap are directly managed
Expand Down
10 changes: 6 additions & 4 deletions test/infrastructure/docker/cloudinit/runcmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ package cloudinit

import (
"encoding/json"
"fmt"
"strings"

"github.com/pkg/errors"
Expand Down Expand Up @@ -92,15 +91,18 @@ func (a *runCmd) Commands() ([]Cmd, error) {
func hackKubeadmIgnoreErrors(c Cmd) Cmd {
// case kubeadm commands are defined as a string
if c.Cmd == "/bin/sh" && len(c.Args) >= 2 {
if c.Args[0] == "-c" && (strings.Contains(c.Args[1], "kubeadm init") || strings.Contains(c.Args[1], "kubeadm join")) {
c.Args[1] = fmt.Sprintf("%s %s", c.Args[1], "--ignore-preflight-errors=all")
if c.Args[0] == "-c" {
c.Args[1] = strings.Replace(c.Args[1], "kubeadm init", "kubeadm init --ignore-preflight-errors=all", 1)
c.Args[1] = strings.Replace(c.Args[1], "kubeadm join", "kubeadm join --ignore-preflight-errors=all", 1)
}
}

// case kubeadm commands are defined as a list
if c.Cmd == "kubeadm" && len(c.Args) >= 1 {
if c.Args[0] == "init" || c.Args[0] == "join" {
c.Args = append(c.Args, "--ignore-preflight-errors=all")
c.Args = append(c.Args, "") // make space
copy(c.Args[2:], c.Args[1:]) // shift elements
c.Args[1] = "--ignore-preflight-errors=all" // insert the additional arg
}
}

Expand Down
6 changes: 3 additions & 3 deletions test/infrastructure/docker/cloudinit/runcmd_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ func TestRunCmdRun(t *testing.T) {
},
},
expectedCmds: []Cmd{
{Cmd: "/bin/sh", Args: []string{"-c", "kubeadm init --config /run/kubeadm/kubeadm.yaml --ignore-preflight-errors=all"}},
{Cmd: "/bin/sh", Args: []string{"-c", "kubeadm init --ignore-preflight-errors=all --config /run/kubeadm/kubeadm.yaml"}},
},
},
}
Expand Down Expand Up @@ -98,11 +98,11 @@ runcmd:

r.Cmds[0] = hackKubeadmIgnoreErrors(r.Cmds[0])

expected0 := Cmd{Cmd: "/bin/sh", Args: []string{"-c", "kubeadm init --config=/run/kubeadm/kubeadm.yaml --ignore-preflight-errors=all"}}
expected0 := Cmd{Cmd: "/bin/sh", Args: []string{"-c", "kubeadm init --ignore-preflight-errors=all --config=/run/kubeadm/kubeadm.yaml"}}
g.Expect(r.Cmds[0]).To(Equal(expected0))

r.Cmds[1] = hackKubeadmIgnoreErrors(r.Cmds[1])

expected1 := Cmd{Cmd: "kubeadm", Args: []string{"join", "--config=/run/kubeadm/kubeadm-controlplane-join-config.yaml", "--ignore-preflight-errors=all"}}
expected1 := Cmd{Cmd: "kubeadm", Args: []string{"join", "--ignore-preflight-errors=all", "--config=/run/kubeadm/kubeadm-controlplane-join-config.yaml"}}
g.Expect(r.Cmds[1]).To(Equal(expected1))
}
Original file line number Diff line number Diff line change
Expand Up @@ -267,12 +267,17 @@ func (r *DockerMachineReconciler) reconcileNormal(ctx context.Context, cluster *
conditions.MarkFalse(dockerMachine, infrav1.BootstrapExecSucceededCondition, infrav1.BootstrapFailedReason, clusterv1.ConditionSeverityWarning, "Repeating bootstrap")
return ctrl.Result{}, errors.Wrap(err, "failed to exec DockerMachine bootstrap")
}
// Check for bootstrap success
if err := externalMachine.CheckForBootstrapSuccess(timeoutctx); err != nil {
conditions.MarkFalse(dockerMachine, infrav1.BootstrapExecSucceededCondition, infrav1.BootstrapFailedReason, clusterv1.ConditionSeverityWarning, "Repeating bootstrap")
return ctrl.Result{}, errors.Wrap(err, "failed to check for existence of bootstrap success file at /run/cluster-api/bootstrap-success.complete")
}
// Update the BootstrapExecSucceededCondition condition
conditions.MarkTrue(dockerMachine, infrav1.BootstrapExecSucceededCondition)

dockerMachine.Spec.Bootstrapped = true
}

// Update the BootstrapExecSucceededCondition condition
conditions.MarkTrue(dockerMachine, infrav1.BootstrapExecSucceededCondition)

// set address in machine status
machineAddress, err := externalMachine.Address(ctx)
if err != nil {
Expand Down
22 changes: 22 additions & 0 deletions test/infrastructure/docker/docker/machine.go
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,28 @@ func (m *Machine) ExecBootstrap(ctx context.Context, data string) error {
return nil
}

// CheckForBootstrapSuccess checks if bootstrap was successful by checking for existence of the sentinel file.
//
// It runs `test -f /run/cluster-api/bootstrap-success.complete` through the
// commander of the container hosting this machine and returns nil only when
// that command succeeds. An error is returned when the machine has no
// container, or when the command fails; in the latter case the captured
// stdout and stderr are logged before the wrapped error is returned.
func (m *Machine) CheckForBootstrapSuccess(ctx context.Context) error {
	log := ctrl.LoggerFrom(ctx)

	if m.container == nil {
		return errors.New("unable to check for bootstrap success: the container hosting this machine does not exist")
	}

	var outErr bytes.Buffer
	var outStd bytes.Buffer
	cmd := m.container.Commander.Command("test", "-f", "/run/cluster-api/bootstrap-success.complete")
	cmd.SetStderr(&outErr)
	cmd.SetStdout(&outStd)
	if err := cmd.Run(ctx); err != nil {
		// The command output is not part of the returned error, so log it here for diagnosis.
		log.Info("Failed running command", "command", "test -f /run/cluster-api/bootstrap-success.complete", "stdout", outStd.String(), "stderr", outErr.String())
		// errors.Wrap already records a stack trace; an additional WithStack would only duplicate it.
		return errors.Wrap(err, "failed to run bootstrap check")
	}

	return nil
}

// SetNodeProviderID sets the docker provider ID for the kubernetes node
func (m *Machine) SetNodeProviderID(ctx context.Context) error {
log := ctrl.LoggerFrom(ctx)
Expand Down
5 changes: 5 additions & 0 deletions test/infrastructure/docker/exp/docker/nodepool.go
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,11 @@ func (np *NodePool) reconcileMachine(ctx context.Context, machine *docker.Machin
if err := externalMachine.ExecBootstrap(timeoutctx, bootstrapData); err != nil {
return ctrl.Result{}, errors.Wrapf(err, "failed to exec DockerMachinePool instance bootstrap for instance named %s", machine.Name())
}
// Check for bootstrap success
if err := externalMachine.CheckForBootstrapSuccess(timeoutctx); err != nil {
return ctrl.Result{}, errors.Wrap(err, "failed to check for existence of bootstrap success file at /run/cluster-api/bootstrap-success.complete")
}

machineStatus.Bootstrapped = true
// return to surface the machine has been bootstrapped.
return ctrl.Result{Requeue: true}, nil
Expand Down

0 comments on commit 5ee066a

Please sign in to comment.