Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

✨ Add sentinel file to signal successful bootstrapping #4084

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion bootstrap/kubeadm/internal/cloudinit/cloudinit.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,10 @@ import (
)

const (
standardJoinCommand = "kubeadm join --config /run/kubeadm/kubeadm-join-config.yaml %s"
standardJoinCommand = "kubeadm join --config /run/kubeadm/kubeadm-join-config.yaml %s"
// sentinelFileCommand writes a file to /run/cluster-api to signal successful Kubernetes bootstrapping in a way that
// works on both Linux and Windows.
sentinelFileCommand = "echo success > /run/cluster-api/bootstrap-success.complete"
vincepri marked this conversation as resolved.
Show resolved Hide resolved
retriableJoinScriptName = "/usr/local/bin/kubeadm-bootstrap-script"
retriableJoinScriptOwner = "root"
retriableJoinScriptPermissions = "0755"
Expand All @@ -50,6 +53,7 @@ type BaseUserData struct {
UseExperimentalRetry bool
KubeadmCommand string
KubeadmVerbosity string
SentinelFileCommand string
}

func (input *BaseUserData) prepare() error {
Expand All @@ -64,6 +68,7 @@ func (input *BaseUserData) prepare() error {
}
input.WriteFiles = append(input.WriteFiles, *joinScriptFile)
}
input.SentinelFileCommand = sentinelFileCommand
return nil
}

Expand Down
7 changes: 6 additions & 1 deletion bootstrap/kubeadm/internal/cloudinit/controlplane_init.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,13 @@ const (
{{.ClusterConfiguration | Indent 6}}
---
{{.InitConfiguration | Indent 6}}
- path: /run/cluster-api/placeholder
owner: root:root
permissions: '0640'
content: "This placeholder file is used to create the /run/cluster-api sub directory in a way that is compatible with both Linux and Windows (mkdir -p /run/cluster-api does not work with Windows)"
runcmd:
{{- template "commands" .PreKubeadmCommands }}
- 'kubeadm init --config /run/kubeadm/kubeadm.yaml {{.KubeadmVerbosity}}'
- 'kubeadm init --config /run/kubeadm/kubeadm.yaml {{.KubeadmVerbosity}} && {{ .SentinelFileCommand }}'
{{- template "commands" .PostKubeadmCommands }}
{{- template "ntp" .NTP }}
{{- template "users" .Users }}
Expand All @@ -57,6 +61,7 @@ func NewInitControlPlane(input *ControlPlaneInput) ([]byte, error) {
input.Header = cloudConfigHeader
input.WriteFiles = input.Certificates.AsFiles()
input.WriteFiles = append(input.WriteFiles, input.AdditionalFiles...)
input.SentinelFileCommand = sentinelFileCommand
userData, err := generate("InitControlplane", controlPlaneCloudInit, input)
if err != nil {
return nil, err
Expand Down
6 changes: 5 additions & 1 deletion bootstrap/kubeadm/internal/cloudinit/controlplane_join.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,13 @@ const (
permissions: '0640'
content: |
{{.JoinConfiguration | Indent 6}}
- path: /run/cluster-api/placeholder
owner: root:root
permissions: '0640'
content: "This placeholder file is used to create the /run/cluster-api sub directory in a way that is compatible with both Linux and Windows (mkdir -p /run/cluster-api does not work with Windows)"
CecileRobertMichon marked this conversation as resolved.
Show resolved Hide resolved
runcmd:
{{- template "commands" .PreKubeadmCommands }}
- {{ .KubeadmCommand }}
- {{ .KubeadmCommand }} && {{ .SentinelFileCommand }}
{{- template "commands" .PostKubeadmCommands }}
{{- template "ntp" .NTP }}
{{- template "users" .Users }}
Expand Down
6 changes: 5 additions & 1 deletion bootstrap/kubeadm/internal/cloudinit/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,13 @@ const (
content: |
---
{{.JoinConfiguration | Indent 6}}
- path: /run/cluster-api/placeholder
owner: root:root
permissions: '0640'
content: "This placeholder file is used to create the /run/cluster-api sub directory in a way that is compatible with both Linux and Windows (mkdir -p /run/cluster-api does not work with Windows)"
runcmd:
{{- template "commands" .PreKubeadmCommands }}
- {{ .KubeadmCommand }}
- {{ .KubeadmCommand }} && {{ .SentinelFileCommand }}
{{- template "commands" .PostKubeadmCommands }}
{{- template "ntp" .NTP }}
{{- template "users" .Users }}
Expand Down
4 changes: 4 additions & 0 deletions docs/book/src/developer/providers/bootstrap.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,10 @@ The following diagram shows the typical logic for a bootstrap provider:
1. Set `status.ready` to true
1. Patch the resource to persist changes

## Sentinel File

A bootstrap provider's bootstrap data must create `/run/cluster-api/bootstrap-success.complete` (or `C:\run\cluster-api\bootstrap-success.complete` for Windows machines) upon successful bootstrapping of a Kubernetes node. This allows infrastructure providers to detect and act on bootstrap failures.

## RBAC

### Provider controller
Expand Down
4 changes: 3 additions & 1 deletion test/infrastructure/docker/api/v1alpha4/condition_consts.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,9 @@ const (
)

const (
// BootstrapExecSucceededCondition provide an observation of the DockerMachine bootstrap process.
// BootstrapExecSucceededCondition provides an observation of the DockerMachine bootstrap process.
// It is set based on successful execution of bootstrap commands and on the existence of
// the /run/cluster-api/bootstrap-success.complete file.
// The condition gets generated after ContainerProvisionedCondition is True.
//
// NOTE as a difference from other providers, container provisioning and bootstrap are directly managed
Expand Down
10 changes: 6 additions & 4 deletions test/infrastructure/docker/cloudinit/runcmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ package cloudinit

import (
"encoding/json"
"fmt"
"strings"

"github.com/pkg/errors"
Expand Down Expand Up @@ -92,15 +91,18 @@ func (a *runCmd) Commands() ([]Cmd, error) {
func hackKubeadmIgnoreErrors(c Cmd) Cmd {
// case kubeadm commands are defined as a string
if c.Cmd == "/bin/sh" && len(c.Args) >= 2 {
if c.Args[0] == "-c" && (strings.Contains(c.Args[1], "kubeadm init") || strings.Contains(c.Args[1], "kubeadm join")) {
c.Args[1] = fmt.Sprintf("%s %s", c.Args[1], "--ignore-preflight-errors=all")
if c.Args[0] == "-c" {
c.Args[1] = strings.Replace(c.Args[1], "kubeadm init", "kubeadm init --ignore-preflight-errors=all", 1)
c.Args[1] = strings.Replace(c.Args[1], "kubeadm join", "kubeadm join --ignore-preflight-errors=all", 1)
}
}

// case kubeadm commands are defined as a list
if c.Cmd == "kubeadm" && len(c.Args) >= 1 {
if c.Args[0] == "init" || c.Args[0] == "join" {
c.Args = append(c.Args, "--ignore-preflight-errors=all")
c.Args = append(c.Args, "") // make space
copy(c.Args[2:], c.Args[1:]) // shift elements
c.Args[1] = "--ignore-preflight-errors=all" // insert the additional arg
}
}

Expand Down
6 changes: 3 additions & 3 deletions test/infrastructure/docker/cloudinit/runcmd_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ func TestRunCmdRun(t *testing.T) {
},
},
expectedCmds: []Cmd{
{Cmd: "/bin/sh", Args: []string{"-c", "kubeadm init --config /run/kubeadm/kubeadm.yaml --ignore-preflight-errors=all"}},
{Cmd: "/bin/sh", Args: []string{"-c", "kubeadm init --ignore-preflight-errors=all --config /run/kubeadm/kubeadm.yaml"}},
},
},
}
Expand Down Expand Up @@ -98,11 +98,11 @@ runcmd:

r.Cmds[0] = hackKubeadmIgnoreErrors(r.Cmds[0])

expected0 := Cmd{Cmd: "/bin/sh", Args: []string{"-c", "kubeadm init --config=/run/kubeadm/kubeadm.yaml --ignore-preflight-errors=all"}}
expected0 := Cmd{Cmd: "/bin/sh", Args: []string{"-c", "kubeadm init --ignore-preflight-errors=all --config=/run/kubeadm/kubeadm.yaml"}}
g.Expect(r.Cmds[0]).To(Equal(expected0))

r.Cmds[1] = hackKubeadmIgnoreErrors(r.Cmds[1])

expected1 := Cmd{Cmd: "kubeadm", Args: []string{"join", "--config=/run/kubeadm/kubeadm-controlplane-join-config.yaml", "--ignore-preflight-errors=all"}}
expected1 := Cmd{Cmd: "kubeadm", Args: []string{"join", "--ignore-preflight-errors=all", "--config=/run/kubeadm/kubeadm-controlplane-join-config.yaml"}}
g.Expect(r.Cmds[1]).To(Equal(expected1))
}
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,12 @@ func (r *DockerMachineReconciler) reconcileNormal(ctx context.Context, cluster *
conditions.MarkFalse(dockerMachine, infrav1.BootstrapExecSucceededCondition, infrav1.BootstrapFailedReason, clusterv1.ConditionSeverityWarning, "Repeating bootstrap")
return ctrl.Result{}, errors.Wrap(err, "failed to exec DockerMachine bootstrap")
}
// Check for bootstrap success
if err := externalMachine.CheckForBootstrapSuccess(timeoutctx); err != nil {
conditions.MarkFalse(dockerMachine, infrav1.BootstrapExecSucceededCondition, infrav1.BootstrapFailedReason, clusterv1.ConditionSeverityWarning, "Repeating bootstrap")
return ctrl.Result{}, errors.Wrap(err, "failed to check for existence of bootstrap success file at /run/cluster-api/bootstrap-success.complete")
}

dockerMachine.Spec.Bootstrapped = true
}

Expand Down
22 changes: 22 additions & 0 deletions test/infrastructure/docker/docker/machine.go
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,28 @@ func (m *Machine) ExecBootstrap(ctx context.Context, data string) error {
return nil
}

// CheckForBootstrapSuccess checks if bootstrap was successful by checking for existence of the sentinel file.
// It runs `test -f /run/cluster-api/bootstrap-success.complete` through the container's Commander and
// returns an error when the machine has no container or when that command fails.
func (m *Machine) CheckForBootstrapSuccess(ctx context.Context) error {
	log := ctrl.LoggerFrom(ctx)

	if m.container == nil {
		return errors.New("unable to check for bootstrap success: the container hosting this machine does not exist")
	}

	var outErr bytes.Buffer
	var outStd bytes.Buffer
	cmd := m.container.Commander.Command("test", "-f", "/run/cluster-api/bootstrap-success.complete")
	cmd.SetStderr(&outErr)
	cmd.SetStdout(&outStd)
	if err := cmd.Run(ctx); err != nil {
		// The captured output is logged here because the returned error does not carry it.
		log.Info("Failed running command", "command", "test -f /run/cluster-api/bootstrap-success.complete", "stdout", outStd.String(), "stderr", outErr.String())
		// errors.Wrap already records a stack trace, so wrapping with errors.WithStack first is redundant.
		return errors.Wrap(err, "failed to run bootstrap check")
	}

	return nil
}

// SetNodeProviderID sets the docker provider ID for the kubernetes node
func (m *Machine) SetNodeProviderID(ctx context.Context) error {
log := ctrl.LoggerFrom(ctx)
Expand Down
5 changes: 5 additions & 0 deletions test/infrastructure/docker/exp/docker/nodepool.go
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,11 @@ func (np *NodePool) reconcileMachine(ctx context.Context, machine *docker.Machin
if err := externalMachine.ExecBootstrap(timeoutctx, bootstrapData); err != nil {
return ctrl.Result{}, errors.Wrapf(err, "failed to exec DockerMachinePool instance bootstrap for instance named %s", machine.Name())
}
// Check for bootstrap success
if err := externalMachine.CheckForBootstrapSuccess(timeoutctx); err != nil {
return ctrl.Result{}, errors.Wrap(err, "failed to check for existence of bootstrap success file at /run/cluster-api/bootstrap-success.complete")
}

machineStatus.Bootstrapped = true
// return to surface the machine has been bootstrapped.
return ctrl.Result{Requeue: true}, nil
Expand Down