From 48f201066e1b5559cc9949a2c78536bb73fca516 Mon Sep 17 00:00:00 2001
From: fabriziopandini
Date: Thu, 4 Jun 2020 18:36:09 +0200
Subject: [PATCH] Add conditions to the DockerMachine object

---
 .../docker/api/v1alpha3/condition_consts.go   | 65 +++++++++++++
 .../api/v1alpha3/dockermachine_types.go       | 13 +++
 .../api/v1alpha3/zz_generated.deepcopy.go     |  9 +-
 ...cture.cluster.x-k8s.io_dockermachines.yaml | 44 +++++++++
 .../controllers/dockermachine_controller.go   | 91 +++++++++++++------
 test/infrastructure/docker/docker/machine.go  |  5 +
 6 files changed, 197 insertions(+), 30 deletions(-)
 create mode 100644 test/infrastructure/docker/api/v1alpha3/condition_consts.go

diff --git a/test/infrastructure/docker/api/v1alpha3/condition_consts.go b/test/infrastructure/docker/api/v1alpha3/condition_consts.go
new file mode 100644
index 000000000000..ab1fb554c67e
--- /dev/null
+++ b/test/infrastructure/docker/api/v1alpha3/condition_consts.go
@@ -0,0 +1,65 @@
+/*
+Copyright 2020 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package v1alpha3
+
+import clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha3"
+
+// Conditions and condition Reasons for the DockerMachine object.
+
+const ConditionsCount = 2 // number of conditions rolled up into the Ready summary by the step counter
+
+const (
+	// ContainerProvisionedCondition documents the status of the provisioning of the container
+	// generated by a DockerMachine.
+	//
+	// NOTE: Once started, container provisioning completes almost immediately and within the
+	// same reconciliation, so the user will always see a transition from Waiting to Provisioned
+	// without any evidence that the operation is in progress.
+	ContainerProvisionedCondition clusterv1.ConditionType = "ContainerProvisioned"
+
+	// WaitingForClusterInfrastructureReason (Severity=Info) documents a DockerMachine waiting for the cluster
+	// infrastructure to be ready before starting to create the container that provides the DockerMachine
+	// infrastructure.
+	WaitingForClusterInfrastructureReason = "WaitingForClusterInfrastructure"
+
+	// WaitingForBootstrapDataReason (Severity=Info) documents a DockerMachine waiting for the bootstrap
+	// script to be ready before starting to create the container that provides the DockerMachine infrastructure.
+	WaitingForBootstrapDataReason = "WaitingForBootstrapData"
+
+	// ContainerProvisioningFailedReason (Severity=Warning) documents a DockerMachine controller detecting
+	// an error while provisioning the container that provides the DockerMachine infrastructure; these kinds of
+	// errors are usually transient, and failed provisioning is automatically retried by the controller.
+	ContainerProvisioningFailedReason = "ContainerProvisioningFailed"
+)
+
+const (
+	// BootstrapExecSucceededCondition provides an observation of the DockerMachine bootstrap process.
+	// The condition gets generated after ContainerProvisionedCondition is True.
+	//
+	// NOTE: Unlike other providers, container provisioning and bootstrap are directly managed
+	// by the DockerMachine controller (not by cloud-init).
+	BootstrapExecSucceededCondition clusterv1.ConditionType = "BootstrapExecSucceeded"
+
+	// BootstrappingReason (Severity=Info) documents a DockerMachine currently executing the bootstrap
+	// script that creates the Kubernetes node on the newly provisioned machine infrastructure.
+	BootstrappingReason = "Bootstrapping"
+
+	// BootstrapFailedReason (Severity=Warning) documents a DockerMachine controller detecting an error while
+	// bootstrapping the Kubernetes node on the machine just provisioned; these kinds of errors are usually
+	// transient, and failed bootstrap is automatically retried by the controller.
+	BootstrapFailedReason = "BootstrapFailed"
+)
diff --git a/test/infrastructure/docker/api/v1alpha3/dockermachine_types.go b/test/infrastructure/docker/api/v1alpha3/dockermachine_types.go
index d821f96c507e..f5d60d91588e 100644
--- a/test/infrastructure/docker/api/v1alpha3/dockermachine_types.go
+++ b/test/infrastructure/docker/api/v1alpha3/dockermachine_types.go
@@ -18,6 +18,7 @@ package v1alpha3
 
 import (
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha3"
 )
 
 const (
@@ -79,6 +80,10 @@ type DockerMachineStatus struct {
 	// added to the load balancer
 	// +optional
 	LoadBalancerConfigured bool `json:"loadBalancerConfigured,omitempty"`
+
+	// Conditions defines current service state of the DockerMachine.
+	// +optional
+	Conditions clusterv1.Conditions `json:"conditions,omitempty"`
 }
 
 // +kubebuilder:resource:path=dockermachines,scope=Namespaced,categories=cluster-api
@@ -95,6 +100,14 @@ type DockerMachine struct {
 	Status DockerMachineStatus `json:"status,omitempty"`
 }
 
+func (c *DockerMachine) GetConditions() clusterv1.Conditions {
+	return c.Status.Conditions
+}
+
+func (c *DockerMachine) SetConditions(conditions clusterv1.Conditions) {
+	c.Status.Conditions = conditions
+}
+
 // +kubebuilder:object:root=true
 
 // DockerMachineList contains a list of DockerMachine
diff --git a/test/infrastructure/docker/api/v1alpha3/zz_generated.deepcopy.go b/test/infrastructure/docker/api/v1alpha3/zz_generated.deepcopy.go
index 1ca2eeab58b2..8c351345a712 100644
--- a/test/infrastructure/docker/api/v1alpha3/zz_generated.deepcopy.go
+++ b/test/infrastructure/docker/api/v1alpha3/zz_generated.deepcopy.go
@@ -150,7 +150,7 @@ func (in *DockerMachine) DeepCopyInto(out *DockerMachine) {
 	out.TypeMeta = in.TypeMeta
 	in.ObjectMeta.DeepCopyInto(&out.ObjectMeta)
 	in.Spec.DeepCopyInto(&out.Spec)
-	out.Status = in.Status
+	in.Status.DeepCopyInto(&out.Status)
 }
 
 // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DockerMachine.
@@ -236,6 +236,13 @@ func (in *DockerMachineSpec) DeepCopy() *DockerMachineSpec {
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *DockerMachineStatus) DeepCopyInto(out *DockerMachineStatus) {
 	*out = *in
+	if in.Conditions != nil {
+		in, out := &in.Conditions, &out.Conditions
+		*out = make(apiv1alpha3.Conditions, len(*in))
+		for i := range *in {
+			(*in)[i].DeepCopyInto(&(*out)[i])
+		}
+	}
 }
 
 // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DockerMachineStatus.
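
The GetConditions/SetConditions accessors added above are what make the rest of this patch work: they let DockerMachine satisfy the Getter/Setter interfaces expected by the sigs.k8s.io/cluster-api/util/conditions helpers that the controller invokes below. As a minimal, hypothetical sketch (not part of this patch, and assuming the helper signatures exactly as they are invoked elsewhere in this diff):

```go
package main

import (
	"fmt"

	clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha3"
	infrav1 "sigs.k8s.io/cluster-api/test/infrastructure/docker/api/v1alpha3"
	"sigs.k8s.io/cluster-api/util/conditions"
)

func main() {
	// DockerMachine now implements GetConditions/SetConditions, so it can be
	// passed to the generic condition helpers directly.
	dockerMachine := &infrav1.DockerMachine{}

	// Record that provisioning is blocked waiting for cluster infrastructure...
	conditions.MarkFalse(dockerMachine, infrav1.ContainerProvisionedCondition,
		infrav1.WaitingForClusterInfrastructureReason, clusterv1.ConditionSeverityInfo, "")

	// ...and later flip the same condition to True once the container is up.
	conditions.MarkTrue(dockerMachine, infrav1.ContainerProvisionedCondition)

	for _, c := range dockerMachine.GetConditions() {
		fmt.Printf("%s=%s (reason=%q)\n", c.Type, c.Status, c.Reason)
	}
}
```

The regenerated DeepCopyInto above is required for the same reason: Conditions is now a slice field, so the status can no longer be copied by plain assignment.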
diff --git a/test/infrastructure/docker/config/crd/bases/infrastructure.cluster.x-k8s.io_dockermachines.yaml b/test/infrastructure/docker/config/crd/bases/infrastructure.cluster.x-k8s.io_dockermachines.yaml index 6f4cc61014d2..5f16c8af64f6 100644 --- a/test/infrastructure/docker/config/crd/bases/infrastructure.cluster.x-k8s.io_dockermachines.yaml +++ b/test/infrastructure/docker/config/crd/bases/infrastructure.cluster.x-k8s.io_dockermachines.yaml @@ -82,6 +82,50 @@ spec: status: description: DockerMachineStatus defines the observed state of DockerMachine properties: + conditions: + description: Conditions defines current service state of the DockerMachine. + items: + description: Condition defines an observation of a Cluster API resource + operational state. + properties: + lastTransitionTime: + description: Last time the condition transitioned from one status + to another. This should be when the underlying condition changed. + If that is not known, then using the time when the API field + changed is acceptable. + format: date-time + type: string + message: + description: A human readable message indicating details about + the transition. This field may be empty. + type: string + reason: + description: The reason for the condition's last transition + in CamelCase. The specific API may choose whether or not this + field is considered a guaranteed API. This field may not be + empty. + type: string + severity: + description: Severity provides an explicit classification of + Reason code, so the users or machines can immediately understand + the current situation and act accordingly. The Severity field + MUST be set only when Status=False. + type: string + status: + description: Status of the condition, one of True, False, Unknown. + type: string + type: + description: Type of condition in CamelCase or in foo.example.com/CamelCase. + Many .condition.type values are consistent across resources + like Available, but because arbitrary conditions can be useful + (see .node.status.conditions), the ability to deconflict is + important. + type: string + required: + - status + - type + type: object + type: array loadBalancerConfigured: description: LoadBalancerConfigured denotes that the machine has been added to the load balancer diff --git a/test/infrastructure/docker/controllers/dockermachine_controller.go b/test/infrastructure/docker/controllers/dockermachine_controller.go index 8cea9544dc19..f19e3728cca9 100644 --- a/test/infrastructure/docker/controllers/dockermachine_controller.go +++ b/test/infrastructure/docker/controllers/dockermachine_controller.go @@ -30,6 +30,7 @@ import ( infrav1 "sigs.k8s.io/cluster-api/test/infrastructure/docker/api/v1alpha3" "sigs.k8s.io/cluster-api/test/infrastructure/docker/docker" "sigs.k8s.io/cluster-api/util" + "sigs.k8s.io/cluster-api/util/conditions" "sigs.k8s.io/cluster-api/util/patch" "sigs.k8s.io/cluster-api/util/predicates" ctrl "sigs.k8s.io/controller-runtime" @@ -95,12 +96,6 @@ func (r *DockerMachineReconciler) Reconcile(req ctrl.Request) (_ ctrl.Result, re log = log.WithValues("cluster", cluster.Name) - // Make sure infrastructure is ready - if !cluster.Status.InfrastructureReady { - log.Info("Waiting for DockerCluster Controller to create cluster infrastructure") - return ctrl.Result{}, nil - } - // Fetch the Docker Cluster. 
 	dockerCluster := &infrav1.DockerCluster{}
 	dockerClusterName := client.ObjectKey{
@@ -114,21 +109,6 @@ func (r *DockerMachineReconciler) Reconcile(req ctrl.Request) (_ ctrl.Result, re
 
 	log = log.WithValues("docker-cluster", dockerCluster.Name)
 
-	// Create a helper for managing the docker container hosting the machine.
-	externalMachine, err := docker.NewMachine(cluster.Name, machine.Name, dockerMachine.Spec.CustomImage, log)
-	if err != nil {
-		return ctrl.Result{}, errors.Wrapf(err, "failed to create helper for managing the externalMachine")
-	}
-
-	// Create a helper for managing a docker container hosting the loadbalancer.
-	// NB. the machine controller has to manage the cluster load balancer because the current implementation of the
-	// docker load balancer does not support auto-discovery of control plane nodes, so CAPD should take care of
-	// updating the cluster load balancer configuration when control plane machines are added/removed
-	externalLoadBalancer, err := docker.NewLoadBalancer(cluster.Name, log)
-	if err != nil {
-		return ctrl.Result{}, errors.Wrapf(err, "failed to create helper for managing the externalLoadBalancer")
-	}
-
 	// Initialize the patch helper
 	patchHelper, err := patch.NewHelper(dockerMachine, r)
 	if err != nil {
@@ -136,6 +116,9 @@ func (r *DockerMachineReconciler) Reconcile(req ctrl.Request) (_ ctrl.Result, re
 	}
 	// Always attempt to Patch the DockerMachine object and status after each reconciliation.
 	defer func() {
+		// Always update the readyCondition; the summary is represented using the "1 of x completed" step counter notation.
+		conditions.SetSummary(dockerMachine, conditions.WithStepCounter(infrav1.ConditionsCount))
+
 		if err := patchHelper.Patch(ctx, dockerMachine); err != nil {
 			log.Error(err, "failed to patch DockerMachine")
 			if rerr == nil {
@@ -144,6 +127,28 @@ func (r *DockerMachineReconciler) Reconcile(req ctrl.Request) (_ ctrl.Result, re
 		}
 	}()
 
+	// Check if the infrastructure is ready; otherwise return and wait for the cluster object to be updated.
+	if !cluster.Status.InfrastructureReady {
+		log.Info("Waiting for DockerCluster Controller to create cluster infrastructure")
+		conditions.MarkFalse(dockerMachine, infrav1.ContainerProvisionedCondition, infrav1.WaitingForClusterInfrastructureReason, clusterv1.ConditionSeverityInfo, "")
+		return ctrl.Result{}, nil
+	}
+
+	// Create a helper for managing the docker container hosting the machine.
+	externalMachine, err := docker.NewMachine(cluster.Name, machine.Name, dockerMachine.Spec.CustomImage, log)
+	if err != nil {
+		return ctrl.Result{}, errors.Wrapf(err, "failed to create helper for managing the externalMachine")
+	}
+
+	// Create a helper for managing a docker container hosting the loadbalancer.
+	// NB. the machine controller has to manage the cluster load balancer because the current implementation of the
+	// docker load balancer does not support auto-discovery of control plane nodes, so CAPD should take care of
+	// updating the cluster load balancer configuration when control plane machines are added/removed
+	externalLoadBalancer, err := docker.NewLoadBalancer(cluster.Name, log)
+	if err != nil {
+		return ctrl.Result{}, errors.Wrapf(err, "failed to create helper for managing the externalLoadBalancer")
+	}
+
 	// Handle deleted machines
 	if !dockerMachine.ObjectMeta.DeletionTimestamp.IsZero() {
 		return r.reconcileDelete(ctx, machine, dockerMachine, externalMachine, externalLoadBalancer)
@@ -160,14 +165,16 @@ func (r *DockerMachineReconciler) reconcileNormal(ctx context.Context, machine *
 	// if the machine is already provisioned, return
 	if dockerMachine.Spec.ProviderID != nil {
 		// ensure ready state is set.
-		// This is required after move, bacuse status is not moved to the target cluster.
+		// This is required after move, because status is not moved to the target cluster.
 		dockerMachine.Status.Ready = true
+		conditions.MarkTrue(dockerMachine, infrav1.ContainerProvisionedCondition)
 		return ctrl.Result{}, nil
 	}
 
 	// Make sure bootstrap data is available and populated.
 	if machine.Spec.Bootstrap.DataSecretName == nil {
 		log.Info("Waiting for the Bootstrap provider controller to set bootstrap data")
+		conditions.MarkFalse(dockerMachine, infrav1.ContainerProvisionedCondition, infrav1.WaitingForBootstrapDataReason, clusterv1.ConditionSeverityInfo, "")
 		return ctrl.Result{}, nil
 	}
 
@@ -185,18 +192,30 @@ func (r *DockerMachineReconciler) reconcileNormal(ctx context.Context, machine *
 			if err := externalMachine.Delete(ctx); err != nil {
 				log.Info("Failed to cleanup machine")
 			}
+			dockerMachine.Status.LoadBalancerConfigured = false
+			conditions.MarkFalse(dockerMachine, infrav1.ContainerProvisionedCondition, infrav1.ContainerProvisioningFailedReason, clusterv1.ConditionSeverityWarning, "Re-provisioning")
+			conditions.Delete(dockerMachine, infrav1.BootstrapExecSucceededCondition)
+
 			res = ctrl.Result{RequeueAfter: 10 * time.Second}
 			retErr = nil
 		}
 	}()
 
-	if err := externalMachine.Create(ctx, role, machine.Spec.Version, dockerMachine.Spec.ExtraMounts); err != nil {
-		return ctrl.Result{}, errors.Wrap(err, "failed to create worker DockerMachine")
+	// Create the machine container if it does not exist yet.
+	if !externalMachine.Exists() {
+		if err := externalMachine.Create(ctx, role, machine.Spec.Version, dockerMachine.Spec.ExtraMounts); err != nil {
+			return ctrl.Result{}, errors.Wrap(err, "failed to create worker DockerMachine")
+		}
 	}
 
+	// Update the ContainerProvisionedCondition.
+	conditions.MarkTrue(dockerMachine, infrav1.ContainerProvisionedCondition)
+
 	// Preload images into the container
-	if err := externalMachine.PreloadLoadImages(ctx, dockerMachine.Spec.PreLoadImages); err != nil {
-		return ctrl.Result{}, errors.Wrap(err, "failed to pre-load images into the DockerMachine")
+	if len(dockerMachine.Spec.PreLoadImages) > 0 {
+		if err := externalMachine.PreloadLoadImages(ctx, dockerMachine.Spec.PreLoadImages); err != nil {
+			return ctrl.Result{}, errors.Wrap(err, "failed to pre-load images into the DockerMachine")
+		}
 	}
 
 	// if the machine is a control plane update the load balancer configuration
@@ -209,34 +228,48 @@ func (r *DockerMachineReconciler) reconcileNormal(ctx context.Context, machine *
 		dockerMachine.Status.LoadBalancerConfigured = true
 	}
 
+	// At this stage we are ready for bootstrap. However, if the BootstrapExecSucceededCondition is missing, we add it
+	// and requeue so the user can see the change of state before the bootstrap actually starts.
+	// NOTE: Usually controllers should not rely on status they are setting, but on the observed state; however,
+	// in this case we do so because we explicitly want to give feedback to users.
+	if !conditions.Has(dockerMachine, infrav1.BootstrapExecSucceededCondition) {
+		conditions.MarkFalse(dockerMachine, infrav1.BootstrapExecSucceededCondition, infrav1.BootstrappingReason, clusterv1.ConditionSeverityInfo, "")
+		return ctrl.Result{Requeue: true}, nil
+	}
+
 	// if the machine isn't bootstrapped, only then run bootstrap scripts
 	if !dockerMachine.Spec.Bootstrapped {
 		bootstrapData, err := r.getBootstrapData(ctx, machine)
 		if err != nil {
 			r.Log.Error(err, "failed to get bootstrap data")
-			return ctrl.Result{}, nil
+			return ctrl.Result{}, err
 		}
 		timeoutctx, cancel := context.WithTimeout(ctx, 3*time.Minute)
 		defer cancel()
 		// Run the bootstrap script. Simulates cloud-init.
 		if err := externalMachine.ExecBootstrap(timeoutctx, bootstrapData); err != nil {
+			conditions.MarkFalse(dockerMachine, infrav1.BootstrapExecSucceededCondition, infrav1.BootstrapFailedReason, clusterv1.ConditionSeverityWarning, "Repeating bootstrap")
 			return ctrl.Result{}, errors.Wrap(err, "failed to exec DockerMachine bootstrap")
 		}
 		dockerMachine.Spec.Bootstrapped = true
 	}
 
+	// Update the BootstrapExecSucceededCondition.
+	conditions.MarkTrue(dockerMachine, infrav1.BootstrapExecSucceededCondition)
+
 	// Usually a cloud provider will do this, but there is no docker-cloud provider.
-	// Requeue after 1s if there is an error, as this is likely momentary load balancer
+	// Requeue if there is an error, as this is likely due to momentary load balancer
 	// state changes during control plane provisioning.
 	if err := externalMachine.SetNodeProviderID(ctx); err != nil {
 		r.Log.Error(err, "failed to patch the Kubernetes node with the machine providerID")
-		return ctrl.Result{RequeueAfter: time.Second}, nil
+		return ctrl.Result{RequeueAfter: 5 * time.Second}, nil
 	}
 	// Set ProviderID so the Cluster API Machine Controller can pull it
 	providerID := externalMachine.ProviderID()
 	dockerMachine.Spec.ProviderID = &providerID
 	dockerMachine.Status.Ready = true
+	conditions.MarkTrue(dockerMachine, infrav1.ContainerProvisionedCondition)
 	return ctrl.Result{}, nil
 }
diff --git a/test/infrastructure/docker/docker/machine.go b/test/infrastructure/docker/docker/machine.go
index 472e0a1befc8..7dbc9889296b 100644
--- a/test/infrastructure/docker/docker/machine.go
+++ b/test/infrastructure/docker/docker/machine.go
@@ -89,6 +89,11 @@ func NewMachine(cluster, machine, image string, logger logr.Logger) (*Machine, e
 	}, nil
 }
 
+// Exists returns true if the container for this machine exists.
+func (m *Machine) Exists() bool {
+	return m.container != nil
+}
+
 // ContainerName return the name of the container for this machine
 func (m *Machine) ContainerName() string {
 	return machineContainerName(m.cluster, m.machine)
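
Taken together, the two conditions and the deferred conditions.SetSummary call surface overall progress on the standard Ready condition using the step counter. A minimal, hypothetical sketch of that roll-up behavior (not part of this patch; it assumes clusterv1.ReadyCondition and the conditions helpers exactly as used in the controller above):

```go
package main

import (
	"fmt"

	clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha3"
	infrav1 "sigs.k8s.io/cluster-api/test/infrastructure/docker/api/v1alpha3"
	"sigs.k8s.io/cluster-api/util/conditions"
)

func main() {
	dockerMachine := &infrav1.DockerMachine{}

	// Simulate the state half-way through reconciliation: the container is
	// provisioned, but the bootstrap script is still running.
	conditions.MarkTrue(dockerMachine, infrav1.ContainerProvisionedCondition)
	conditions.MarkFalse(dockerMachine, infrav1.BootstrapExecSucceededCondition,
		infrav1.BootstrappingReason, clusterv1.ConditionSeverityInfo, "")

	// The same call the controller makes in its deferred patch block: roll the
	// individual conditions up into the Ready condition with a step counter.
	conditions.SetSummary(dockerMachine, conditions.WithStepCounter(infrav1.ConditionsCount))

	// Expected to report Ready=False with a message along the lines of "1 of 2 completed".
	if ready := conditions.Get(dockerMachine, clusterv1.ReadyCondition); ready != nil {
		fmt.Printf("Ready=%s reason=%s message=%q\n", ready.Status, ready.Reason, ready.Message)
	}
}
```

This is also why requeueing after setting BootstrappingReason matters: the summary is recomputed on every patch, so users watching the object see the intermediate "1 of 2 completed" state while the bootstrap script runs instead of a single jump to Ready=True.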