Skip to content

Commit

Permalink
Discard and replace new VMs if they never join the cluster
Browse files Browse the repository at this point in the history
  • Loading branch information
mrog committed Jun 21, 2022
1 parent b9dc417 commit 892c41a
Show file tree
Hide file tree
Showing 6 changed files with 46 additions and 4 deletions.
14 changes: 14 additions & 0 deletions api/v1beta1/cloudstackmachine_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package v1beta1
import (
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"time"
)

const (
Expand Down Expand Up @@ -130,10 +131,23 @@ type CloudStackMachineStatus struct {
// +optional
InstanceState InstanceState `json:"instanceState,omitempty"`

// InstanceStateLastUpdated is the time the instance state was last updated.
// +optional
InstanceStateLastUpdated metav1.Time `json:"instanceStateLastUpdated,omitempty"`

// Ready indicates the readiness of the provider resource.
Ready bool `json:"ready"`
}

// TimeSinceLastStateChange returns the amount of time that's elapsed since the state was last updated. If the state
// hasn't ever been updated, it returns a negative value.
func (s *CloudStackMachineStatus) TimeSinceLastStateChange() time.Duration {
if s.InstanceStateLastUpdated.IsZero() {
return time.Duration(-1)
}
return time.Since(s.InstanceStateLastUpdated.Time)
}

// +kubebuilder:object:root=true
// +kubebuilder:resource:path=cloudstackmachines,scope=Namespaced,categories=cluster-api,shortName=csm
// +kubebuilder:storageversion
Expand Down
1 change: 1 addition & 0 deletions api/v1beta1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,11 @@ spec:
description: InstanceState is the state of the CloudStack instance
for this machine.
type: string
instanceStateLastUpdated:
description: InstanceStateLastUpdated is the time the instance state
was last updated.
format: date-time
type: string
ready:
description: Ready indicates the readiness of the provider resource.
type: boolean
Expand Down
21 changes: 19 additions & 2 deletions controllers/cloudstackmachinestatechecker_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,16 +70,33 @@ func (r *CloudStackMachineStateCheckerReconciliationRunner) Reconcile() (ctrl.Re
if res, err := r.GetParent(r.ReconciliationSubject, r.CSMachine)(); r.ShouldReturn(res, err) {
return res, err
}

if res, err := r.GetParent(r.CSMachine, r.CAPIMachine)(); r.ShouldReturn(res, err) {
return res, err
}

if err := r.CSClient.ResolveVMInstanceDetails(r.CSMachine); err != nil && !strings.Contains(strings.ToLower(err.Error()), "no match found") {
return r.ReturnWrappedError(err, "failed to resolve VM instance details")
}
if r.CSMachine.Status.InstanceState == "Running" {

csRunning := r.CSMachine.Status.InstanceState == "Running"
csTimeInState := r.CSMachine.Status.TimeSinceLastStateChange()
capiRunning := r.CAPIMachine.Status.Phase == "Running"

// capiTimeout indicates that a new VM is running, but it isn't reachable due to a network issue or a misconfiguration.
// When this happens, the machine should be deleted or else the cluster won't ever recover.
capiTimeout := csRunning && !capiRunning && csTimeInState > 5*time.Minute

if csRunning && capiRunning {
r.ReconciliationSubject.Status.Ready = true
} else {
} else if !csRunning || capiTimeout {
r.Log.Info("CloudStack instance in bad state",
"name", r.CSMachine.Name,
"instance-id", r.CSMachine.Spec.InstanceID,
"cs-state", r.CSMachine.Status.InstanceState,
"cs-time-in-state", csTimeInState.String(),
"capi-phase", r.CAPIMachine.Status.Phase)

if err := r.K8sClient.Delete(r.RequestCtx, r.CAPIMachine); err != nil {
return r.ReturnWrappedError(err, "failed to delete CAPI machine")
}
Expand Down
7 changes: 6 additions & 1 deletion pkg/cloud/instance.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package cloud

import (
"fmt"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"strings"

capiv1 "sigs.k8s.io/cluster-api/api/v1beta1"
Expand All @@ -41,7 +42,11 @@ func setMachineDataFromVMMetrics(vmResponse *cloudstack.VirtualMachinesMetric, c
csMachine.Spec.ProviderID = pointer.StringPtr(fmt.Sprintf("cloudstack:///%s", vmResponse.Id))
csMachine.Spec.InstanceID = pointer.StringPtr(vmResponse.Id)
csMachine.Status.Addresses = []corev1.NodeAddress{{Type: corev1.NodeInternalIP, Address: vmResponse.Ipaddress}}
csMachine.Status.InstanceState = infrav1.InstanceState(vmResponse.State)
newInstanceState := infrav1.InstanceState(vmResponse.State)
if newInstanceState != csMachine.Status.InstanceState || (newInstanceState != "" && csMachine.Status.InstanceStateLastUpdated.IsZero()) {
csMachine.Status.InstanceState = newInstanceState
csMachine.Status.InstanceStateLastUpdated = metav1.Now()
}
}

// ResolveVMInstanceDetails Retrieves VM instance details by csMachine.Spec.InstanceID or csMachine.Name, and
Expand Down
2 changes: 1 addition & 1 deletion test/e2e/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,6 @@ You can specify JOB environment variable which value is a regular expression to
For example,

```shell
JOB=PR-Blocking make run
JOB=PR-Blocking make run-e2e
```
This command runs the e2e tests that contains `PR-Blocking` in their spec names.

0 comments on commit 892c41a

Please sign in to comment.