Skip to content

Commit

Permalink
forward-port MHC external remediation
Browse files Browse the repository at this point in the history
  • Loading branch information
arghya88 committed Oct 29, 2020
1 parent 7424a1a commit f10a158
Show file tree
Hide file tree
Showing 12 changed files with 704 additions and 2 deletions.
14 changes: 14 additions & 0 deletions api/v1alpha3/condition_consts.go
Original file line number Diff line number Diff line change
Expand Up @@ -126,4 +126,18 @@ const (

// WaitingForRemediationReason is the reason used when a machine fails a health check and remediation is needed.
WaitingForRemediationReason = "WaitingForRemediation"

// ExternalRemediationTemplateAvailable is set on MachineHealthChecks when the MachineHealthCheck controller uses external remediation.
// ExternalRemediationTemplateAvailable is set to false if the external remediation template is not found.
ExternalRemediationTemplateAvailable ConditionType = "ExternalRemediationTemplateAvailable"

// ExternalRemediationTemplateNotFound is the reason used when a machine health check fails to find the external remediation template.
ExternalRemediationTemplateNotFound = "ExternalRemediationTemplateNotFound"

// ExternalRemediationRequestAvailable is set on MachineHealthChecks when the MachineHealthCheck controller uses external remediation.
// ExternalRemediationRequestAvailable is set to false if creating the external remediation request fails.
ExternalRemediationRequestAvailable ConditionType = "ExternalRemediationRequestAvailable"

// ExternalRemediationRequestCreationFailed is the reason used when a machine health check fails to create the external remediation request.
ExternalRemediationRequestCreationFailed = "ExternalRemediationRequestCreationFailed"
)
21 changes: 21 additions & 0 deletions api/v1alpha3/machinehealthcheck_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,15 @@ type MachineHealthCheckSpec struct {
// failed and will be remediated.
// +optional
NodeStartupTimeout *metav1.Duration `json:"nodeStartupTimeout,omitempty"`

// RemediationTemplate is a reference to a remediation template
// provided by an infrastructure provider.
//
// This field is completely optional, when filled, the MachineHealthCheck controller
// creates a new object from the template referenced and hands off remediation of the machine to
// a controller that lives outside of Cluster API.
// +optional
RemediationTemplate *corev1.ObjectReference `json:"remediationTemplate,omitempty"`
}

// ANCHOR_END: MachineHealthCHeckSpec
Expand Down Expand Up @@ -91,6 +100,10 @@ type MachineHealthCheckStatus struct {
// Targets shows the current list of machines the machine health check is watching
// +optional
Targets []string `json:"targets,omitempty"`

// Conditions defines current service state of the MachineHealthCheck.
// +optional
Conditions Conditions `json:"conditions,omitempty"`
}

// ANCHOR_END: MachineHealthCheckStatus
Expand All @@ -114,6 +127,14 @@ type MachineHealthCheck struct {
Status MachineHealthCheckStatus `json:"status,omitempty"`
}

// GetConditions returns the conditions currently recorded in the Status of
// this MachineHealthCheck.
func (m *MachineHealthCheck) GetConditions() Conditions {
return m.Status.Conditions
}

// SetConditions overwrites the conditions recorded in the Status of this
// MachineHealthCheck with the given conditions.
func (m *MachineHealthCheck) SetConditions(conditions Conditions) {
m.Status.Conditions = conditions
}

// +kubebuilder:object:root=true

// MachineHealthCheckList contains a list of MachineHealthCheck
Expand Down
4 changes: 4 additions & 0 deletions api/v1alpha3/zz_generated.conversion.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions api/v1alpha3/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 14 additions & 0 deletions api/v1alpha4/condition_consts.go
Original file line number Diff line number Diff line change
Expand Up @@ -133,4 +133,18 @@ const (

// WaitingForRemediationReason is the reason used when a machine fails a health check and remediation is needed.
WaitingForRemediationReason = "WaitingForRemediation"

// ExternalRemediationTemplateAvailable is set on MachineHealthChecks when the MachineHealthCheck controller uses external remediation.
// ExternalRemediationTemplateAvailable is set to false if the external remediation template is not found.
ExternalRemediationTemplateAvailable ConditionType = "ExternalRemediationTemplateAvailable"

// ExternalRemediationTemplateNotFound is the reason used when a machine health check fails to find the external remediation template.
ExternalRemediationTemplateNotFound = "ExternalRemediationTemplateNotFound"

// ExternalRemediationRequestAvailable is set on MachineHealthChecks when the MachineHealthCheck controller uses external remediation.
// ExternalRemediationRequestAvailable is set to false if creating the external remediation request fails.
ExternalRemediationRequestAvailable ConditionType = "ExternalRemediationRequestAvailable"

// ExternalRemediationRequestCreationFailed is the reason used when a machine health check fails to create the external remediation request.
ExternalRemediationRequestCreationFailed = "ExternalRemediationRequestCreationFailed"
)
21 changes: 21 additions & 0 deletions api/v1alpha4/machinehealthcheck_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,15 @@ type MachineHealthCheckSpec struct {
// failed and will be remediated.
// +optional
NodeStartupTimeout *metav1.Duration `json:"nodeStartupTimeout,omitempty"`

// RemediationTemplate is a reference to a remediation template
// provided by an infrastructure provider.
//
// This field is completely optional, when filled, the MachineHealthCheck controller
// creates a new object from the template referenced and hands off remediation of the machine to
// a controller that lives outside of Cluster API.
// +optional
RemediationTemplate *corev1.ObjectReference `json:"remediationTemplate,omitempty"`
}

// ANCHOR_END: MachineHealthCHeckSpec
Expand Down Expand Up @@ -91,6 +100,10 @@ type MachineHealthCheckStatus struct {
// Targets shows the current list of machines the machine health check is watching
// +optional
Targets []string `json:"targets,omitempty"`

// Conditions defines current service state of the MachineHealthCheck.
// +optional
Conditions Conditions `json:"conditions,omitempty"`
}

// ANCHOR_END: MachineHealthCheckStatus
Expand All @@ -115,6 +128,14 @@ type MachineHealthCheck struct {
Status MachineHealthCheckStatus `json:"status,omitempty"`
}

// GetConditions returns the conditions currently recorded in the Status of
// this MachineHealthCheck.
func (m *MachineHealthCheck) GetConditions() Conditions {
return m.Status.Conditions
}

// SetConditions overwrites the conditions recorded in the Status of this
// MachineHealthCheck with the given conditions.
func (m *MachineHealthCheck) SetConditions(conditions Conditions) {
m.Status.Conditions = conditions
}

// +kubebuilder:object:root=true

// MachineHealthCheckList contains a list of MachineHealthCheck
Expand Down
12 changes: 12 additions & 0 deletions api/v1alpha4/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

108 changes: 108 additions & 0 deletions config/crd/bases/cluster.x-k8s.io_machinehealthchecks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,31 @@ spec:
nodeStartupTimeout:
description: Machines older than this duration without a node will be considered to have failed and will be remediated.
type: string
remediationTemplate:
description: "RemediationTemplate is a reference to a remediation template provided by an infrastructure provider. \n This field is completely optional, when filled, the MachineHealthCheck controller creates a new object from the template referenced and hands off remediation of the machine to a controller that lives outside of Cluster API."
properties:
apiVersion:
description: API version of the referent.
type: string
fieldPath:
description: 'If referring to a piece of an object instead of an entire object, this string should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2]. For example, if the object reference is to a container within a pod, this would take on a value like: "spec.containers{name}" (where "name" refers to the name of the container that triggered the event) or if no container name is specified "spec.containers[2]" (container with index 2 in this pod). This syntax is chosen only to have some well-defined way of referencing a part of an object. TODO: this design is not final and this field is subject to change in the future.'
type: string
kind:
description: 'Kind of the referent. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
type: string
namespace:
description: 'Namespace of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
type: string
resourceVersion:
description: 'Specific resourceVersion to which this reference is made, if any. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency'
type: string
uid:
description: 'UID of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids'
type: string
type: object
selector:
description: Label selector to match machines whose health will be exercised
properties:
Expand Down Expand Up @@ -121,6 +146,35 @@ spec:
status:
description: Most recently observed status of MachineHealthCheck resource
properties:
conditions:
description: Conditions defines current service state of the MachineHealthCheck.
items:
description: Condition defines an observation of a Cluster API resource operational state.
properties:
lastTransitionTime:
description: Last time the condition transitioned from one status to another. This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable.
format: date-time
type: string
message:
description: A human readable message indicating details about the transition. This field may be empty.
type: string
reason:
description: The reason for the condition's last transition in CamelCase. The specific API may choose whether or not this field is considered a guaranteed API. This field may not be empty.
type: string
severity:
description: Severity provides an explicit classification of Reason code, so the users or machines can immediately understand the current situation and act accordingly. The Severity field MUST be set only when Status=False.
type: string
status:
description: Status of the condition, one of True, False, Unknown.
type: string
type:
description: Type of condition in CamelCase or in foo.example.com/CamelCase. Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be useful (see .node.status.conditions), the ability to deconflict is important.
type: string
required:
- status
- type
type: object
type: array
currentHealthy:
description: total number of healthy machines counted by this machine health check
format: int32
Expand Down Expand Up @@ -188,6 +242,31 @@ spec:
nodeStartupTimeout:
description: Machines older than this duration without a node will be considered to have failed and will be remediated.
type: string
remediationTemplate:
description: "RemediationTemplate is a reference to a remediation template provided by an infrastructure provider. \n This field is completely optional, when filled, the MachineHealthCheck controller creates a new object from the template referenced and hands off remediation of the machine to a controller that lives outside of Cluster API."
properties:
apiVersion:
description: API version of the referent.
type: string
fieldPath:
description: 'If referring to a piece of an object instead of an entire object, this string should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2]. For example, if the object reference is to a container within a pod, this would take on a value like: "spec.containers{name}" (where "name" refers to the name of the container that triggered the event) or if no container name is specified "spec.containers[2]" (container with index 2 in this pod). This syntax is chosen only to have some well-defined way of referencing a part of an object. TODO: this design is not final and this field is subject to change in the future.'
type: string
kind:
description: 'Kind of the referent. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
type: string
namespace:
description: 'Namespace of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
type: string
resourceVersion:
description: 'Specific resourceVersion to which this reference is made, if any. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency'
type: string
uid:
description: 'UID of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids'
type: string
type: object
selector:
description: Label selector to match machines whose health will be exercised
properties:
Expand Down Expand Up @@ -246,6 +325,35 @@ spec:
status:
description: Most recently observed status of MachineHealthCheck resource
properties:
conditions:
description: Conditions defines current service state of the MachineHealthCheck.
items:
description: Condition defines an observation of a Cluster API resource operational state.
properties:
lastTransitionTime:
description: Last time the condition transitioned from one status to another. This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable.
format: date-time
type: string
message:
description: A human readable message indicating details about the transition. This field may be empty.
type: string
reason:
description: The reason for the condition's last transition in CamelCase. The specific API may choose whether or not this field is considered a guaranteed API. This field may not be empty.
type: string
severity:
description: Severity provides an explicit classification of Reason code, so the users or machines can immediately understand the current situation and act accordingly. The Severity field MUST be set only when Status=False.
type: string
status:
description: Status of the condition, one of True, False, Unknown.
type: string
type:
description: Type of condition in CamelCase or in foo.example.com/CamelCase. Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be useful (see .node.status.conditions), the ability to deconflict is important.
type: string
required:
- status
- type
type: object
type: array
currentHealthy:
description: total number of healthy machines counted by this machine health check
format: int32
Expand Down
Loading

0 comments on commit f10a158

Please sign in to comment.