Skip to content

Commit

Permalink
feat(cluster): add failureDomain spec label
Browse files Browse the repository at this point in the history
The goal of this commit is to allow capz users to specify
which failure domains are eligible for control plane rollouts.

There's a new optional field, AzureCluster.spec.failureDomains, that can be
used to override the ControlPlane value of a discovered failure domain to
false, which prevents the control plane from being deployed there.

The field is optional - if it's missing, all discovered failure domains
are announced in status as-is.

THERE IS NO BREAKING CHANGE TO CURRENT USERS.
  • Loading branch information
handsomejack-42 committed Oct 26, 2023
1 parent 6987809 commit e2eb28d
Show file tree
Hide file tree
Showing 7 changed files with 153 additions and 1 deletion.
8 changes: 8 additions & 0 deletions api/v1beta1/types_class.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package v1beta1

import (
corev1 "k8s.io/api/core/v1"
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
)

// AzureClusterClassSpec defines the AzureCluster properties that may be shared across several Azure clusters.
Expand Down Expand Up @@ -56,6 +57,13 @@ type AzureClusterClassSpec struct {
// Note: All cloud provider config values can be customized by creating the secret beforehand. CloudProviderConfigOverrides is only used when the secret is managed by the Azure Provider.
// +optional
CloudProviderConfigOverrides *CloudProviderConfigOverrides `json:"cloudProviderConfigOverrides,omitempty"`

// FailureDomains is a list of failure domains in the cluster's region, used to restrict
// eligibility to host the control plane. A FailureDomain maps to an availability zone,
// which is a separated group of datacenters within a region.
// See: https://learn.microsoft.com/azure/reliability/availability-zones-overview
// +optional
FailureDomains clusterv1.FailureDomains `json:"failureDomains,omitempty"`
}

// ExtendedLocationSpec defines the ExtendedLocation properties to enable CAPZ for Azure public MEC.
Expand Down
7 changes: 7 additions & 0 deletions api/v1beta1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 7 additions & 1 deletion azure/scope/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -875,11 +875,17 @@ func (s *ClusterScope) APIServerHost() string {
return s.APIServerPublicIP().DNSName
}

// SetFailureDomain stores the given failure domain spec in the AzureCluster's status
// under id, creating the status map on first use.
// When the AzureCluster's spec lists the same id, the stored ControlPlane value is
// true only if both the provided spec and the spec entry have it set to true.
func (s *ClusterScope) SetFailureDomain(id string, spec clusterv1.FailureDomainSpec) {
	// A spec entry can only veto control plane eligibility, never grant it.
	if override, found := s.AzureCluster.Spec.FailureDomains[id]; found {
		spec.ControlPlane = spec.ControlPlane && override.ControlPlane
	}

	if s.AzureCluster.Status.FailureDomains == nil {
		s.AzureCluster.Status.FailureDomains = clusterv1.FailureDomains{}
	}
	s.AzureCluster.Status.FailureDomains[id] = spec
}

Expand Down
70 changes: 70 additions & 0 deletions azure/scope/cluster_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3438,3 +3438,73 @@ func TestVNetPeerings(t *testing.T) {
})
}
}

// TestSetFailureDomain checks how ClusterScope.SetFailureDomain combines the failure
// domains passed to it with the ones listed in the AzureCluster spec: a spec entry may
// switch ControlPlane off for a matching id, but it never switches it on, and ids that
// appear only in the spec are not added to the status.
func TestSetFailureDomain(t *testing.T) {
	t.Parallel()

	// Shorthands for the two failure domain specs the table is built from.
	eligible := clusterv1.FailureDomainSpec{ControlPlane: true}
	ineligible := clusterv1.FailureDomainSpec{ControlPlane: false}

	cases := map[string]struct {
		discoveredFDs clusterv1.FailureDomains
		specifiedFDs  clusterv1.FailureDomains
		expectedFDs   clusterv1.FailureDomains
	}{
		"no failure domains specified": {
			discoveredFDs: clusterv1.FailureDomains{"fd1": eligible, "fd2": ineligible},
			expectedFDs:   clusterv1.FailureDomains{"fd1": eligible, "fd2": ineligible},
		},
		"no failure domains discovered": {
			specifiedFDs: clusterv1.FailureDomains{"fd1": eligible},
		},
		"failure domain specified without intersection": {
			discoveredFDs: clusterv1.FailureDomains{"fd1": eligible},
			specifiedFDs:  clusterv1.FailureDomains{"fd2": ineligible},
			expectedFDs:   clusterv1.FailureDomains{"fd1": eligible},
		},
		"failure domain override to false succeeds": {
			discoveredFDs: clusterv1.FailureDomains{"fd1": eligible},
			specifiedFDs:  clusterv1.FailureDomains{"fd1": ineligible},
			expectedFDs:   clusterv1.FailureDomains{"fd1": ineligible},
		},
		"failure domain override to true fails": {
			discoveredFDs: clusterv1.FailureDomains{"fd1": ineligible},
			specifiedFDs:  clusterv1.FailureDomains{"fd1": eligible},
			expectedFDs:   clusterv1.FailureDomains{"fd1": ineligible},
		},
	}

	for name, tc := range cases {
		tc := tc
		t.Run(name, func(t *testing.T) {
			t.Parallel()
			g := NewWithT(t)

			// Only the spec's failure domains are populated; status starts out empty.
			cluster := &infrav1.AzureCluster{}
			cluster.Spec.FailureDomains = tc.specifiedFDs
			scope := ClusterScope{AzureCluster: cluster}

			for id, discovered := range tc.discoveredFDs {
				scope.SetFailureDomain(id, discovered)
			}

			// Tick off every expected entry, removing it from the status map as we go,
			// so that anything left over afterwards is an unexpected failure domain.
			status := scope.AzureCluster.Status.FailureDomains
			for id, want := range tc.expectedFDs {
				g.Expect(id).Should(BeKeyOf(status))
				g.Expect(status[id].ControlPlane).To(Equal(want.ControlPlane))

				delete(status, id)
			}

			g.Expect(status).To(BeEmpty())
		})
	}
}
Original file line number Diff line number Diff line change
Expand Up @@ -558,6 +558,28 @@ spec:
- name
- type
type: object
failureDomains:
additionalProperties:
description: FailureDomainSpec is the Schema for Cluster API failure
domains. It allows controllers to understand how many failure
domains a cluster can optionally span across.
properties:
attributes:
additionalProperties:
type: string
description: Attributes is a free form map of attributes an
infrastructure provider might use or require.
type: object
controlPlane:
description: ControlPlane determines if this failure domain
is suitable for use by control plane machines.
type: boolean
type: object
description: 'FailureDomains is a list of failure domains in the cluster''s
region, used to restrict eligibility to host the control plane.
A FailureDomain maps to an availability zone, which is a separated
group of datacenters within a region. See: https://learn.microsoft.com/azure/reliability/availability-zones-overview'
type: object
identityRef:
description: IdentityRef is a reference to an AzureIdentity to be
used when reconciling this cluster
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -432,6 +432,30 @@ spec:
- name
- type
type: object
failureDomains:
additionalProperties:
description: FailureDomainSpec is the Schema for Cluster
API failure domains. It allows controllers to understand
how many failure domains a cluster can optionally span
across.
properties:
attributes:
additionalProperties:
type: string
description: Attributes is a free form map of attributes
an infrastructure provider might use or require.
type: object
controlPlane:
description: ControlPlane determines if this failure
domain is suitable for use by control plane machines.
type: boolean
type: object
description: 'FailureDomains is a list of failure domains
in the cluster''s region, used to restrict eligibility to
host the control plane. A FailureDomain maps to an availability
zone, which is a separated group of datacenters within a
region. See: https://learn.microsoft.com/azure/reliability/availability-zones-overview'
type: object
identityRef:
description: IdentityRef is a reference to an AzureIdentity
to be used when reconciling this cluster
Expand Down
15 changes: 15 additions & 0 deletions docs/book/src/topics/failure-domains.md
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,21 @@ spec:
```

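If you can't use `Machine` (or `MachineDeployment`) to explicitly place your VMs (for example, `KubeadmControlPlane` does not accept those as an object reference but rather uses `AzureMachineTemplate` directly), then you can opt to restrict the announcement of discovered failure domains from the cluster's status itself. Entries in `spec.failureDomains` can only turn control plane eligibility off: a discovered failure domain listed with `controlPlane: false` is announced in the status with `controlPlane: false`, whereas listing it with `controlPlane: true` leaves the discovered value unchanged. Failure domains that are not listed are announced as discovered.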

```yaml
apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
kind: AzureCluster
metadata:
name: my-cluster
namespace: default
spec:
location: eastus
failureDomains:
1:
controlPlane: true
```

### Using Virtual Machine Scale Sets

You can use an `AzureMachinePool` object to deploy a Virtual Machine Scale Set which automatically distributes VM instances across the configured availability zones.
Expand Down

0 comments on commit e2eb28d

Please sign in to comment.