From e2eb28dea574e20b0eceda334dc93dc765240c1a Mon Sep 17 00:00:00 2001 From: HandsomeJack Date: Tue, 17 Oct 2023 09:50:21 +0000 Subject: [PATCH] feat(cluster): add failureDomains spec field The goal of this commit is to allow CAPZ users to specify which failure domains are eligible for control plane rollouts. There's a new optional field, AzureCluster.spec.failureDomains, that can be used to override a discovered failure domain's ControlPlane value to false, preventing the control plane from being deployed there. If the field is missing, all discovered failure domains are announced in status as-is. THERE IS NO BREAKING CHANGE TO CURRENT USERS. --- api/v1beta1/types_class.go | 8 +++ api/v1beta1/zz_generated.deepcopy.go | 7 ++ azure/scope/cluster.go | 8 ++- azure/scope/cluster_test.go | 70 +++++++++++++++++++ ...ucture.cluster.x-k8s.io_azureclusters.yaml | 22 ++++++ ...luster.x-k8s.io_azureclustertemplates.yaml | 24 +++++++ docs/book/src/topics/failure-domains.md | 15 ++++ 7 files changed, 153 insertions(+), 1 deletion(-) diff --git a/api/v1beta1/types_class.go b/api/v1beta1/types_class.go index 6bc37d840e2..cfec0c603e1 100644 --- a/api/v1beta1/types_class.go +++ b/api/v1beta1/types_class.go @@ -18,6 +18,7 @@ package v1beta1 import ( corev1 "k8s.io/api/core/v1" + clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" ) // AzureClusterClassSpec defines the AzureCluster properties that may be shared across several Azure clusters. @@ -56,6 +57,13 @@ type AzureClusterClassSpec struct { // Note: All cloud provider config values can be customized by creating the secret beforehand. CloudProviderConfigOverrides is only used when the secret is managed by the Azure Provider. // +optional CloudProviderConfigOverrides *CloudProviderConfigOverrides `json:"cloudProviderConfigOverrides,omitempty"` + + // FailureDomains is a list of failure domains in the cluster's region, used to restrict + // eligibility to host the control plane. 
A FailureDomain maps to an availability zone, + // which is a separated group of datacenters within a region. + // See: https://learn.microsoft.com/azure/reliability/availability-zones-overview + // +optional + FailureDomains clusterv1.FailureDomains `json:"failureDomains,omitempty"` } // ExtendedLocationSpec defines the ExtendedLocation properties to enable CAPZ for Azure public MEC. diff --git a/api/v1beta1/zz_generated.deepcopy.go b/api/v1beta1/zz_generated.deepcopy.go index 6e83abe36b1..2b355edf37f 100644 --- a/api/v1beta1/zz_generated.deepcopy.go +++ b/api/v1beta1/zz_generated.deepcopy.go @@ -366,6 +366,13 @@ func (in *AzureClusterClassSpec) DeepCopyInto(out *AzureClusterClassSpec) { *out = new(CloudProviderConfigOverrides) (*in).DeepCopyInto(*out) } + if in.FailureDomains != nil { + in, out := &in.FailureDomains, &out.FailureDomains + *out = make(apiv1beta1.FailureDomains, len(*in)) + for key, val := range *in { + (*out)[key] = *val.DeepCopy() + } + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AzureClusterClassSpec. diff --git a/azure/scope/cluster.go b/azure/scope/cluster.go index d03f73e0b18..32d8b0ed409 100644 --- a/azure/scope/cluster.go +++ b/azure/scope/cluster.go @@ -875,11 +875,17 @@ func (s *ClusterScope) APIServerHost() string { return s.APIServerPublicIP().DNSName } -// SetFailureDomain will set the spec for a for a given key. +// SetFailureDomain sets a failure domain in a cluster's status by its id. +// The provided failure domain spec may be overridden to false by cluster's spec property. 
func (s *ClusterScope) SetFailureDomain(id string, spec clusterv1.FailureDomainSpec) { if s.AzureCluster.Status.FailureDomains == nil { s.AzureCluster.Status.FailureDomains = make(clusterv1.FailureDomains) } + + if fd, ok := s.AzureCluster.Spec.FailureDomains[id]; ok && !fd.ControlPlane { + spec.ControlPlane = false + } + s.AzureCluster.Status.FailureDomains[id] = spec } diff --git a/azure/scope/cluster_test.go b/azure/scope/cluster_test.go index 195ecd1d2f4..a44f66f09ec 100644 --- a/azure/scope/cluster_test.go +++ b/azure/scope/cluster_test.go @@ -3438,3 +3438,73 @@ func TestVNetPeerings(t *testing.T) { }) } } + +func TestSetFailureDomain(t *testing.T) { + t.Parallel() + + cases := map[string]struct { + discoveredFDs clusterv1.FailureDomains + specifiedFDs clusterv1.FailureDomains + expectedFDs clusterv1.FailureDomains + }{ + "no failure domains specified": { + discoveredFDs: clusterv1.FailureDomains{ + "fd1": clusterv1.FailureDomainSpec{ControlPlane: true}, + "fd2": clusterv1.FailureDomainSpec{ControlPlane: false}, + }, + expectedFDs: clusterv1.FailureDomains{ + "fd1": clusterv1.FailureDomainSpec{ControlPlane: true}, + "fd2": clusterv1.FailureDomainSpec{ControlPlane: false}, + }, + }, + "no failure domains discovered": { + specifiedFDs: clusterv1.FailureDomains{"fd1": clusterv1.FailureDomainSpec{ControlPlane: true}}, + }, + "failure domain specified without intersection": { + discoveredFDs: clusterv1.FailureDomains{"fd1": clusterv1.FailureDomainSpec{ControlPlane: true}}, + specifiedFDs: clusterv1.FailureDomains{"fd2": clusterv1.FailureDomainSpec{ControlPlane: false}}, + expectedFDs: clusterv1.FailureDomains{"fd1": clusterv1.FailureDomainSpec{ControlPlane: true}}, + }, + "failure domain override to false succeeds": { + discoveredFDs: clusterv1.FailureDomains{"fd1": clusterv1.FailureDomainSpec{ControlPlane: true}}, + specifiedFDs: clusterv1.FailureDomains{"fd1": clusterv1.FailureDomainSpec{ControlPlane: false}}, + expectedFDs: clusterv1.FailureDomains{"fd1": 
clusterv1.FailureDomainSpec{ControlPlane: false}}, + }, + "failure domain override to true fails": { + discoveredFDs: clusterv1.FailureDomains{"fd1": clusterv1.FailureDomainSpec{ControlPlane: false}}, + specifiedFDs: clusterv1.FailureDomains{"fd1": clusterv1.FailureDomainSpec{ControlPlane: true}}, + expectedFDs: clusterv1.FailureDomains{"fd1": clusterv1.FailureDomainSpec{ControlPlane: false}}, + }, + } + + for name, tc := range cases { + tc := tc + t.Run(name, func(t *testing.T) { + t.Parallel() + g := NewWithT(t) + + c := ClusterScope{ + AzureCluster: &infrav1.AzureCluster{ + Spec: infrav1.AzureClusterSpec{ + AzureClusterClassSpec: infrav1.AzureClusterClassSpec{ + FailureDomains: tc.specifiedFDs, + }, + }, + }, + } + + for fdName, fd := range tc.discoveredFDs { + c.SetFailureDomain(fdName, fd) + } + + for fdName, fd := range tc.expectedFDs { + g.Expect(fdName).Should(BeKeyOf(c.AzureCluster.Status.FailureDomains)) + g.Expect(c.AzureCluster.Status.FailureDomains[fdName].ControlPlane).To(Equal(fd.ControlPlane)) + + delete(c.AzureCluster.Status.FailureDomains, fdName) + } + + g.Expect(c.AzureCluster.Status.FailureDomains).To(BeEmpty()) + }) + } +} diff --git a/config/crd/bases/infrastructure.cluster.x-k8s.io_azureclusters.yaml b/config/crd/bases/infrastructure.cluster.x-k8s.io_azureclusters.yaml index 83967b1d889..3a40664dfbf 100644 --- a/config/crd/bases/infrastructure.cluster.x-k8s.io_azureclusters.yaml +++ b/config/crd/bases/infrastructure.cluster.x-k8s.io_azureclusters.yaml @@ -558,6 +558,28 @@ spec: - name - type type: object + failureDomains: + additionalProperties: + description: FailureDomainSpec is the Schema for Cluster API failure + domains. It allows controllers to understand how many failure + domains a cluster can optionally span across. + properties: + attributes: + additionalProperties: + type: string + description: Attributes is a free form map of attributes an + infrastructure provider might use or require. 
+ type: object + controlPlane: + description: ControlPlane determines if this failure domain + is suitable for use by control plane machines. + type: boolean + type: object + description: 'FailureDomains is a list of failure domains in the cluster''s + region, used to restrict eligibility to host the control plane. + A FailureDomain maps to an availability zone, which is a separated + group of datacenters within a region. See: https://learn.microsoft.com/azure/reliability/availability-zones-overview' + type: object identityRef: description: IdentityRef is a reference to an AzureIdentity to be used when reconciling this cluster diff --git a/config/crd/bases/infrastructure.cluster.x-k8s.io_azureclustertemplates.yaml b/config/crd/bases/infrastructure.cluster.x-k8s.io_azureclustertemplates.yaml index 33cd31b0eea..5d781bf2d9e 100644 --- a/config/crd/bases/infrastructure.cluster.x-k8s.io_azureclustertemplates.yaml +++ b/config/crd/bases/infrastructure.cluster.x-k8s.io_azureclustertemplates.yaml @@ -432,6 +432,30 @@ spec: - name - type type: object + failureDomains: + additionalProperties: + description: FailureDomainSpec is the Schema for Cluster + API failure domains. It allows controllers to understand + how many failure domains a cluster can optionally span + across. + properties: + attributes: + additionalProperties: + type: string + description: Attributes is a free form map of attributes + an infrastructure provider might use or require. + type: object + controlPlane: + description: ControlPlane determines if this failure + domain is suitable for use by control plane machines. + type: boolean + type: object + description: 'FailureDomains is a list of failure domains + in the cluster''s region, used to restrict eligibility to + host the control plane. A FailureDomain maps to an availability + zone, which is a separated group of datacenters within a + region. 
See: https://learn.microsoft.com/azure/reliability/availability-zones-overview' + type: object identityRef: description: IdentityRef is a reference to an AzureIdentity to be used when reconciling this cluster diff --git a/docs/book/src/topics/failure-domains.md b/docs/book/src/topics/failure-domains.md index 59ab536f53c..0886d0bd27d 100644 --- a/docs/book/src/topics/failure-domains.md +++ b/docs/book/src/topics/failure-domains.md @@ -128,6 +128,21 @@ spec: ``` +If you can't use `Machine` (or `MachineDeployment`) to explicitly place your VMs (for example, `KubeadmControlPlane` does not accept those as an object reference but rather uses `AzureMachineTemplate` directly), then you can restrict which discovered failure domains are announced as eligible for the control plane by listing them under `failureDomains` in the `AzureCluster` spec. Only entries with `controlPlane: false` have an effect: they mark that failure domain as ineligible for the control plane in the cluster's status, while `controlPlane: true` never overrides a discovered value. + +```yaml +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: AzureCluster +metadata: + name: my-cluster + namespace: default +spec: + location: eastus + failureDomains: + 1: + controlPlane: false +``` + ### Using Virtual Machine Scale Sets You can use an `AzureMachinePool` object to deploy a Virtual Machine Scale Set which automatically distributes VM instances across the configured availability zones.