From 836c4fd678ac97c220decd58993403f6b54652cc Mon Sep 17 00:00:00 2001 From: Jon Huhn Date: Wed, 19 Oct 2022 14:24:36 -0500 Subject: [PATCH] add AKS resource health to AzureManagedControlPlane --- api/v1beta1/conditions_consts.go | 2 + azure/converters/resourcehealth.go | 69 ++++++ azure/converters/resourcehealth_test.go | 101 ++++++++ azure/defaults.go | 5 + azure/scope/managedcontrolplane.go | 24 ++ azure/services/resourcehealth/client.go | 57 +++++ .../mock_resourcehealth/client_mock.go | 67 +++++ .../resourcehealth/mock_resourcehealth/doc.go | 23 ++ .../resourcehealth_mock.go | 230 ++++++++++++++++++ .../services/resourcehealth/resourcehealth.go | 110 +++++++++ .../resourcehealth/resourcehealth_test.go | 140 +++++++++++ config/manager/manager.yaml | 2 +- docs/book/src/topics/managedcluster.md | 6 + .../azuremanagedcontrolplane_reconciler.go | 2 + feature/feature.go | 9 +- .../controller-manager-patch.yaml | 2 +- test/e2e/config/azure-dev.yaml | 1 + 17 files changed, 847 insertions(+), 3 deletions(-) create mode 100644 azure/converters/resourcehealth.go create mode 100644 azure/converters/resourcehealth_test.go create mode 100644 azure/services/resourcehealth/client.go create mode 100644 azure/services/resourcehealth/mock_resourcehealth/client_mock.go create mode 100644 azure/services/resourcehealth/mock_resourcehealth/doc.go create mode 100644 azure/services/resourcehealth/mock_resourcehealth/resourcehealth_mock.go create mode 100644 azure/services/resourcehealth/resourcehealth.go create mode 100644 azure/services/resourcehealth/resourcehealth_test.go diff --git a/api/v1beta1/conditions_consts.go b/api/v1beta1/conditions_consts.go index 0d5d12a5510..33ddcac0cb1 100644 --- a/api/v1beta1/conditions_consts.go +++ b/api/v1beta1/conditions_consts.go @@ -86,6 +86,8 @@ const ( ManagedClusterRunningCondition clusterv1.ConditionType = "ManagedClusterRunning" // AgentPoolsReadyCondition means the AKS agent pools exist and are ready to be used. 
AgentPoolsReadyCondition clusterv1.ConditionType = "AgentPoolsReady" + // AzureResourceAvailableCondition means the AKS cluster is healthy according to Azure's Resource Health API. + AzureResourceAvailableCondition clusterv1.ConditionType = "AzureResourceAvailable" ) // Azure Services Conditions and Reasons. diff --git a/azure/converters/resourcehealth.go b/azure/converters/resourcehealth.go new file mode 100644 index 00000000000..c60475b127b --- /dev/null +++ b/azure/converters/resourcehealth.go @@ -0,0 +1,69 @@ +/* +Copyright 2022 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package converters + +import ( + "strings" + + "github.com/Azure/azure-sdk-for-go/services/resourcehealth/mgmt/2020-05-01/resourcehealth" + infrav1 "sigs.k8s.io/cluster-api-provider-azure/api/v1beta1" + clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + "sigs.k8s.io/cluster-api/util/conditions" +) + +// SDKAvailabilityStatusToCondition converts an Azure Resource Health availability status to a status condition. 
+func SDKAvailabilityStatusToCondition(availStatus resourcehealth.AvailabilityStatus) *clusterv1.Condition { + if availStatus.Properties == nil { + return conditions.FalseCondition(infrav1.AzureResourceAvailableCondition, "", "", "") + } + + state := availStatus.Properties.AvailabilityState + + if state == resourcehealth.AvailabilityStateValuesAvailable { + return conditions.TrueCondition(infrav1.AzureResourceAvailableCondition) + } + + var reason strings.Builder + if availStatus.Properties.ReasonType != nil { + // CAPI specifies Reason should be CamelCase, though the Azure API + // response may include spaces (e.g. "Customer Initiated") + words := strings.Split(*availStatus.Properties.ReasonType, " ") + for _, word := range words { + if len(word) > 0 { + reason.WriteString(strings.ToTitle(word[:1])) + } + if len(word) > 1 { + reason.WriteString(word[1:]) + } + } + } + + var severity clusterv1.ConditionSeverity + switch availStatus.Properties.AvailabilityState { + case resourcehealth.AvailabilityStateValuesUnavailable: + severity = clusterv1.ConditionSeverityError + case resourcehealth.AvailabilityStateValuesDegraded, resourcehealth.AvailabilityStateValuesUnknown: + severity = clusterv1.ConditionSeverityWarning + } + + var message string + if availStatus.Properties.Summary != nil { + message = *availStatus.Properties.Summary + } + + return conditions.FalseCondition(infrav1.AzureResourceAvailableCondition, reason.String(), severity, message) +} diff --git a/azure/converters/resourcehealth_test.go b/azure/converters/resourcehealth_test.go new file mode 100644 index 00000000000..37d58a7e08b --- /dev/null +++ b/azure/converters/resourcehealth_test.go @@ -0,0 +1,101 @@ +/* +Copyright 2022 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package converters + +import ( + "testing" + + "github.com/Azure/azure-sdk-for-go/services/resourcehealth/mgmt/2020-05-01/resourcehealth" + "github.com/Azure/go-autorest/autorest/to" + . "github.com/onsi/gomega" + corev1 "k8s.io/api/core/v1" + clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" +) + +func TestAzureAvailabilityStatusToCondition(t *testing.T) { + tests := []struct { + name string + avail resourcehealth.AvailabilityStatus + expected *clusterv1.Condition + }{ + { + name: "empty", + avail: resourcehealth.AvailabilityStatus{}, + expected: &clusterv1.Condition{ + Status: corev1.ConditionFalse, + }, + }, + { + name: "available", + avail: resourcehealth.AvailabilityStatus{ + Properties: &resourcehealth.AvailabilityStatusProperties{ + AvailabilityState: resourcehealth.AvailabilityStateValuesAvailable, + }, + }, + expected: &clusterv1.Condition{ + Status: corev1.ConditionTrue, + }, + }, + { + name: "unavailable", + avail: resourcehealth.AvailabilityStatus{ + Properties: &resourcehealth.AvailabilityStatusProperties{ + AvailabilityState: resourcehealth.AvailabilityStateValuesUnavailable, + ReasonType: to.StringPtr("this Is a reason "), + Summary: to.StringPtr("The Summary"), + }, + }, + expected: &clusterv1.Condition{ + Status: corev1.ConditionFalse, + Severity: clusterv1.ConditionSeverityError, + Reason: "ThisIsAReason", + Message: "The Summary", + }, + }, + { + name: "degraded", + avail: resourcehealth.AvailabilityStatus{ + Properties: &resourcehealth.AvailabilityStatusProperties{ + AvailabilityState: resourcehealth.AvailabilityStateValuesDegraded, + ReasonType: 
to.StringPtr("TheReason"), + Summary: to.StringPtr("The Summary"), + }, + }, + expected: &clusterv1.Condition{ + Status: corev1.ConditionFalse, + Severity: clusterv1.ConditionSeverityWarning, + Reason: "TheReason", + Message: "The Summary", + }, + }, + } + + for _, test := range tests { + test := test + t.Run(test.name, func(t *testing.T) { + g := NewWithT(t) + t.Parallel() + + cond := SDKAvailabilityStatusToCondition(test.avail) + + g.Expect(cond.Status).To(Equal(test.expected.Status)) + g.Expect(cond.Severity).To(Equal(test.expected.Severity)) + g.Expect(cond.Reason).To(Equal(test.expected.Reason)) + g.Expect(cond.Message).To(Equal(test.expected.Message)) + }) + } +} diff --git a/azure/defaults.go b/azure/defaults.go index 23b5ad59516..d156aa6c6b4 100644 --- a/azure/defaults.go +++ b/azure/defaults.go @@ -291,6 +291,11 @@ func VirtualNetworkLinkID(subscriptionID, resourceGroup, privateDNSZoneName, vir return fmt.Sprintf("subscriptions/%s/resourceGroups/%s/providers/Microsoft.Network/privateDnsZones/%s/virtualNetworkLinks/%s", subscriptionID, resourceGroup, privateDNSZoneName, virtualNetworkLinkName) } +// ManagedClusterID returns the azure resource ID for a given managed cluster. +func ManagedClusterID(subscriptionID, resourceGroup, managedClusterName string) string { + return fmt.Sprintf("/subscriptions/%s/resourceGroups/%s/providers/Microsoft.ContainerService/managedClusters/%s", subscriptionID, resourceGroup, managedClusterName) +} + // GetBootstrappingVMExtension returns the CAPZ Bootstrapping VM extension. // The CAPZ Bootstrapping extension is a simple clone of https://github.com/Azure/custom-script-extension-linux for Linux or // https://docs.microsoft.com/en-us/azure/virtual-machines/extensions/custom-script-windows for Windows. 
diff --git a/azure/scope/managedcontrolplane.go b/azure/scope/managedcontrolplane.go index cca7cb6da42..7699c552a56 100644 --- a/azure/scope/managedcontrolplane.go +++ b/azure/scope/managedcontrolplane.go @@ -20,6 +20,7 @@ import ( "context" "encoding/json" "strings" + "time" "github.com/Azure/go-autorest/autorest" "github.com/Azure/go-autorest/autorest/to" @@ -44,6 +45,8 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" ) +const resourceHealthWarningInitialGracePeriod = 1 * time.Hour + // ManagedControlPlaneScopeParams defines the input parameters used to create a new managed // control plane. type ManagedControlPlaneScopeParams struct { @@ -197,6 +200,7 @@ func (s *ManagedControlPlaneScope) PatchObject(ctx context.Context) error { infrav1.SubnetsReadyCondition, infrav1.ManagedClusterRunningCondition, infrav1.AgentPoolsReadyCondition, + infrav1.AzureResourceAvailableCondition, }}) } @@ -655,3 +659,23 @@ func (s *ManagedControlPlaneScope) TagsSpecs() []azure.TagsSpec { }, } } + +// AvailabilityStatusResource refers to the AzureManagedControlPlane. +func (s *ManagedControlPlaneScope) AvailabilityStatusResource() conditions.Setter { + return s.ControlPlane +} + +// AvailabilityStatusResourceURI constructs the ID of the underlying AKS resource. +func (s *ManagedControlPlaneScope) AvailabilityStatusResourceURI() string { + return azure.ManagedClusterID(s.SubscriptionID(), s.ResourceGroup(), s.ControlPlane.Name) +} + +// AvailabilityStatusFilter ignores the health metrics connection error that +// occurs on startup for every AKS cluster. 
+func (s *ManagedControlPlaneScope) AvailabilityStatusFilter(cond *clusterv1.Condition) *clusterv1.Condition { + if time.Since(s.ControlPlane.CreationTimestamp.Time) < resourceHealthWarningInitialGracePeriod && + cond.Severity == clusterv1.ConditionSeverityWarning { + return conditions.TrueCondition(infrav1.AzureResourceAvailableCondition) + } + return cond +} diff --git a/azure/services/resourcehealth/client.go b/azure/services/resourcehealth/client.go new file mode 100644 index 00000000000..3e2af664681 --- /dev/null +++ b/azure/services/resourcehealth/client.go @@ -0,0 +1,57 @@ +/* +Copyright 2022 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package resourcehealth + +import ( + "context" + + "github.com/Azure/azure-sdk-for-go/services/resourcehealth/mgmt/2020-05-01/resourcehealth" + "github.com/Azure/go-autorest/autorest" + "sigs.k8s.io/cluster-api-provider-azure/azure" + "sigs.k8s.io/cluster-api-provider-azure/util/tele" +) + +// client wraps go-sdk. +type client interface { + GetByResource(context.Context, string) (resourcehealth.AvailabilityStatus, error) +} + +// azureClient contains the Azure go-sdk Client. +type azureClient struct { + availabilityStatuses resourcehealth.AvailabilityStatusesClient +} + +// newClient creates a new resource health client from an authorizer. 
+func newClient(auth azure.Authorizer) *azureClient { + c := newResourceHealthClient(auth.SubscriptionID(), auth.BaseURI(), auth.Authorizer()) + return &azureClient{c} +} + +// newResourceHealthClient creates a new resource health client from subscription ID. +func newResourceHealthClient(subscriptionID string, baseURI string, authorizer autorest.Authorizer) resourcehealth.AvailabilityStatusesClient { + healthClient := resourcehealth.NewAvailabilityStatusesClientWithBaseURI(baseURI, subscriptionID) + azure.SetAutoRestClientDefaults(&healthClient.Client, authorizer) + return healthClient +} + +// GetByResource gets the availability status for the specified resource. +func (ac *azureClient) GetByResource(ctx context.Context, resourceURI string) (resourcehealth.AvailabilityStatus, error) { + ctx, _, done := tele.StartSpanWithLogger(ctx, "resourcehealth.AzureClient.GetByResource") + defer done() + + return ac.availabilityStatuses.GetByResource(ctx, resourceURI, "", "") +} diff --git a/azure/services/resourcehealth/mock_resourcehealth/client_mock.go b/azure/services/resourcehealth/mock_resourcehealth/client_mock.go new file mode 100644 index 00000000000..b5a7c6e0a8c --- /dev/null +++ b/azure/services/resourcehealth/mock_resourcehealth/client_mock.go @@ -0,0 +1,67 @@ +/* +Copyright The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Code generated by MockGen. DO NOT EDIT. +// Source: ../client.go + +// Package mock_resourcehealth is a generated GoMock package. 
+package mock_resourcehealth + +import ( + context "context" + reflect "reflect" + + resourcehealth "github.com/Azure/azure-sdk-for-go/services/resourcehealth/mgmt/2020-05-01/resourcehealth" + gomock "github.com/golang/mock/gomock" +) + +// Mockclient is a mock of client interface. +type Mockclient struct { + ctrl *gomock.Controller + recorder *MockclientMockRecorder +} + +// MockclientMockRecorder is the mock recorder for Mockclient. +type MockclientMockRecorder struct { + mock *Mockclient +} + +// NewMockclient creates a new mock instance. +func NewMockclient(ctrl *gomock.Controller) *Mockclient { + mock := &Mockclient{ctrl: ctrl} + mock.recorder = &MockclientMockRecorder{mock} + return mock +} + +// EXPECT returns an object that allows the caller to indicate expected use. +func (m *Mockclient) EXPECT() *MockclientMockRecorder { + return m.recorder +} + +// GetByResource mocks base method. +func (m *Mockclient) GetByResource(arg0 context.Context, arg1 string) (resourcehealth.AvailabilityStatus, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GetByResource", arg0, arg1) + ret0, _ := ret[0].(resourcehealth.AvailabilityStatus) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// GetByResource indicates an expected call of GetByResource. +func (mr *MockclientMockRecorder) GetByResource(arg0, arg1 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetByResource", reflect.TypeOf((*Mockclient)(nil).GetByResource), arg0, arg1) +} diff --git a/azure/services/resourcehealth/mock_resourcehealth/doc.go b/azure/services/resourcehealth/mock_resourcehealth/doc.go new file mode 100644 index 00000000000..7c5bbd9816f --- /dev/null +++ b/azure/services/resourcehealth/mock_resourcehealth/doc.go @@ -0,0 +1,23 @@ +/* +Copyright 2022 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Run go generate to regenerate this mock. +// +//go:generate ../../../../hack/tools/bin/mockgen -destination client_mock.go -package mock_resourcehealth -source ../client.go Client +//go:generate ../../../../hack/tools/bin/mockgen -destination resourcehealth_mock.go -package mock_resourcehealth -source ../resourcehealth.go ResourceHealthScope,AvailabilityStatusFilterer +//go:generate /usr/bin/env bash -c "cat ../../../../hack/boilerplate/boilerplate.generatego.txt client_mock.go > _client_mock.go && mv _client_mock.go client_mock.go" +//go:generate /usr/bin/env bash -c "cat ../../../../hack/boilerplate/boilerplate.generatego.txt resourcehealth_mock.go > _resourcehealth_mock.go && mv _resourcehealth_mock.go resourcehealth_mock.go" +package mock_resourcehealth diff --git a/azure/services/resourcehealth/mock_resourcehealth/resourcehealth_mock.go b/azure/services/resourcehealth/mock_resourcehealth/resourcehealth_mock.go new file mode 100644 index 00000000000..03d95059f8b --- /dev/null +++ b/azure/services/resourcehealth/mock_resourcehealth/resourcehealth_mock.go @@ -0,0 +1,230 @@ +/* +Copyright The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Code generated by MockGen. DO NOT EDIT. +// Source: ../resourcehealth.go + +// Package mock_resourcehealth is a generated GoMock package. +package mock_resourcehealth + +import ( + reflect "reflect" + + autorest "github.com/Azure/go-autorest/autorest" + gomock "github.com/golang/mock/gomock" + v1beta1 "sigs.k8s.io/cluster-api/api/v1beta1" + conditions "sigs.k8s.io/cluster-api/util/conditions" +) + +// MockResourceHealthScope is a mock of ResourceHealthScope interface. +type MockResourceHealthScope struct { + ctrl *gomock.Controller + recorder *MockResourceHealthScopeMockRecorder +} + +// MockResourceHealthScopeMockRecorder is the mock recorder for MockResourceHealthScope. +type MockResourceHealthScopeMockRecorder struct { + mock *MockResourceHealthScope +} + +// NewMockResourceHealthScope creates a new mock instance. +func NewMockResourceHealthScope(ctrl *gomock.Controller) *MockResourceHealthScope { + mock := &MockResourceHealthScope{ctrl: ctrl} + mock.recorder = &MockResourceHealthScopeMockRecorder{mock} + return mock +} + +// EXPECT returns an object that allows the caller to indicate expected use. +func (m *MockResourceHealthScope) EXPECT() *MockResourceHealthScopeMockRecorder { + return m.recorder +} + +// Authorizer mocks base method. +func (m *MockResourceHealthScope) Authorizer() autorest.Authorizer { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "Authorizer") + ret0, _ := ret[0].(autorest.Authorizer) + return ret0 +} + +// Authorizer indicates an expected call of Authorizer. +func (mr *MockResourceHealthScopeMockRecorder) Authorizer() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Authorizer", reflect.TypeOf((*MockResourceHealthScope)(nil).Authorizer)) +} + +// AvailabilityStatusResource mocks base method. 
+func (m *MockResourceHealthScope) AvailabilityStatusResource() conditions.Setter { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "AvailabilityStatusResource") + ret0, _ := ret[0].(conditions.Setter) + return ret0 +} + +// AvailabilityStatusResource indicates an expected call of AvailabilityStatusResource. +func (mr *MockResourceHealthScopeMockRecorder) AvailabilityStatusResource() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "AvailabilityStatusResource", reflect.TypeOf((*MockResourceHealthScope)(nil).AvailabilityStatusResource)) +} + +// AvailabilityStatusResourceURI mocks base method. +func (m *MockResourceHealthScope) AvailabilityStatusResourceURI() string { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "AvailabilityStatusResourceURI") + ret0, _ := ret[0].(string) + return ret0 +} + +// AvailabilityStatusResourceURI indicates an expected call of AvailabilityStatusResourceURI. +func (mr *MockResourceHealthScopeMockRecorder) AvailabilityStatusResourceURI() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "AvailabilityStatusResourceURI", reflect.TypeOf((*MockResourceHealthScope)(nil).AvailabilityStatusResourceURI)) +} + +// BaseURI mocks base method. +func (m *MockResourceHealthScope) BaseURI() string { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "BaseURI") + ret0, _ := ret[0].(string) + return ret0 +} + +// BaseURI indicates an expected call of BaseURI. +func (mr *MockResourceHealthScopeMockRecorder) BaseURI() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "BaseURI", reflect.TypeOf((*MockResourceHealthScope)(nil).BaseURI)) +} + +// ClientID mocks base method. +func (m *MockResourceHealthScope) ClientID() string { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "ClientID") + ret0, _ := ret[0].(string) + return ret0 +} + +// ClientID indicates an expected call of ClientID. 
+func (mr *MockResourceHealthScopeMockRecorder) ClientID() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ClientID", reflect.TypeOf((*MockResourceHealthScope)(nil).ClientID)) +} + +// ClientSecret mocks base method. +func (m *MockResourceHealthScope) ClientSecret() string { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "ClientSecret") + ret0, _ := ret[0].(string) + return ret0 +} + +// ClientSecret indicates an expected call of ClientSecret. +func (mr *MockResourceHealthScopeMockRecorder) ClientSecret() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ClientSecret", reflect.TypeOf((*MockResourceHealthScope)(nil).ClientSecret)) +} + +// CloudEnvironment mocks base method. +func (m *MockResourceHealthScope) CloudEnvironment() string { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "CloudEnvironment") + ret0, _ := ret[0].(string) + return ret0 +} + +// CloudEnvironment indicates an expected call of CloudEnvironment. +func (mr *MockResourceHealthScopeMockRecorder) CloudEnvironment() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CloudEnvironment", reflect.TypeOf((*MockResourceHealthScope)(nil).CloudEnvironment)) +} + +// HashKey mocks base method. +func (m *MockResourceHealthScope) HashKey() string { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "HashKey") + ret0, _ := ret[0].(string) + return ret0 +} + +// HashKey indicates an expected call of HashKey. +func (mr *MockResourceHealthScopeMockRecorder) HashKey() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "HashKey", reflect.TypeOf((*MockResourceHealthScope)(nil).HashKey)) +} + +// SubscriptionID mocks base method. 
+func (m *MockResourceHealthScope) SubscriptionID() string { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "SubscriptionID") + ret0, _ := ret[0].(string) + return ret0 +} + +// SubscriptionID indicates an expected call of SubscriptionID. +func (mr *MockResourceHealthScopeMockRecorder) SubscriptionID() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SubscriptionID", reflect.TypeOf((*MockResourceHealthScope)(nil).SubscriptionID)) +} + +// TenantID mocks base method. +func (m *MockResourceHealthScope) TenantID() string { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "TenantID") + ret0, _ := ret[0].(string) + return ret0 +} + +// TenantID indicates an expected call of TenantID. +func (mr *MockResourceHealthScopeMockRecorder) TenantID() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "TenantID", reflect.TypeOf((*MockResourceHealthScope)(nil).TenantID)) +} + +// MockAvailabilityStatusFilterer is a mock of AvailabilityStatusFilterer interface. +type MockAvailabilityStatusFilterer struct { + ctrl *gomock.Controller + recorder *MockAvailabilityStatusFiltererMockRecorder +} + +// MockAvailabilityStatusFiltererMockRecorder is the mock recorder for MockAvailabilityStatusFilterer. +type MockAvailabilityStatusFiltererMockRecorder struct { + mock *MockAvailabilityStatusFilterer +} + +// NewMockAvailabilityStatusFilterer creates a new mock instance. +func NewMockAvailabilityStatusFilterer(ctrl *gomock.Controller) *MockAvailabilityStatusFilterer { + mock := &MockAvailabilityStatusFilterer{ctrl: ctrl} + mock.recorder = &MockAvailabilityStatusFiltererMockRecorder{mock} + return mock +} + +// EXPECT returns an object that allows the caller to indicate expected use. +func (m *MockAvailabilityStatusFilterer) EXPECT() *MockAvailabilityStatusFiltererMockRecorder { + return m.recorder +} + +// AvailabilityStatusFilter mocks base method. 
+func (m *MockAvailabilityStatusFilterer) AvailabilityStatusFilter(cond *v1beta1.Condition) *v1beta1.Condition { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "AvailabilityStatusFilter", cond) + ret0, _ := ret[0].(*v1beta1.Condition) + return ret0 +} + +// AvailabilityStatusFilter indicates an expected call of AvailabilityStatusFilter. +func (mr *MockAvailabilityStatusFiltererMockRecorder) AvailabilityStatusFilter(cond interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "AvailabilityStatusFilter", reflect.TypeOf((*MockAvailabilityStatusFilterer)(nil).AvailabilityStatusFilter), cond) +} diff --git a/azure/services/resourcehealth/resourcehealth.go b/azure/services/resourcehealth/resourcehealth.go new file mode 100644 index 00000000000..988b418f616 --- /dev/null +++ b/azure/services/resourcehealth/resourcehealth.go @@ -0,0 +1,110 @@ +/* +Copyright 2022 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package resourcehealth + +import ( + "context" + + "github.com/pkg/errors" + corev1 "k8s.io/api/core/v1" + infrav1 "sigs.k8s.io/cluster-api-provider-azure/api/v1beta1" + "sigs.k8s.io/cluster-api-provider-azure/azure" + "sigs.k8s.io/cluster-api-provider-azure/azure/converters" + "sigs.k8s.io/cluster-api-provider-azure/feature" + "sigs.k8s.io/cluster-api-provider-azure/util/tele" + clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + "sigs.k8s.io/cluster-api/util/conditions" +) + +const serviceName = "resourcehealth" + +// ResourceHealthScope defines the scope interface for a resourcehealth service. +type ResourceHealthScope interface { + azure.Authorizer + AvailabilityStatusResourceURI() string + AvailabilityStatusResource() conditions.Setter +} + +// AvailabilityStatusFilterer transforms the condition derived from the +// availability status to allow the condition to be overridden in specific +// circumstances. +type AvailabilityStatusFilterer interface { + AvailabilityStatusFilter(cond *clusterv1.Condition) *clusterv1.Condition +} + +// Service provides operations on Azure resources. +type Service struct { + Scope ResourceHealthScope + client +} + +// New creates a new service. +func New(scope ResourceHealthScope) *Service { + return &Service{ + Scope: scope, + client: newClient(scope), + } +} + +// Name returns the service name. +func (s *Service) Name() string { + return serviceName +} + +// Reconcile ensures the resource's availability status is reflected in its own status. 
+func (s *Service) Reconcile(ctx context.Context) error { + ctx, log, done := tele.StartSpanWithLogger(ctx, "resourcehealth.Service.Reconcile") + defer done() + + if !feature.Gates.Enabled(feature.AKSResourceHealth) { + conditions.Delete(s.Scope.AvailabilityStatusResource(), infrav1.AzureResourceAvailableCondition) + return nil + } + + resource := s.Scope.AvailabilityStatusResourceURI() + availStatus, err := s.GetByResource(ctx, resource) + if err != nil { + return errors.Wrapf(err, "failed to get availability status for resource %s", resource) + } + log.V(2).Info("got availability status for resource", "resource", resource, "status", availStatus) + + cond := converters.SDKAvailabilityStatusToCondition(availStatus) + if filterer, ok := s.Scope.(AvailabilityStatusFilterer); ok { + cond = filterer.AvailabilityStatusFilter(cond) + } + + conditions.Set(s.Scope.AvailabilityStatusResource(), cond) + + if cond.Status == corev1.ConditionFalse { + return errors.Errorf("resource is not available: %s", cond.Message) + } + + return nil +} + +// Delete is a no-op. +func (s *Service) Delete(ctx context.Context) error { + _, _, done := tele.StartSpanWithLogger(ctx, "resourcehealth.Service.Delete") + defer done() + + return nil +} + +// IsManaged always returns true. +func (s *Service) IsManaged(ctx context.Context) (bool, error) { + return true, nil +} diff --git a/azure/services/resourcehealth/resourcehealth_test.go b/azure/services/resourcehealth/resourcehealth_test.go new file mode 100644 index 00000000000..1dbcd21b184 --- /dev/null +++ b/azure/services/resourcehealth/resourcehealth_test.go @@ -0,0 +1,140 @@ +/* +Copyright 2022 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package resourcehealth + +import ( + "context" + "testing" + + "github.com/Azure/azure-sdk-for-go/services/resourcehealth/mgmt/2020-05-01/resourcehealth" + "github.com/Azure/go-autorest/autorest/to" + "github.com/golang/mock/gomock" + . "github.com/onsi/gomega" + "github.com/pkg/errors" + utilfeature "k8s.io/component-base/featuregate/testing" + infrav1 "sigs.k8s.io/cluster-api-provider-azure/api/v1beta1" + "sigs.k8s.io/cluster-api-provider-azure/azure/services/resourcehealth/mock_resourcehealth" + "sigs.k8s.io/cluster-api-provider-azure/feature" + gomockinternal "sigs.k8s.io/cluster-api-provider-azure/internal/test/matchers/gomock" + "sigs.k8s.io/cluster-api/util/conditions" +) + +func TestReconcileResourceHealth(t *testing.T) { + testcases := []struct { + name string + featureDisabled bool + filterEnabled bool + expect func(s *mock_resourcehealth.MockResourceHealthScopeMockRecorder, m *mock_resourcehealth.MockclientMockRecorder, f *mock_resourcehealth.MockAvailabilityStatusFiltererMockRecorder) + expectedError string + }{ + { + name: "available resource", + expect: func(s *mock_resourcehealth.MockResourceHealthScopeMockRecorder, m *mock_resourcehealth.MockclientMockRecorder, _ *mock_resourcehealth.MockAvailabilityStatusFiltererMockRecorder) { + s.AvailabilityStatusResource().Times(1) + s.AvailabilityStatusResourceURI().Times(1) + m.GetByResource(gomockinternal.AContext(), gomock.Any()).Times(1).Return(resourcehealth.AvailabilityStatus{ + Properties: &resourcehealth.AvailabilityStatusProperties{ + AvailabilityState: resourcehealth.AvailabilityStateValuesAvailable, 
+ }, + }, nil) + }, + expectedError: "", + }, + { + name: "unavailable resource", + expect: func(s *mock_resourcehealth.MockResourceHealthScopeMockRecorder, m *mock_resourcehealth.MockclientMockRecorder, _ *mock_resourcehealth.MockAvailabilityStatusFiltererMockRecorder) { + s.AvailabilityStatusResource().Times(1) + s.AvailabilityStatusResourceURI().Times(1) + m.GetByResource(gomockinternal.AContext(), gomock.Any()).Times(1).Return(resourcehealth.AvailabilityStatus{ + Properties: &resourcehealth.AvailabilityStatusProperties{ + AvailabilityState: resourcehealth.AvailabilityStateValuesUnavailable, + Summary: to.StringPtr("summary"), + }, + }, nil) + }, + expectedError: "resource is not available: summary", + }, + { + name: "API error", + expect: func(s *mock_resourcehealth.MockResourceHealthScopeMockRecorder, m *mock_resourcehealth.MockclientMockRecorder, _ *mock_resourcehealth.MockAvailabilityStatusFiltererMockRecorder) { + s.AvailabilityStatusResourceURI().Times(1).Return("myURI") + m.GetByResource(gomockinternal.AContext(), gomock.Any()).Times(1).Return(resourcehealth.AvailabilityStatus{}, errors.New("some API error")) + }, + expectedError: "failed to get availability status for resource myURI: some API error", + }, + { + name: "filter", + filterEnabled: true, + expect: func(s *mock_resourcehealth.MockResourceHealthScopeMockRecorder, m *mock_resourcehealth.MockclientMockRecorder, f *mock_resourcehealth.MockAvailabilityStatusFiltererMockRecorder) { + s.AvailabilityStatusResource().Times(1) + s.AvailabilityStatusResourceURI().Times(1) + m.GetByResource(gomockinternal.AContext(), gomock.Any()).Times(1).Return(resourcehealth.AvailabilityStatus{ + Properties: &resourcehealth.AvailabilityStatusProperties{ + AvailabilityState: resourcehealth.AvailabilityStateValuesUnavailable, + Summary: to.StringPtr("summary"), + }, + }, nil) + // ignore the above status + f.AvailabilityStatusFilter(gomock.Any()).Return(conditions.TrueCondition(infrav1.AzureResourceAvailableCondition)) + 
}, + expectedError: "", + }, + { + name: "feature disabled", + featureDisabled: true, + expect: func(s *mock_resourcehealth.MockResourceHealthScopeMockRecorder, _ *mock_resourcehealth.MockclientMockRecorder, _ *mock_resourcehealth.MockAvailabilityStatusFiltererMockRecorder) { + s.AvailabilityStatusResource().Times(1) + }, + expectedError: "", + }, + } + + for _, tc := range testcases { + t.Run(tc.name, func(t *testing.T) { + g := NewWithT(t) + mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + scopeMock := mock_resourcehealth.NewMockResourceHealthScope(mockCtrl) + clientMock := mock_resourcehealth.NewMockclient(mockCtrl) + filtererMock := mock_resourcehealth.NewMockAvailabilityStatusFilterer(mockCtrl) + + tc.expect(scopeMock.EXPECT(), clientMock.EXPECT(), filtererMock.EXPECT()) + + s := &Service{ + Scope: scopeMock, + client: clientMock, + } + if tc.filterEnabled { + s.Scope = struct { + ResourceHealthScope + AvailabilityStatusFilterer + }{scopeMock, filtererMock} + } + + defer utilfeature.SetFeatureGateDuringTest(t, feature.Gates, feature.AKSResourceHealth, !tc.featureDisabled)() + + err := s.Reconcile(context.TODO()) + + if tc.expectedError != "" { + g.Expect(err).To(HaveOccurred()) + g.Expect(err).To(MatchError(tc.expectedError)) + } else { + g.Expect(err).NotTo(HaveOccurred()) + } + }) + } +} diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml index 05662c76220..8a7f0be2878 100644 --- a/config/manager/manager.yaml +++ b/config/manager/manager.yaml @@ -23,7 +23,7 @@ spec: - args: - --leader-elect - "--metrics-bind-addr=localhost:8080" - - "--feature-gates=MachinePool=${EXP_MACHINE_POOL:=false},AKS=${EXP_AKS:=false}" + - "--feature-gates=MachinePool=${EXP_MACHINE_POOL:=false},AKS=${EXP_AKS:=false},AKSResourceHealth=${EXP_AKS_RESOURCE_HEALTH:=false}" - "--v=0" image: controller:latest imagePullPolicy: Always diff --git a/docs/book/src/topics/managedcluster.md b/docs/book/src/topics/managedcluster.md index b5e1da774a7..67d4b830036 
100644 --- a/docs/book/src/topics/managedcluster.md +++ b/docs/book/src/topics/managedcluster.md @@ -63,6 +63,12 @@ export EXP_MACHINE_POOL=true export EXP_AKS=true ``` +Optionally, the following feature flags may be set: + +```bash +export EXP_AKS_RESOURCE_HEALTH=true +``` + Create a local kind cluster to run the management cluster components: ```bash diff --git a/exp/controllers/azuremanagedcontrolplane_reconciler.go b/exp/controllers/azuremanagedcontrolplane_reconciler.go index afc0b301d38..e1dec506dc9 100644 --- a/exp/controllers/azuremanagedcontrolplane_reconciler.go +++ b/exp/controllers/azuremanagedcontrolplane_reconciler.go @@ -24,6 +24,7 @@ import ( "sigs.k8s.io/cluster-api-provider-azure/azure/scope" "sigs.k8s.io/cluster-api-provider-azure/azure/services/groups" "sigs.k8s.io/cluster-api-provider-azure/azure/services/managedclusters" + "sigs.k8s.io/cluster-api-provider-azure/azure/services/resourcehealth" "sigs.k8s.io/cluster-api-provider-azure/azure/services/subnets" "sigs.k8s.io/cluster-api-provider-azure/azure/services/tags" "sigs.k8s.io/cluster-api-provider-azure/azure/services/virtualnetworks" @@ -51,6 +52,7 @@ func newAzureManagedControlPlaneReconciler(scope *scope.ManagedControlPlaneScope subnets.New(scope), managedclusters.New(scope), tags.New(scope), + resourcehealth.New(scope), }, } } diff --git a/feature/feature.go b/feature/feature.go index 17edc425ae0..d217517082c 100644 --- a/feature/feature.go +++ b/feature/feature.go @@ -34,6 +34,12 @@ const ( // owner: @alexeldeib // alpha: v0.4 AKS featuregate.Feature = "AKS" + + // AKSResourceHealth is the feature gate for reporting Azure Resource Health + // on AKS managed clusters. + // owner: @nojnhuh + // alpha: v1.7 + AKSResourceHealth featuregate.Feature = "AKSResourceHealth" ) func init() { @@ -44,5 +50,6 @@ func init() { // To add a new feature, define a key for it above and add it here. 
var defaultCAPZFeatureGates = map[featuregate.Feature]featuregate.FeatureSpec{ // Every feature should be initiated here: - AKS: {Default: false, PreRelease: featuregate.Alpha}, + AKS: {Default: false, PreRelease: featuregate.Alpha}, + AKSResourceHealth: {Default: false, PreRelease: featuregate.Alpha}, } diff --git a/hack/observability/opentelemetry/controller-manager-patch.yaml b/hack/observability/opentelemetry/controller-manager-patch.yaml index 69da9ee65af..8b8db675132 100644 --- a/hack/observability/opentelemetry/controller-manager-patch.yaml +++ b/hack/observability/opentelemetry/controller-manager-patch.yaml @@ -11,5 +11,5 @@ spec: args: - "--metrics-bind-addr=:8080" - "--leader-elect" - - "--feature-gates=MachinePool=${EXP_MACHINE_POOL:=false},AKS=${EXP_AKS:=false}" + - "--feature-gates=MachinePool=${EXP_MACHINE_POOL:=false},AKS=${EXP_AKS:=false},AKSResourceHealth=${EXP_AKS_RESOURCE_HEALTH:=false}" - "--enable-tracing" diff --git a/test/e2e/config/azure-dev.yaml b/test/e2e/config/azure-dev.yaml index cc4ddbc5faf..14c73ac3453 100644 --- a/test/e2e/config/azure-dev.yaml +++ b/test/e2e/config/azure-dev.yaml @@ -156,6 +156,7 @@ variables: CNI: "${PWD}/templates/addons/calico.yaml" REDACT_LOG_SCRIPT: "${PWD}/hack/log/redact.sh" EXP_AKS: "true" + EXP_AKS_RESOURCE_HEALTH: "true" EXP_MACHINE_POOL: "true" EXP_CLUSTER_RESOURCE_SET: "true" CLUSTER_TOPOLOGY: "true"