diff --git a/azure/scope/cluster.go b/azure/scope/cluster.go index c2760667a06..a7741425392 100644 --- a/azure/scope/cluster.go +++ b/azure/scope/cluster.go @@ -38,6 +38,7 @@ import ( infrav1 "sigs.k8s.io/cluster-api-provider-azure/api/v1beta1" "sigs.k8s.io/cluster-api-provider-azure/azure" "sigs.k8s.io/cluster-api-provider-azure/azure/services/groups" + "sigs.k8s.io/cluster-api-provider-azure/azure/services/natgateways" "sigs.k8s.io/cluster-api-provider-azure/azure/services/vnetpeerings" "sigs.k8s.io/cluster-api-provider-azure/util/futures" "sigs.k8s.io/cluster-api-provider-azure/util/tele" @@ -230,14 +231,17 @@ func (s *ClusterScope) RouteTableSpecs() []azure.RouteTableSpec { } // NatGatewaySpecs returns the node nat gateway. -func (s *ClusterScope) NatGatewaySpecs() []azure.NatGatewaySpec { - natGateways := []azure.NatGatewaySpec{} +func (s *ClusterScope) NatGatewaySpecs() []azure.ResourceSpecGetter { + natGateways := []azure.ResourceSpecGetter{} // We ignore the control plane nat gateway, as we will always use a LB to enable egress on the control plane. for _, subnet := range s.NodeSubnets() { if subnet.IsNatGatewayEnabled() { - natGateways = append(natGateways, azure.NatGatewaySpec{ - Name: subnet.NatGateway.Name, + natGateways = append(natGateways, &natgateways.NatGatewaySpec{ + Name: subnet.NatGateway.Name, + ResourceGroup: s.ResourceGroup(), + SubscriptionID: s.SubscriptionID(), + Location: s.Location(), NatGatewayIP: infrav1.PublicIPSpec{ Name: subnet.NatGateway.NatGatewayIP.Name, }, diff --git a/azure/services/natgateways/client.go b/azure/services/natgateways/client.go index db832862167..961de703eb1 100644 --- a/azure/services/natgateways/client.go +++ b/azure/services/natgateways/client.go @@ -18,19 +18,26 @@ package natgateways import ( "context" + "encoding/json" - "github.com/Azure/azure-sdk-for-go/services/network/mgmt/2019-06-01/network" + "github.com/Azure/azure-sdk-for-go/services/network/mgmt/2021-02-01/network" "github.com/Azure/go-autorest/autorest" + azureautorest "github.com/Azure/go-autorest/autorest/azure" + "github.com/pkg/errors" + infrav1 "sigs.k8s.io/cluster-api-provider-azure/api/v1beta1" "sigs.k8s.io/cluster-api-provider-azure/azure" + "sigs.k8s.io/cluster-api-provider-azure/util/reconciler" "sigs.k8s.io/cluster-api-provider-azure/util/tele" ) // client wraps go-sdk. type client interface { Get(context.Context, string, string) (network.NatGateway, error) - CreateOrUpdate(context.Context, string, string, network.NatGateway) error - Delete(context.Context, string, string) error + CreateOrUpdateAsync(context.Context, azure.ResourceSpecGetter) (interface{}, azureautorest.FutureAPI, error) + DeleteAsync(context.Context, azure.ResourceSpecGetter) (azureautorest.FutureAPI, error) + IsDone(context.Context, azureautorest.FutureAPI) (bool, error) + Result(context.Context, azureautorest.FutureAPI, string) (interface{}, error) } // azureClient contains the Azure go-sdk Client. @@ -55,42 +62,127 @@ func netNatGatewaysClient(subscriptionID string, baseURI string, authorizer auto // Get gets the specified nat gateway. func (ac *azureClient) Get(ctx context.Context, resourceGroupName, natGatewayName string) (network.NatGateway, error) { - ctx, _, done := tele.StartSpanWithLogger(ctx, "natgateways.AzureClient.Get") + ctx, _, done := tele.StartSpanWithLogger(ctx, "natgateways.azureClient.Get") defer done() return ac.natgateways.Get(ctx, resourceGroupName, natGatewayName, "") } -// CreateOrUpdate create or updates a nat gateway in a specified resource group. -func (ac *azureClient) CreateOrUpdate(ctx context.Context, resourceGroupName string, natGatewayName string, natGateway network.NatGateway) error { - ctx, _, done := tele.StartSpanWithLogger(ctx, "natgateways.AzureClient.CreateOrUpdate") - defer done() +// CreateOrUpdateAsync creates or updates a Nat Gateway asynchronously. +// It sends a PUT request to Azure and if accepted without error, the func will return a Future which can be used to track the ongoing +// progress of the operation. +func (ac *azureClient) CreateOrUpdateAsync(ctx context.Context, spec azure.ResourceSpecGetter) (interface{}, azureautorest.FutureAPI, error) { + ctx, span := tele.Tracer().Start(ctx, "natgateways.azureClient.CreateOrUpdateAsync") + defer span.End() + + var existingNatGateway interface{} + + if existing, err := ac.Get(ctx, spec.ResourceGroupName(), spec.ResourceName()); err != nil && !azure.ResourceNotFound(err) { + return nil, nil, errors.Wrapf(err, "failed to get Nat Gateway %s for %s in %s", spec.ResourceName(), spec.OwnerResourceName(), spec.ResourceGroupName()) + } else if err == nil { + existingNatGateway = existing + } + + params, err := spec.Parameters(existingNatGateway) + if err != nil { + return nil, nil, errors.Wrapf(err, "failed to get desired parameters for Nat Gateway %s", spec.ResourceName()) + } - future, err := ac.natgateways.CreateOrUpdate(ctx, resourceGroupName, natGatewayName, natGateway) + natGateway, ok := params.(network.NatGateway) + if !ok { + if params == nil { + // nothing to do here. + return existingNatGateway, nil, nil + } + return nil, nil, errors.Errorf("%T is not a network.NatGateway", params) + } + + future, err := ac.natgateways.CreateOrUpdate(ctx, spec.ResourceGroupName(), spec.ResourceName(), natGateway) if err != nil { - return err + return nil, nil, err } + + ctx, cancel := context.WithTimeout(ctx, reconciler.DefaultAzureCallTimeout) + defer cancel() + err = future.WaitForCompletionRef(ctx, ac.natgateways.Client) if err != nil { - return err + // if an error occurs, return the future. + // this means the long-running operation didn't finish in the specified timeout. + return nil, &future, err } - _, err = future.Result(ac.natgateways) - return err + + result, err := future.Result(ac.natgateways) + + // if the operation completed, return a nil future + return result, nil, err } -// Delete deletes the specified nat gateway. -func (ac *azureClient) Delete(ctx context.Context, resourceGroupName, natGatewayName string) error { - ctx, _, done := tele.StartSpanWithLogger(ctx, "natgateways.AzureClient.Delete") - defer done() +// DeleteAsync deletes a Nat Gateway asynchronously. DeleteAsync sends a DELETE +// request to Azure and if accepted without error, the func will return a Future which can be used to track the ongoing +// progress of the operation. +func (ac *azureClient) DeleteAsync(ctx context.Context, spec azure.ResourceSpecGetter) (azureautorest.FutureAPI, error) { + ctx, span := tele.Tracer().Start(ctx, "natgateways.azureClient.Delete") + defer span.End() - future, err := ac.natgateways.Delete(ctx, resourceGroupName, natGatewayName) + future, err := ac.natgateways.Delete(ctx, spec.ResourceGroupName(), spec.ResourceName()) if err != nil { - return err + return nil, err } + + ctx, cancel := context.WithTimeout(ctx, reconciler.DefaultAzureCallTimeout) + defer cancel() + err = future.WaitForCompletionRef(ctx, ac.natgateways.Client) if err != nil { - return err + // if an error occurs, return the future. + // this means the long-running operation didn't finish in the specified timeout. + return &future, err } _, err = future.Result(ac.natgateways) - return err + // if the operation completed, return a nil future. + return nil, err +} + +// IsDone returns true if the long-running operation has completed. +func (ac *azureClient) IsDone(ctx context.Context, future azureautorest.FutureAPI) (bool, error) { + ctx, span := tele.Tracer().Start(ctx, "natgateways.azureClient.IsDone") + defer span.End() + + done, err := future.DoneWithContext(ctx, ac.natgateways) + if err != nil { + return false, errors.Wrap(err, "failed checking if the operation was complete") + } + + return done, nil +} + +// Result fetches the result of a long-running operation future. +func (ac *azureClient) Result(ctx context.Context, futureData azureautorest.FutureAPI, futureType string) (interface{}, error) { + if futureData == nil { + return nil, errors.Errorf("cannot get result from nil future") + } + var result func(client network.NatGatewaysClient) (natGateway network.NatGateway, err error) + + switch futureType { + case infrav1.PutFuture: + var future *network.NatGatewaysCreateOrUpdateFuture + jsonData, err := futureData.MarshalJSON() + if err != nil { + return nil, errors.Wrap(err, "failed to marshal future") + } + if err := json.Unmarshal(jsonData, &future); err != nil { + return nil, errors.Wrap(err, "failed to unmarshal future data") + } + result = (*future).Result + + case infrav1.DeleteFuture: + // Delete does not return a result Nat Gateway + return nil, nil + + default: + return nil, errors.Errorf("unknown future type %q", futureType) + } + + return result(ac.natgateways) } diff --git a/azure/services/natgateways/natgateways.go b/azure/services/natgateways/natgateways.go index bd79d52d006..43aafef5dd1 100644 --- a/azure/services/natgateways/natgateways.go +++ b/azure/services/natgateways/natgateways.go @@ -18,24 +18,26 @@ package natgateways import ( "context" - "fmt" "github.com/Azure/azure-sdk-for-go/services/network/mgmt/2019-06-01/network" - autorest "github.com/Azure/go-autorest/autorest/azure" - "github.com/Azure/go-autorest/autorest/to" "github.com/go-logr/logr" "github.com/pkg/errors" infrav1 "sigs.k8s.io/cluster-api-provider-azure/api/v1beta1" "sigs.k8s.io/cluster-api-provider-azure/azure" + "sigs.k8s.io/cluster-api-provider-azure/azure/services/async" + "sigs.k8s.io/cluster-api-provider-azure/util/reconciler" "sigs.k8s.io/cluster-api-provider-azure/util/tele" ) +const serviceName = "natgateways" + // NatGatewayScope defines the scope interface for nat gateway service. type NatGatewayScope interface { logr.Logger azure.ClusterScoper - NatGatewaySpecs() []azure.NatGatewaySpec + azure.AsyncStatusUpdater + NatGatewaySpecs() []azure.ResourceSpecGetter } // Service provides operations on azure resources. @@ -55,119 +57,74 @@ func New(scope NatGatewayScope) *Service { // Reconcile gets/creates/updates a nat gateway. // Only when the Nat Gateway 'Name' property is defined we create the Nat Gateway: it's opt-in. func (s *Service) Reconcile(ctx context.Context) error { - ctx, _, done := tele.StartSpanWithLogger(ctx, "natgateways.Service.Reconcile") - defer done() + ctx, span := tele.Tracer().Start(ctx, "natgateways.Service.Reconcile") + defer span.End() - if !s.Scope.Vnet().IsManaged(s.Scope.ClusterName()) { - s.Scope.V(4).Info("Skipping nat gateways reconcile in custom vnet mode") - return nil - } + ctx, cancel := context.WithTimeout(ctx, reconciler.DefaultAzureServiceReconcileTimeout) + defer cancel() + // We go through the list of NatGatewaySpecs to reconcile each one, independently of the result of the previous one. + // If multiple erros occur, we return the most pressing one + // order of precedence is: error creating -> creating in progress -> created (no error) + var result error for _, natGatewaySpec := range s.Scope.NatGatewaySpecs() { - existingNatGateway, err := s.getExisting(ctx, natGatewaySpec) - - switch { - case err != nil && !azure.ResourceNotFound(err): - return errors.Wrapf(err, "failed to get nat gateway %s in %s", natGatewaySpec.Name, s.Scope.ResourceGroup()) - case err == nil: - // nat gateway already exists - s.Scope.V(4).Info("nat gateway already exists", "nat gateway", natGatewaySpec.Name) - natGatewaySpec.Subnet.NatGateway.ID = existingNatGateway.ID - - if existingNatGateway.NatGatewayIP.Name == natGatewaySpec.NatGatewayIP.Name { - // Skip update for Nat Gateway as it exists with expected values - s.Scope.V(4).Info("Nat Gateway exists with expected values, skipping update", "nat gateway", natGatewaySpec.Name) - natGatewaySpec.Subnet.NatGateway = *existingNatGateway - s.Scope.SetSubnet(natGatewaySpec.Subnet) - continue - } else { - s.Scope.V(2).Info("updating NAT gateway IP name to match the spec", "old name", existingNatGateway.NatGatewayIP.Name, "desired name", natGatewaySpec.NatGatewayIP.Name) - } - default: - // nat gateway doesn't exist but its name was specified in the subnet, let's create it - s.Scope.V(2).Info("nat gateway doesn't exist yet, creating it", "nat gateway", natGatewaySpec.Name) - } - - natGatewayToCreate := network.NatGateway{ - Location: to.StringPtr(s.Scope.Location()), - Sku: &network.NatGatewaySku{Name: network.Standard}, - NatGatewayPropertiesFormat: &network.NatGatewayPropertiesFormat{ - PublicIPAddresses: &[]network.SubResource{ - { - ID: to.StringPtr(azure.PublicIPID(s.Scope.SubscriptionID(), s.Scope.ResourceGroup(), natGatewaySpec.NatGatewayIP.Name)), - }, - }, - }, - } - err = s.client.CreateOrUpdate(ctx, s.Scope.ResourceGroup(), natGatewaySpec.Name, natGatewayToCreate) + natGateway, err := async.CreateResource(ctx, s.Scope, s.client, natGatewaySpec, serviceName) if err != nil { - return errors.Wrapf(err, "failed to create nat gateway %s in resource group %s", natGatewaySpec.Name, s.Scope.ResourceGroup()) + if !azure.IsOperationNotDoneError(err) || result == nil { + result = err + } } - s.Scope.V(2).Info("successfully created nat gateway", "nat gateway", natGatewaySpec.Name) - natGateway := infrav1.NatGateway{ - ID: azure.NatGatewayID(s.Scope.SubscriptionID(), s.Scope.ResourceGroup(), natGatewaySpec.Name), - Name: natGatewaySpec.Name, - NatGatewayIP: infrav1.PublicIPSpec{ - Name: natGatewaySpec.NatGatewayIP.Name, - }, + if err == nil && result != nil { + // TODO: is there a better way to do this? + // natGatewaySpec.Subnet.NatGateway = natGateway + subnetSpec := s.Scope.Subnet(natGatewaySpec.OwnerResourceName()) + networkNatGateway, ok := natGateway.(network.NatGateway) + if !ok { + result = errors.Errorf("%T is not a network.NatGateway", natGateway) + } + natGatewayInfraV1Spec, err := ParseExistingNatGateway(networkNatGateway) + if err != nil { + result = err + } + subnetSpec.NatGateway = *natGatewayInfraV1Spec + s.Scope.SetSubnet(subnetSpec) + + // natGateway := infrav1.NatGateway{ + // ID: azure.NatGatewayID(s.Scope.SubscriptionID(), s.Scope.ResourceGroup(), natGatewaySpec.Name), + // Name: natGatewaySpec.ResourceName(), + // NatGatewayIP: infrav1.PublicIPSpec{ + // Name: natGatewaySpec.NatGatewayIP.Name, + // }, + // } + // natGatewaySpec.Subnet.NatGateway = natGateway + // s.Scope.SetSubnet(natGatewaySpec.Subnet) } - natGatewaySpec.Subnet.NatGateway = natGateway - s.Scope.SetSubnet(natGatewaySpec.Subnet) - } - return nil -} - -func (s *Service) getExisting(ctx context.Context, spec azure.NatGatewaySpec) (*infrav1.NatGateway, error) { - existingNatGateway, err := s.Get(ctx, s.Scope.ResourceGroup(), spec.Name) - if err != nil { - return nil, err - } - // We must have a non-nil, non-"empty" PublicIPAddresses - if !(existingNatGateway.PublicIPAddresses != nil && len(*existingNatGateway.PublicIPAddresses) > 0) { - return nil, errors.Wrap(err, "failed to parse PublicIPAddresses") - } - // TODO do we want to eventually handle NatGateway resources w/ more than one public IP address? - // For now we assume the first one is the significant one - publicIPAddressID := to.String((*existingNatGateway.PublicIPAddresses)[0].ID) - resource, err := autorest.ParseResourceID(publicIPAddressID) - if err != nil { - return nil, errors.Wrap(err, "failed to parse Resource ID from PublicIPAddresses ID") - } - // We depend upon a non-empty ResourceName string - if resource.ResourceName == "" { - return nil, errors.Wrap(err, fmt.Sprintf("got unexpected ResourceName value from NatGateway PublicIpAddress, ResourceName=%s", resource.ResourceName)) } - return &infrav1.NatGateway{ - ID: to.String(existingNatGateway.ID), - Name: to.String(existingNatGateway.Name), - NatGatewayIP: infrav1.PublicIPSpec{ - Name: resource.ResourceName, - }, - }, nil + s.Scope.UpdatePutStatus(infrav1.NATGatewaysReadyCondition, serviceName, result) + return result } // Delete deletes the nat gateway with the provided name. func (s *Service) Delete(ctx context.Context) error { - ctx, _, done := tele.StartSpanWithLogger(ctx, "natgateways.Service.Delete") - defer done() + ctx, span := tele.Tracer().Start(ctx, "vnetpeerings.Service.Delete") + defer span.End() - if !s.Scope.Vnet().IsManaged(s.Scope.ClusterName()) { - s.Scope.V(4).Info("Skipping nat gateway deletion in custom vnet mode") - return nil - } + ctx, cancel := context.WithTimeout(ctx, reconciler.DefaultAzureServiceReconcileTimeout) + defer cancel() + + var result error + + // We go through the list of NatGatewaySpecs to delete each one, independently of the result of the previous one. + // If multiple erros occur, we return the most pressing one + // order of precedence is: error deleting -> deleting in progress -> deleted (no error) for _, natGatewaySpec := range s.Scope.NatGatewaySpecs() { - s.Scope.V(2).Info("deleting nat gateway", "nat gateway", natGatewaySpec.Name) - err := s.client.Delete(ctx, s.Scope.ResourceGroup(), natGatewaySpec.Name) - if err != nil && azure.ResourceNotFound(err) { - // already deleted - continue - } - if err != nil { - return errors.Wrapf(err, "failed to delete nat gateway %s in resource group %s", natGatewaySpec.Name, s.Scope.ResourceGroup()) + if err := async.DeleteResource(ctx, s.Scope, s.client, natGatewaySpec, serviceName); err != nil { + if !azure.IsOperationNotDoneError(err) || result == nil { + result = err + } } - - s.Scope.V(2).Info("successfully deleted nat gateway", "nat gateway", natGatewaySpec.Name) } - return nil + s.Scope.UpdateDeleteStatus(infrav1.NATGatewaysReadyCondition, serviceName, result) + return result } diff --git a/azure/services/natgateways/spec.go b/azure/services/natgateways/spec.go new file mode 100644 index 00000000000..121543b089b --- /dev/null +++ b/azure/services/natgateways/spec.go @@ -0,0 +1,115 @@ +/* +Copyright 2021 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package natgateways + +import ( + "fmt" + + "github.com/Azure/azure-sdk-for-go/services/network/mgmt/2019-06-01/network" + autorest "github.com/Azure/go-autorest/autorest/azure" + "github.com/Azure/go-autorest/autorest/to" + "github.com/pkg/errors" + infrav1 "sigs.k8s.io/cluster-api-provider-azure/api/v1beta1" + "sigs.k8s.io/cluster-api-provider-azure/azure" +) + +// NatGatewaySpec defines the specification for a Nat Gateway. +type NatGatewaySpec struct { + Name string + ResourceGroup string + SubscriptionID string + Location string + NatGatewayIP infrav1.PublicIPSpec + Subnet infrav1.SubnetSpec +} + +// ResourceName returns the name of the Nat Gateway. +func (s *NatGatewaySpec) ResourceName() string { + return s.Name +} + +// ResourceGroupName returns the name of the resource group. +func (s *NatGatewaySpec) ResourceGroupName() string { + return s.ResourceGroup +} + +// OwnerResourceName is a no-op for Nat Gateways. +func (s *NatGatewaySpec) OwnerResourceName() string { + return s.Subnet.Name +} + +// Parameters returns the parameters for the Nat Gateway. +func (s *NatGatewaySpec) Parameters(existing interface{}) (interface{}, error) { + if existing != nil { + if _, ok := existing.(network.NatGateway); !ok { + return nil, errors.Errorf("%T is not a network.NatGateway", existing) + } + // Nat Gateway already exists, parse existing spec + existingNatGateway, err := ParseExistingNatGateway(existing.(network.NatGateway)) + if err != nil { + return nil, err + } + s.Subnet.NatGateway.ID = existingNatGateway.ID + if existingNatGateway.NatGatewayIP.Name == s.NatGatewayIP.Name { + // Skip update for Nat Gateway as it exists with expected values + s.Subnet.NatGateway = *existingNatGateway + // s.Scope.SetSubnet(s.Subnet) + return nil, nil + } + // s.Scope.V(2).Info("updating NAT gateway IP name to match the spec", "old name", existingNatGateway.NatGatewayIP.Name, "desired name", s.NatGatewayIP.Name) + + } + + natGatewayToCreate := network.NatGateway{ + Location: to.StringPtr(s.Location), + Sku: &network.NatGatewaySku{Name: network.Standard}, + NatGatewayPropertiesFormat: &network.NatGatewayPropertiesFormat{ + PublicIPAddresses: &[]network.SubResource{ + { + ID: to.StringPtr(azure.PublicIPID(s.SubscriptionID, s.ResourceGroupName(), s.NatGatewayIP.Name)), + }, + }, + }, + } + + return natGatewayToCreate, nil +} + +func ParseExistingNatGateway(existingNatGateway network.NatGateway) (*infrav1.NatGateway, error) { + // We must have a non-nil, non-"empty" PublicIPAddresses + if !(existingNatGateway.PublicIPAddresses != nil && len(*existingNatGateway.PublicIPAddresses) > 0) { + return nil, errors.New("failed to parse PublicIPAddresses") + } + // TODO: do we need to handle NatGateway resources w/ more than one public IP address? + publicIPAddressID := to.String((*existingNatGateway.PublicIPAddresses)[0].ID) + resource, err := autorest.ParseResourceID(publicIPAddressID) + if err != nil { + return nil, errors.Wrap(err, "failed to parse Resource ID from PublicIPAddresses ID") + } + // We depend upon a non-empty ResourceName string + if resource.ResourceName == "" { + return nil, errors.Wrap(err, fmt.Sprintf("got unexpected ResourceName value from NatGateway PublicIpAddress, ResourceName=%s", resource.ResourceName)) + } + + return &infrav1.NatGateway{ + ID: to.String(existingNatGateway.ID), + Name: to.String(existingNatGateway.Name), + NatGatewayIP: infrav1.PublicIPSpec{ + Name: resource.ResourceName, + }, + }, nil +} diff --git a/azure/types.go b/azure/types.go index 0cf5e361969..e3f65e18c8f 100644 --- a/azure/types.go +++ b/azure/types.go @@ -78,13 +78,6 @@ type RouteTableSpec struct { Subnet infrav1.SubnetSpec } -// NatGatewaySpec defines the specification for a Nat Gateway. -type NatGatewaySpec struct { - NatGatewayIP infrav1.PublicIPSpec - Name string - Subnet infrav1.SubnetSpec -} - // InboundNatSpec defines the specification for an inbound NAT rule. type InboundNatSpec struct { Name string