From 175ba265e6f66fe4f71bf400cedc928c0fc5304c Mon Sep 17 00:00:00 2001 From: Richard Chen Date: Tue, 14 Feb 2023 10:23:45 -0800 Subject: [PATCH] Implement the reconciliation loop for GCPManagedMachinePool --- cloud/scope/managedcontrolplane.go | 45 ++- cloud/scope/managedmachinepool.go | 233 +++++++++++ .../services/container/clusters/kubeconfig.go | 2 +- .../services/container/clusters/reconcile.go | 47 ++- cloud/services/container/nodepools/doc.go | 18 + .../services/container/nodepools/reconcile.go | 375 ++++++++++++++++++ cloud/services/container/nodepools/service.go | 36 ++ ...uster.x-k8s.io_gcpmanagedmachinepools.yaml | 19 +- config/rbac/role.yaml | 9 + exp/api/v1beta1/conditions_consts.go | 28 ++ exp/api/v1beta1/gcpmanagedcluster_types.go | 1 + .../v1beta1/gcpmanagedmachinepool_types.go | 25 +- exp/api/v1beta1/types.go | 31 ++ exp/api/v1beta1/webhook_suite_test.go | 1 - exp/api/v1beta1/zz_generated.deepcopy.go | 33 +- .../gcpmanagedcontrolplane_controller.go | 2 + .../gcpmanagedmachinepool_controller.go | 358 ++++++++++++++++- go.mod | 2 +- main.go | 78 ++-- util/location/location.go | 50 +++ util/resourceurl/parse.go | 75 ++++ 21 files changed, 1385 insertions(+), 83 deletions(-) create mode 100644 cloud/scope/managedmachinepool.go create mode 100644 cloud/services/container/nodepools/doc.go create mode 100644 cloud/services/container/nodepools/reconcile.go create mode 100644 cloud/services/container/nodepools/service.go create mode 100644 util/location/location.go create mode 100644 util/resourceurl/parse.go diff --git a/cloud/scope/managedcontrolplane.go b/cloud/scope/managedcontrolplane.go index b71dd89ae..319163129 100644 --- a/cloud/scope/managedcontrolplane.go +++ b/cloud/scope/managedcontrolplane.go @@ -19,7 +19,8 @@ package scope import ( "context" "fmt" - "strings" + + "sigs.k8s.io/cluster-api-provider-gcp/util/location" "google.golang.org/api/option" @@ -30,6 +31,7 @@ import ( "github.com/pkg/errors" infrav1exp "sigs.k8s.io/cluster-api-provider-gcp/exp/api/v1beta1" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + clusterv1exp "sigs.k8s.io/cluster-api/exp/api/v1beta1" "sigs.k8s.io/cluster-api/util/patch" "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -124,6 +126,9 @@ type ManagedControlPlaneScope struct { mcClient *container.ClusterManagerClient credentialsClient *credentials.IamCredentialsClient credential *Credential + + AllMachinePools []clusterv1exp.MachinePool + AllManagedMachinePools []infrav1exp.GCPManagedMachinePool } // PatchObject persists the managed control plane configuration and status. @@ -171,19 +176,36 @@ func (s *ManagedControlPlaneScope) GetCredential() *Credential { return s.credential } -func parseLocation(location string) (region string, zone *string) { - parts := strings.Split(location, "-") - region = strings.Join(parts[:2], "-") - if len(parts) == 3 { - return region, &parts[2] +// GetAllNodePools gets all node pools for the control plane. 
+func (s *ManagedControlPlaneScope) GetAllNodePools(ctx context.Context) ([]infrav1exp.GCPManagedMachinePool, []clusterv1exp.MachinePool, error) { + if s.AllManagedMachinePools == nil || len(s.AllManagedMachinePools) == 0 { + listOptions := []client.ListOption{ + client.InNamespace(s.GCPManagedControlPlane.Namespace), + client.MatchingLabels(map[string]string{clusterv1.ClusterLabelName: s.Cluster.Name}), + } + + machinePoolList := &clusterv1exp.MachinePoolList{} + if err := s.client.List(ctx, machinePoolList, listOptions...); err != nil { + return nil, nil, err + } + managedMachinePoolList := &infrav1exp.GCPManagedMachinePoolList{} + if err := s.client.List(ctx, managedMachinePoolList, listOptions...); err != nil { + return nil, nil, err + } + if len(machinePoolList.Items) != len(managedMachinePoolList.Items) { + return nil, nil, fmt.Errorf("machinePoolList length (%d) != managedMachinePoolList length (%d)", len(machinePoolList.Items), len(managedMachinePoolList.Items)) + } + s.AllMachinePools = machinePoolList.Items + s.AllManagedMachinePools = managedMachinePoolList.Items } - return region, nil + + return s.AllManagedMachinePools, s.AllMachinePools, nil } // Region returns the region of the GKE cluster. func (s *ManagedControlPlaneScope) Region() string { - region, _ := parseLocation(s.GCPManagedControlPlane.Spec.Location) - return region + loc, _ := location.Parse(s.GCPManagedControlPlane.Spec.Location) + return loc.Region } // ClusterLocation returns the location of the cluster. @@ -196,6 +218,11 @@ func (s *ManagedControlPlaneScope) ClusterFullName() string { return fmt.Sprintf("%s/clusters/%s", s.ClusterLocation(), s.GCPManagedControlPlane.Spec.ClusterName) } +// ClusterName returns the name of the cluster. +func (s *ManagedControlPlaneScope) ClusterName() string { + return s.GCPManagedControlPlane.Spec.ClusterName +} + // SetEndpoint sets the Endpoint of GCPManagedControlPlane. func (s *ManagedControlPlaneScope) SetEndpoint(host string) { s.GCPManagedControlPlane.Spec.Endpoint = clusterv1.APIEndpoint{ diff --git a/cloud/scope/managedmachinepool.go b/cloud/scope/managedmachinepool.go new file mode 100644 index 000000000..2a9136658 --- /dev/null +++ b/cloud/scope/managedmachinepool.go @@ -0,0 +1,233 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package scope + +import ( + "context" + "fmt" + + "sigs.k8s.io/cluster-api-provider-gcp/util/location" + + "google.golang.org/api/option" + "sigs.k8s.io/cluster-api/util/conditions" + + compute "cloud.google.com/go/compute/apiv1" + container "cloud.google.com/go/container/apiv1" + "cloud.google.com/go/container/apiv1/containerpb" + "github.com/pkg/errors" + infrav1exp "sigs.k8s.io/cluster-api-provider-gcp/exp/api/v1beta1" + clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + clusterv1exp "sigs.k8s.io/cluster-api/exp/api/v1beta1" + "sigs.k8s.io/cluster-api/util/patch" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +// ManagedMachinePoolScopeParams defines the input parameters used to create a new Scope. 
+type ManagedMachinePoolScopeParams struct { + ManagedClusterClient *container.ClusterManagerClient + InstanceGroupManagersClient *compute.InstanceGroupManagersClient + Client client.Client + Cluster *clusterv1.Cluster + MachinePool *clusterv1exp.MachinePool + GCPManagedCluster *infrav1exp.GCPManagedCluster + GCPManagedControlPlane *infrav1exp.GCPManagedControlPlane + GCPManagedMachinePool *infrav1exp.GCPManagedMachinePool +} + +// NewManagedMachinePoolScope creates a new Scope from the supplied parameters. +// This is meant to be called for each reconcile iteration. +func NewManagedMachinePoolScope(ctx context.Context, params ManagedMachinePoolScopeParams) (*ManagedMachinePoolScope, error) { + if params.Cluster == nil { + return nil, errors.New("failed to generate new scope from nil Cluster") + } + if params.MachinePool == nil { + return nil, errors.New("failed to generate new scope from nil MachinePool") + } + if params.GCPManagedCluster == nil { + return nil, errors.New("failed to generate new scope from nil GCPManagedCluster") + } + if params.GCPManagedControlPlane == nil { + return nil, errors.New("failed to generate new scope from nil GCPManagedControlPlane") + } + if params.GCPManagedMachinePool == nil { + return nil, errors.New("failed to generate new scope from nil GCPManagedMachinePool") + } + + var credentialData []byte + var err error + if params.GCPManagedCluster.Spec.CredentialsRef != nil { + credentialData, err = getCredentialDataFromRef(ctx, params.GCPManagedCluster.Spec.CredentialsRef, params.Client) + } else { + credentialData, err = getCredentialDataFromMount() + } + if err != nil { + return nil, errors.Errorf("failed to get credential data: %v", err) + } + + if params.ManagedClusterClient == nil { + var managedClusterClient *container.ClusterManagerClient + managedClusterClient, err = container.NewClusterManagerClient(ctx, option.WithCredentialsJSON(credentialData)) + if err != nil { + return nil, errors.Errorf("failed to create gcp managed cluster client: %v", err) + } + params.ManagedClusterClient = managedClusterClient + } + if params.InstanceGroupManagersClient == nil { + var instanceGroupManagersClient *compute.InstanceGroupManagersClient + instanceGroupManagersClient, err = compute.NewInstanceGroupManagersRESTClient(ctx, option.WithCredentialsJSON(credentialData)) + if err != nil { + return nil, errors.Errorf("failed to create gcp instance group manager client: %v", err) + } + params.InstanceGroupManagersClient = instanceGroupManagersClient + } + + helper, err := patch.NewHelper(params.GCPManagedMachinePool, params.Client) + if err != nil { + return nil, errors.Wrap(err, "failed to init patch helper") + } + + return &ManagedMachinePoolScope{ + client: params.Client, + Cluster: params.Cluster, + MachinePool: params.MachinePool, + GCPManagedControlPlane: params.GCPManagedControlPlane, + GCPManagedMachinePool: params.GCPManagedMachinePool, + mcClient: params.ManagedClusterClient, + migClient: params.InstanceGroupManagersClient, + patchHelper: helper, + }, nil +} + +// ManagedMachinePoolScope defines the basic context for an actuator to operate upon. 
+type ManagedMachinePoolScope struct {
+	client      client.Client
+	patchHelper *patch.Helper
+
+	Cluster                *clusterv1.Cluster
+	MachinePool            *clusterv1exp.MachinePool
+	GCPManagedCluster      *infrav1exp.GCPManagedCluster
+	GCPManagedControlPlane *infrav1exp.GCPManagedControlPlane
+	GCPManagedMachinePool  *infrav1exp.GCPManagedMachinePool
+	mcClient               *container.ClusterManagerClient
+	migClient              *compute.InstanceGroupManagersClient
+}
+
+// PatchObject persists the managed machine pool configuration and status.
+func (s *ManagedMachinePoolScope) PatchObject() error {
+	return s.patchHelper.Patch(
+		context.TODO(),
+		s.GCPManagedMachinePool,
+		patch.WithOwnedConditions{Conditions: []clusterv1.ConditionType{
+			infrav1exp.GKEMachinePoolReadyCondition,
+			infrav1exp.GKEMachinePoolCreatingCondition,
+			infrav1exp.GKEMachinePoolUpdatingCondition,
+			infrav1exp.GKEMachinePoolDeletingCondition,
+		}})
+}
+
+// Close closes the current scope persisting the managed machine pool configuration and status.
+func (s *ManagedMachinePoolScope) Close() error {
+	s.mcClient.Close()
+	s.migClient.Close()
+	return s.PatchObject()
+}
+
+// ConditionSetter returns a condition setter (which is the GCPManagedMachinePool itself).
+func (s *ManagedMachinePoolScope) ConditionSetter() conditions.Setter {
+	return s.GCPManagedMachinePool
+}
+
+// ManagedMachinePoolClient returns a client used to interact with GKE.
+func (s *ManagedMachinePoolScope) ManagedMachinePoolClient() *container.ClusterManagerClient {
+	return s.mcClient
+}
+
+// InstanceGroupManagersClient returns a client used to interact with GCP MIG.
+func (s *ManagedMachinePoolScope) InstanceGroupManagersClient() *compute.InstanceGroupManagersClient {
+	return s.migClient
+}
+
+// NodePoolVersion returns the k8s version of the node pool.
+func (s *ManagedMachinePoolScope) NodePoolVersion() *string {
+	return s.MachinePool.Spec.Template.Spec.Version
+}
+
+// ConvertToSdkNodePool converts a node pool to the format used by the GCP SDK.
+func ConvertToSdkNodePool(nodePool infrav1exp.GCPManagedMachinePool, machinePool clusterv1exp.MachinePool) *containerpb.NodePool {
+	nodePoolName := nodePool.Spec.NodePoolName
+	if len(nodePoolName) == 0 {
+		nodePoolName = nodePool.Name
+	}
+	sdkNodePool := containerpb.NodePool{
+		Name:             nodePoolName,
+		InitialNodeCount: nodePool.Spec.InitialNodeCount,
+		Config: &containerpb.NodeConfig{
+			Labels:   nodePool.Spec.KubernetesLabels,
+			Taints:   infrav1exp.ConvertToSdkTaint(nodePool.Spec.KubernetesTaints),
+			Metadata: nodePool.Spec.AdditionalLabels,
+		},
+	}
+	if nodePool.Spec.Scaling != nil {
+		sdkNodePool.Autoscaling = &containerpb.NodePoolAutoscaling{
+			Enabled:      true,
+			MinNodeCount: *nodePool.Spec.Scaling.MinCount,
+			MaxNodeCount: *nodePool.Spec.Scaling.MaxCount,
+		}
+	}
+	if machinePool.Spec.Template.Spec.Version != nil {
+		sdkNodePool.Version = *machinePool.Spec.Template.Spec.Version
+	}
+	return &sdkNodePool
+}
+
+// ConvertToSdkNodePools converts node pools to the format used by the GCP SDK.
+func ConvertToSdkNodePools(nodePools []infrav1exp.GCPManagedMachinePool, machinePools []clusterv1exp.MachinePool) []*containerpb.NodePool {
+	res := []*containerpb.NodePool{}
+	for i := range nodePools {
+		res = append(res, ConvertToSdkNodePool(nodePools[i], machinePools[i]))
+	}
+	return res
+}
+
+// SetReplicas sets the replicas count in status.
+func (s *ManagedMachinePoolScope) SetReplicas(replicas int32) {
+	s.GCPManagedMachinePool.Status.Replicas = replicas
+}
+
+// NodePoolName returns the node pool name.
+func (s *ManagedMachinePoolScope) NodePoolName() string { + if len(s.GCPManagedMachinePool.Spec.NodePoolName) > 0 { + return s.GCPManagedMachinePool.Spec.NodePoolName + } + return s.GCPManagedMachinePool.Name +} + +// Region returns the region of the GKE node pool. +func (s *ManagedMachinePoolScope) Region() string { + loc, _ := location.Parse(s.GCPManagedControlPlane.Spec.Location) + return loc.Region +} + +// NodePoolLocation returns the location of the node pool. +func (s *ManagedMachinePoolScope) NodePoolLocation() string { + return fmt.Sprintf("projects/%s/locations/%s/clusters/%s", s.GCPManagedControlPlane.Spec.Project, s.Region(), s.GCPManagedControlPlane.Spec.ClusterName) +} + +// NodePoolFullName returns the full name of the node pool. +func (s *ManagedMachinePoolScope) NodePoolFullName() string { + return fmt.Sprintf("%s/nodePools/%s", s.NodePoolLocation(), s.NodePoolName()) +} diff --git a/cloud/services/container/clusters/kubeconfig.go b/cloud/services/container/clusters/kubeconfig.go index 2b3e706c7..e04d793ca 100644 --- a/cloud/services/container/clusters/kubeconfig.go +++ b/cloud/services/container/clusters/kubeconfig.go @@ -198,7 +198,7 @@ func (s *Service) updateCAPIKubeconfigSecret(ctx context.Context, configSecret * } func (s *Service) getKubeConfigContextName(isUser bool) string { - contextName := fmt.Sprintf("gke_%s_%s_%s", s.scope.GCPManagedControlPlane.Spec.Project, s.scope.GCPManagedControlPlane.Spec.Location, s.scope.GCPManagedControlPlane.Name) + contextName := fmt.Sprintf("gke_%s_%s_%s", s.scope.GCPManagedControlPlane.Spec.Project, s.scope.GCPManagedControlPlane.Spec.Location, s.scope.ClusterName()) if isUser { contextName = fmt.Sprintf("%s-user", contextName) } diff --git a/cloud/services/container/clusters/reconcile.go b/cloud/services/container/clusters/reconcile.go index 240a4d354..bffc7de17 100644 --- a/cloud/services/container/clusters/reconcile.go +++ b/cloud/services/container/clusters/reconcile.go @@ -20,6 +20,8 @@ import ( "context" "fmt" + "sigs.k8s.io/cluster-api-provider-gcp/cloud/scope" + "cloud.google.com/go/container/apiv1/containerpb" "github.com/googleapis/gax-go/v2/apierror" "github.com/pkg/errors" @@ -40,17 +42,36 @@ func (s *Service) Reconcile(ctx context.Context) (ctrl.Result, error) { cluster, err := s.describeCluster(ctx) if err != nil { s.scope.GCPManagedControlPlane.Status.Ready = false + conditions.MarkFalse(s.scope.ConditionSetter(), clusterv1.ReadyCondition, infrav1exp.GKEControlPlaneReconciliationFailedReason, clusterv1.ConditionSeverityError, err.Error()) return ctrl.Result{}, err } if cluster == nil { log.Info("Cluster not found, creating") s.scope.GCPManagedControlPlane.Status.Ready = false + + nodePools, _, err := s.scope.GetAllNodePools(ctx) + if err != nil { + conditions.MarkFalse(s.scope.ConditionSetter(), clusterv1.ReadyCondition, infrav1exp.GKEControlPlaneReconciliationFailedReason, clusterv1.ConditionSeverityError, err.Error()) + conditions.MarkFalse(s.scope.ConditionSetter(), infrav1exp.GKEControlPlaneReadyCondition, infrav1exp.GKEControlPlaneReconciliationFailedReason, clusterv1.ConditionSeverityError, err.Error()) + conditions.MarkFalse(s.scope.ConditionSetter(), infrav1exp.GKEControlPlaneCreatingCondition, infrav1exp.GKEControlPlaneReconciliationFailedReason, clusterv1.ConditionSeverityError, err.Error()) + return ctrl.Result{}, err + } + if len(nodePools) == 0 { + log.Info("At least 1 node pool is required to create GKE cluster") + conditions.MarkFalse(s.scope.ConditionSetter(), clusterv1.ReadyCondition, 
infrav1exp.GKEControlPlaneRequiresAtLeastOneNodePoolReason, clusterv1.ConditionSeverityInfo, "") + conditions.MarkFalse(s.scope.ConditionSetter(), infrav1exp.GKEControlPlaneReadyCondition, infrav1exp.GKEControlPlaneRequiresAtLeastOneNodePoolReason, clusterv1.ConditionSeverityInfo, "") + conditions.MarkFalse(s.scope.ConditionSetter(), infrav1exp.GKEControlPlaneCreatingCondition, infrav1exp.GKEControlPlaneRequiresAtLeastOneNodePoolReason, clusterv1.ConditionSeverityInfo, "") + return ctrl.Result{RequeueAfter: reconciler.DefaultRetryTime}, nil + } + if err = s.createCluster(ctx); err != nil { + conditions.MarkFalse(s.scope.ConditionSetter(), clusterv1.ReadyCondition, infrav1exp.GKEControlPlaneReconciliationFailedReason, clusterv1.ConditionSeverityError, err.Error()) conditions.MarkFalse(s.scope.ConditionSetter(), infrav1exp.GKEControlPlaneReadyCondition, infrav1exp.GKEControlPlaneReconciliationFailedReason, clusterv1.ConditionSeverityError, err.Error()) conditions.MarkFalse(s.scope.ConditionSetter(), infrav1exp.GKEControlPlaneCreatingCondition, infrav1exp.GKEControlPlaneReconciliationFailedReason, clusterv1.ConditionSeverityError, err.Error()) return ctrl.Result{}, err } log.Info("Cluster provisioning in progress") + conditions.MarkFalse(s.scope.ConditionSetter(), clusterv1.ReadyCondition, infrav1exp.GKEControlPlaneCreatingReason, clusterv1.ConditionSeverityInfo, "") conditions.MarkFalse(s.scope.ConditionSetter(), infrav1exp.GKEControlPlaneReadyCondition, infrav1exp.GKEControlPlaneCreatingReason, clusterv1.ConditionSeverityInfo, "") conditions.MarkTrue(s.scope.ConditionSetter(), infrav1exp.GKEControlPlaneCreatingCondition) return ctrl.Result{RequeueAfter: reconciler.DefaultRetryTime}, nil @@ -59,6 +80,7 @@ func (s *Service) Reconcile(ctx context.Context) (ctrl.Result, error) { switch cluster.Status { case containerpb.Cluster_PROVISIONING: log.Info("Cluster provisioning in progress") + conditions.MarkFalse(s.scope.ConditionSetter(), clusterv1.ReadyCondition, infrav1exp.GKEControlPlaneCreatingReason, clusterv1.ConditionSeverityInfo, "") conditions.MarkFalse(s.scope.ConditionSetter(), infrav1exp.GKEControlPlaneReadyCondition, infrav1exp.GKEControlPlaneCreatingReason, clusterv1.ConditionSeverityInfo, "") conditions.MarkTrue(s.scope.ConditionSetter(), infrav1exp.GKEControlPlaneCreatingCondition) s.scope.GCPManagedControlPlane.Status.Ready = false @@ -70,6 +92,7 @@ func (s *Service) Reconcile(ctx context.Context) (ctrl.Result, error) { return ctrl.Result{}, nil case containerpb.Cluster_STOPPING: log.Info("Cluster stopping in progress") + conditions.MarkFalse(s.scope.ConditionSetter(), clusterv1.ReadyCondition, infrav1exp.GKEControlPlaneDeletingReason, clusterv1.ConditionSeverityInfo, "") conditions.MarkFalse(s.scope.ConditionSetter(), infrav1exp.GKEControlPlaneReadyCondition, infrav1exp.GKEControlPlaneDeletingReason, clusterv1.ConditionSeverityInfo, "") conditions.MarkTrue(s.scope.ConditionSetter(), infrav1exp.GKEControlPlaneDeletingCondition) s.scope.GCPManagedControlPlane.Status.Ready = false @@ -79,14 +102,14 @@ func (s *Service) Reconcile(ctx context.Context) (ctrl.Result, error) { if len(cluster.Conditions) > 0 { msg = cluster.Conditions[0].GetMessage() } - log.Error(errors.New("Cluster in error/degraded state"), msg, "name", s.scope.GCPManagedControlPlane.Name) + log.Error(errors.New("Cluster in error/degraded state"), msg, "name", s.scope.ClusterName()) conditions.MarkFalse(s.scope.ConditionSetter(), infrav1exp.GKEControlPlaneReadyCondition, infrav1exp.GKEControlPlaneErrorReason, 
clusterv1.ConditionSeverityError, "") s.scope.GCPManagedControlPlane.Status.Ready = false return ctrl.Result{}, nil case containerpb.Cluster_RUNNING: log.Info("Cluster running") default: - log.Error(errors.New("Unhandled cluster status"), fmt.Sprintf("Unhandled cluster status %s", cluster.Status), "name", s.scope.GCPManagedControlPlane.Name) + log.Error(errors.New("Unhandled cluster status"), fmt.Sprintf("Unhandled cluster status %s", cluster.Status), "name", s.scope.ClusterName()) return ctrl.Result{}, nil } @@ -116,6 +139,7 @@ func (s *Service) Reconcile(ctx context.Context) (ctrl.Result, error) { s.scope.SetEndpoint(cluster.Endpoint) log.Info("Cluster reconciled") + conditions.MarkTrue(s.scope.ConditionSetter(), clusterv1.ReadyCondition) conditions.MarkTrue(s.scope.ConditionSetter(), infrav1exp.GKEControlPlaneReadyCondition) conditions.MarkFalse(s.scope.ConditionSetter(), infrav1exp.GKEControlPlaneCreatingCondition, infrav1exp.GKEControlPlaneCreatedReason, clusterv1.ConditionSeverityInfo, "") s.scope.GCPManagedControlPlane.Status.Ready = true @@ -160,6 +184,7 @@ func (s *Service) Delete(ctx context.Context) (ctrl.Result, error) { } log.Info("Cluster deleting in progress") s.scope.GCPManagedControlPlane.Status.Ready = false + conditions.MarkFalse(s.scope.ConditionSetter(), clusterv1.ReadyCondition, infrav1exp.GKEControlPlaneDeletingReason, clusterv1.ConditionSeverityInfo, "") conditions.MarkFalse(s.scope.ConditionSetter(), infrav1exp.GKEControlPlaneReadyCondition, infrav1exp.GKEControlPlaneDeletingReason, clusterv1.ConditionSeverityInfo, "") conditions.MarkTrue(s.scope.ConditionSetter(), infrav1exp.GKEControlPlaneDeletingCondition) @@ -180,7 +205,7 @@ func (s *Service) describeCluster(ctx context.Context) (*containerpb.Cluster, er return nil, nil } } - log.Error(err, "Error getting GKE cluster", "name", s.scope.GCPManagedControlPlane.Name) + log.Error(err, "Error getting GKE cluster", "name", s.scope.ClusterName()) return nil, err } @@ -190,18 +215,14 @@ func (s *Service) describeCluster(ctx context.Context) (*containerpb.Cluster, er func (s *Service) createCluster(ctx context.Context) error { log := log.FromContext(ctx) + nodePools, machinePools, _ := s.scope.GetAllNodePools(ctx) cluster := &containerpb.Cluster{ - Name: s.scope.GCPManagedControlPlane.Name, + Name: s.scope.ClusterName(), Network: *s.scope.GCPManagedCluster.Spec.Network.Name, Autopilot: &containerpb.Autopilot{ Enabled: false, }, - NodePools: []*containerpb.NodePool{ - { - Name: "default", - InitialNodeCount: 1, - }, - }, + NodePools: scope.ConvertToSdkNodePools(nodePools, machinePools), ReleaseChannel: &containerpb.ReleaseChannel{ Channel: convertToSdkReleaseChannel(s.scope.GCPManagedControlPlane.Spec.ReleaseChannel), }, @@ -215,7 +236,7 @@ func (s *Service) createCluster(ctx context.Context) error { } _, err := s.scope.ManagedControlPlaneClient().CreateCluster(ctx, createClusterRequest) if err != nil { - log.Error(err, "Error creating GKE cluster", "name", s.scope.GCPManagedControlPlane.Name) + log.Error(err, "Error creating GKE cluster", "name", s.scope.ClusterName()) return err } @@ -227,7 +248,7 @@ func (s *Service) updateCluster(ctx context.Context, updateClusterRequest *conta _, err := s.scope.ManagedControlPlaneClient().UpdateCluster(ctx, updateClusterRequest) if err != nil { - log.Error(err, "Error updating GKE cluster", "name", s.scope.GCPManagedControlPlane.Name) + log.Error(err, "Error updating GKE cluster", "name", s.scope.ClusterName()) return err } @@ -242,7 +263,7 @@ func (s *Service) deleteCluster(ctx 
context.Context) error { } _, err := s.scope.ManagedControlPlaneClient().DeleteCluster(ctx, deleteClusterRequest) if err != nil { - log.Error(err, "Error deleting GKE cluster", "name", s.scope.GCPManagedControlPlane.Name) + log.Error(err, "Error deleting GKE cluster", "name", s.scope.ClusterName()) return err } diff --git a/cloud/services/container/nodepools/doc.go b/cloud/services/container/nodepools/doc.go new file mode 100644 index 000000000..aa21dbe42 --- /dev/null +++ b/cloud/services/container/nodepools/doc.go @@ -0,0 +1,18 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package nodepools implements reconciler for GKE node pool components. +package nodepools diff --git a/cloud/services/container/nodepools/reconcile.go b/cloud/services/container/nodepools/reconcile.go new file mode 100644 index 000000000..5b94b22cb --- /dev/null +++ b/cloud/services/container/nodepools/reconcile.go @@ -0,0 +1,375 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package nodepools + +import ( + "context" + "fmt" + "reflect" + + "sigs.k8s.io/cluster-api-provider-gcp/util/resourceurl" + + "google.golang.org/api/iterator" + "google.golang.org/grpc/codes" + + "cloud.google.com/go/compute/apiv1/computepb" + "cloud.google.com/go/container/apiv1/containerpb" + "github.com/googleapis/gax-go/v2/apierror" + "github.com/pkg/errors" + "sigs.k8s.io/cluster-api-provider-gcp/cloud/scope" + infrav1exp "sigs.k8s.io/cluster-api-provider-gcp/exp/api/v1beta1" + "sigs.k8s.io/cluster-api-provider-gcp/util/reconciler" + clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + "sigs.k8s.io/cluster-api/util/conditions" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/log" +) + +// Reconcile reconcile GKE node pool. 
+func (s *Service) Reconcile(ctx context.Context) (ctrl.Result, error) { + log := log.FromContext(ctx) + log.Info("Reconciling node pool resources") + + nodePool, err := s.describeNodePool(ctx) + if err != nil { + s.scope.GCPManagedMachinePool.Status.Ready = false + conditions.MarkFalse(s.scope.ConditionSetter(), clusterv1.ReadyCondition, infrav1exp.GKEMachinePoolReconciliationFailedReason, clusterv1.ConditionSeverityError, err.Error()) + return ctrl.Result{}, err + } + if nodePool == nil { + log.Info("Node pool not found, creating", "cluster", s.scope.Cluster.Name) + s.scope.GCPManagedMachinePool.Status.Ready = false + if err = s.createNodePool(ctx); err != nil { + conditions.MarkFalse(s.scope.ConditionSetter(), clusterv1.ReadyCondition, infrav1exp.GKEMachinePoolReconciliationFailedReason, clusterv1.ConditionSeverityError, err.Error()) + conditions.MarkFalse(s.scope.ConditionSetter(), infrav1exp.GKEMachinePoolReadyCondition, infrav1exp.GKEMachinePoolReconciliationFailedReason, clusterv1.ConditionSeverityError, err.Error()) + conditions.MarkFalse(s.scope.ConditionSetter(), infrav1exp.GKEMachinePoolCreatingCondition, infrav1exp.GKEMachinePoolReconciliationFailedReason, clusterv1.ConditionSeverityError, err.Error()) + return ctrl.Result{}, err + } + log.Info("Node pool provisioning in progress") + conditions.MarkFalse(s.scope.ConditionSetter(), clusterv1.ReadyCondition, infrav1exp.GKEMachinePoolCreatingReason, clusterv1.ConditionSeverityInfo, "") + conditions.MarkFalse(s.scope.ConditionSetter(), infrav1exp.GKEMachinePoolReadyCondition, infrav1exp.GKEMachinePoolCreatingReason, clusterv1.ConditionSeverityInfo, "") + conditions.MarkTrue(s.scope.ConditionSetter(), infrav1exp.GKEMachinePoolCreatingCondition) + return ctrl.Result{RequeueAfter: reconciler.DefaultRetryTime}, nil + } + + instances, err := s.getInstances(ctx, nodePool) + if err != nil { + s.scope.GCPManagedMachinePool.Status.Ready = false + conditions.MarkFalse(s.scope.ConditionSetter(), clusterv1.ReadyCondition, infrav1exp.GKEMachinePoolReconciliationFailedReason, clusterv1.ConditionSeverityError, err.Error()) + return ctrl.Result{}, err + } + providerIDList := []string{} + for _, instance := range instances { + providerIDList = append(providerIDList, *instance.Instance) + } + s.scope.GCPManagedMachinePool.Spec.ProviderIDList = providerIDList + + switch nodePool.Status { + case containerpb.NodePool_PROVISIONING: + log.Info("Node pool provisioning in progress") + s.scope.GCPManagedMachinePool.Status.Ready = false + conditions.MarkFalse(s.scope.ConditionSetter(), clusterv1.ReadyCondition, infrav1exp.GKEMachinePoolCreatingReason, clusterv1.ConditionSeverityInfo, "") + conditions.MarkFalse(s.scope.ConditionSetter(), infrav1exp.GKEMachinePoolReadyCondition, infrav1exp.GKEMachinePoolCreatingReason, clusterv1.ConditionSeverityInfo, "") + conditions.MarkTrue(s.scope.ConditionSetter(), infrav1exp.GKEMachinePoolCreatingCondition) + return ctrl.Result{RequeueAfter: reconciler.DefaultRetryTime}, nil + case containerpb.NodePool_RECONCILING: + log.Info("Node pool reconciling in progress") + s.scope.GCPManagedMachinePool.Status.Ready = true + conditions.MarkTrue(s.scope.ConditionSetter(), infrav1exp.GKEMachinePoolUpdatingCondition) + return ctrl.Result{RequeueAfter: reconciler.DefaultRetryTime}, nil + case containerpb.NodePool_STOPPING: + log.Info("Node pool stopping in progress") + s.scope.GCPManagedMachinePool.Status.Ready = false + conditions.MarkFalse(s.scope.ConditionSetter(), clusterv1.ReadyCondition, infrav1exp.GKEMachinePoolDeletingReason, 
clusterv1.ConditionSeverityInfo, "") + conditions.MarkFalse(s.scope.ConditionSetter(), infrav1exp.GKEMachinePoolReadyCondition, infrav1exp.GKEMachinePoolDeletingReason, clusterv1.ConditionSeverityInfo, "") + conditions.MarkTrue(s.scope.ConditionSetter(), infrav1exp.GKEMachinePoolDeletingCondition) + return ctrl.Result{}, nil + case containerpb.NodePool_ERROR, containerpb.NodePool_RUNNING_WITH_ERROR: + var msg string + if len(nodePool.Conditions) > 0 { + msg = nodePool.Conditions[0].GetMessage() + } + log.Error(errors.New("Node pool in error/degraded state"), msg, "name", s.scope.GCPManagedMachinePool.Name) + s.scope.GCPManagedMachinePool.Status.Ready = false + conditions.MarkFalse(s.scope.ConditionSetter(), infrav1exp.GKEMachinePoolReadyCondition, infrav1exp.GKEMachinePoolErrorReason, clusterv1.ConditionSeverityError, "") + return ctrl.Result{}, nil + case containerpb.NodePool_RUNNING: + log.Info("Node pool running") + default: + log.Error(errors.New("Unhandled node pool status"), fmt.Sprintf("Unhandled node pool status %s", nodePool.Status), "name", s.scope.GCPManagedMachinePool.Name) + return ctrl.Result{}, nil + } + + needUpdateVersionOrImage, nodePoolUpdateVersionOrImage := s.checkDiffAndPrepareUpdateVersionOrImage(nodePool) + if needUpdateVersionOrImage { + log.Info("Version/image update required") + err = s.updateNodePoolVersionOrImage(ctx, nodePoolUpdateVersionOrImage) + if err != nil { + return ctrl.Result{}, err + } + log.Info("Node pool version/image updating in progress") + s.scope.GCPManagedMachinePool.Status.Ready = true + conditions.MarkTrue(s.scope.ConditionSetter(), infrav1exp.GKEMachinePoolUpdatingCondition) + return ctrl.Result{RequeueAfter: reconciler.DefaultRetryTime}, nil + } + + needUpdateAutoscaling, setNodePoolAutoscalingRequest := s.checkDiffAndPrepareUpdateAutoscaling(nodePool) + if needUpdateAutoscaling { + log.Info("Auto scaling update required") + err = s.updateNodePoolAutoscaling(ctx, setNodePoolAutoscalingRequest) + if err != nil { + return ctrl.Result{}, err + } + log.Info("Node pool auto scaling updating in progress") + s.scope.GCPManagedMachinePool.Status.Ready = true + conditions.MarkTrue(s.scope.ConditionSetter(), infrav1exp.GKEMachinePoolUpdatingCondition) + return ctrl.Result{RequeueAfter: reconciler.DefaultRetryTime}, nil + } + + needUpdateSize, setNodePoolSizeRequest := s.checkDiffAndPrepareUpdateSize(nodePool) + if needUpdateSize { + log.Info("Size update required") + err = s.updateNodePoolSize(ctx, setNodePoolSizeRequest) + if err != nil { + return ctrl.Result{}, err + } + log.Info("Node pool size updating in progress") + s.scope.GCPManagedMachinePool.Status.Ready = true + conditions.MarkTrue(s.scope.ConditionSetter(), infrav1exp.GKEMachinePoolUpdatingCondition) + return ctrl.Result{RequeueAfter: reconciler.DefaultRetryTime}, nil + } + + conditions.MarkFalse(s.scope.ConditionSetter(), infrav1exp.GKEMachinePoolUpdatingCondition, infrav1exp.GKEMachinePoolUpdatedReason, clusterv1.ConditionSeverityInfo, "") + + s.scope.SetReplicas(int32(len(s.scope.GCPManagedMachinePool.Spec.ProviderIDList))) + log.Info("Node pool reconciled") + s.scope.GCPManagedMachinePool.Status.Ready = true + conditions.MarkTrue(s.scope.ConditionSetter(), clusterv1.ReadyCondition) + conditions.MarkTrue(s.scope.ConditionSetter(), infrav1exp.GKEMachinePoolReadyCondition) + conditions.MarkFalse(s.scope.ConditionSetter(), infrav1exp.GKEMachinePoolCreatingCondition, infrav1exp.GKEMachinePoolCreatedReason, clusterv1.ConditionSeverityInfo, "") + + return ctrl.Result{}, nil +} + +// Delete 
deletes the GKE node pool.
+func (s *Service) Delete(ctx context.Context) (ctrl.Result, error) {
+	log := log.FromContext(ctx)
+	log.Info("Deleting node pool resources")
+
+	nodePool, err := s.describeNodePool(ctx)
+	if err != nil {
+		return ctrl.Result{}, err
+	}
+	if nodePool == nil {
+		log.Info("Node pool already deleted")
+		conditions.MarkFalse(s.scope.ConditionSetter(), infrav1exp.GKEMachinePoolDeletingCondition, infrav1exp.GKEMachinePoolDeletedReason, clusterv1.ConditionSeverityInfo, "")
+		return ctrl.Result{}, err
+	}
+
+	switch nodePool.Status {
+	case containerpb.NodePool_PROVISIONING:
+		log.Info("Node pool provisioning in progress")
+		return ctrl.Result{RequeueAfter: reconciler.DefaultRetryTime}, nil
+	case containerpb.NodePool_RECONCILING:
+		log.Info("Node pool reconciling in progress")
+		return ctrl.Result{RequeueAfter: reconciler.DefaultRetryTime}, nil
+	case containerpb.NodePool_STOPPING:
+		log.Info("Node pool stopping in progress")
+		conditions.MarkFalse(s.scope.ConditionSetter(), infrav1exp.GKEMachinePoolReadyCondition, infrav1exp.GKEMachinePoolDeletingReason, clusterv1.ConditionSeverityInfo, "")
+		conditions.MarkTrue(s.scope.ConditionSetter(), infrav1exp.GKEMachinePoolDeletingCondition)
+		return ctrl.Result{RequeueAfter: reconciler.DefaultRetryTime}, nil
+	default:
+		break
+	}
+
+	if err = s.deleteNodePool(ctx); err != nil {
+		conditions.MarkFalse(s.scope.ConditionSetter(), infrav1exp.GKEMachinePoolDeletingCondition, infrav1exp.GKEMachinePoolReconciliationFailedReason, clusterv1.ConditionSeverityError, err.Error())
+		return ctrl.Result{}, err
+	}
+	log.Info("Node pool deleting in progress")
+	s.scope.GCPManagedMachinePool.Status.Ready = false
+	conditions.MarkFalse(s.scope.ConditionSetter(), clusterv1.ReadyCondition, infrav1exp.GKEMachinePoolDeletingReason, clusterv1.ConditionSeverityInfo, "")
+	conditions.MarkFalse(s.scope.ConditionSetter(), infrav1exp.GKEMachinePoolReadyCondition, infrav1exp.GKEMachinePoolDeletingReason, clusterv1.ConditionSeverityInfo, "")
+	conditions.MarkTrue(s.scope.ConditionSetter(), infrav1exp.GKEMachinePoolDeletingCondition)
+
+	return ctrl.Result{}, nil
+}
+
+func (s *Service) describeNodePool(ctx context.Context) (*containerpb.NodePool, error) {
+	log := log.FromContext(ctx)
+
+	getNodePoolRequest := &containerpb.GetNodePoolRequest{
+		Name: s.scope.NodePoolFullName(),
+	}
+	nodePool, err := s.scope.ManagedMachinePoolClient().GetNodePool(ctx, getNodePoolRequest)
+	if err != nil {
+		var e *apierror.APIError
+		if ok := errors.As(err, &e); ok {
+			if e.GRPCStatus().Code() == codes.NotFound {
+				return nil, nil
+			}
+		}
+		log.Error(err, "Error getting GKE node pool", "name", s.scope.GCPManagedMachinePool.Name)
+		return nil, err
+	}
+
+	return nodePool, nil
+}
+
+func (s *Service) getInstances(ctx context.Context, nodePool *containerpb.NodePool) ([]*computepb.ManagedInstance, error) {
+	instances := []*computepb.ManagedInstance{}
+
+	for _, url := range nodePool.InstanceGroupUrls {
+		resourceURL, err := resourceurl.Parse(url)
+		if err != nil {
+			return nil, errors.Wrap(err, "error parsing instance group url")
+		}
+		listManagedInstancesRequest := &computepb.ListManagedInstancesInstanceGroupManagersRequest{
+			InstanceGroupManager: resourceURL.Name,
+			Project:              resourceURL.Project,
+			Zone:                 resourceURL.Location,
+		}
+		iter := s.scope.InstanceGroupManagersClient().ListManagedInstances(ctx, listManagedInstancesRequest)
+		for {
+			resp, err := iter.Next()
+			if err == iterator.Done {
+				break
+			}
+			if err != nil {
+				return nil, err
+			}
+			instances = append(instances, 
resp) + } + } + + return instances, nil +} + +func (s *Service) createNodePool(ctx context.Context) error { + createNodePoolRequest := &containerpb.CreateNodePoolRequest{ + NodePool: scope.ConvertToSdkNodePool(*s.scope.GCPManagedMachinePool, *s.scope.MachinePool), + Parent: s.scope.NodePoolLocation(), + } + _, err := s.scope.ManagedMachinePoolClient().CreateNodePool(ctx, createNodePoolRequest) + if err != nil { + return err + } + + return nil +} + +func (s *Service) updateNodePoolVersionOrImage(ctx context.Context, updateNodePoolRequest *containerpb.UpdateNodePoolRequest) error { + _, err := s.scope.ManagedMachinePoolClient().UpdateNodePool(ctx, updateNodePoolRequest) + if err != nil { + return err + } + + return nil +} + +func (s *Service) updateNodePoolAutoscaling(ctx context.Context, setNodePoolAutoscalingRequest *containerpb.SetNodePoolAutoscalingRequest) error { + _, err := s.scope.ManagedMachinePoolClient().SetNodePoolAutoscaling(ctx, setNodePoolAutoscalingRequest) + if err != nil { + return err + } + + return nil +} + +func (s *Service) updateNodePoolSize(ctx context.Context, setNodePoolSizeRequest *containerpb.SetNodePoolSizeRequest) error { + _, err := s.scope.ManagedMachinePoolClient().SetNodePoolSize(ctx, setNodePoolSizeRequest) + if err != nil { + return err + } + + return nil +} + +func (s *Service) deleteNodePool(ctx context.Context) error { + deleteNodePoolRequest := &containerpb.DeleteNodePoolRequest{ + Name: s.scope.NodePoolFullName(), + } + _, err := s.scope.ManagedMachinePoolClient().DeleteNodePool(ctx, deleteNodePoolRequest) + if err != nil { + return err + } + + return nil +} + +func (s *Service) checkDiffAndPrepareUpdateVersionOrImage(existingNodePool *containerpb.NodePool) (bool, *containerpb.UpdateNodePoolRequest) { + needUpdate := false + updateNodePoolRequest := containerpb.UpdateNodePoolRequest{ + Name: s.scope.NodePoolFullName(), + } + // Node version + if s.scope.NodePoolVersion() != nil && *s.scope.NodePoolVersion() != existingNodePool.Version { + needUpdate = true + updateNodePoolRequest.NodeVersion = *s.scope.NodePoolVersion() + } + // Kubernetes labels + if !reflect.DeepEqual(map[string]string(s.scope.GCPManagedMachinePool.Spec.KubernetesLabels), existingNodePool.Config.Labels) { + needUpdate = true + updateNodePoolRequest.Labels = &containerpb.NodeLabels{ + Labels: s.scope.GCPManagedMachinePool.Spec.KubernetesLabels, + } + } + // Kubernetes taints + desiredKubernetesTaints := infrav1exp.ConvertToSdkTaint(s.scope.GCPManagedMachinePool.Spec.KubernetesTaints) + if !reflect.DeepEqual(desiredKubernetesTaints, existingNodePool.Config.Taints) { + needUpdate = true + updateNodePoolRequest.Taints = &containerpb.NodeTaints{ + Taints: desiredKubernetesTaints, + } + } + return needUpdate, &updateNodePoolRequest +} + +func (s *Service) checkDiffAndPrepareUpdateAutoscaling(existingNodePool *containerpb.NodePool) (bool, *containerpb.SetNodePoolAutoscalingRequest) { + needUpdate := false + + desiredAutoscaling := scope.ConvertToSdkNodePool(*s.scope.GCPManagedMachinePool, *s.scope.MachinePool).Autoscaling + var existingAutoscaling *containerpb.NodePoolAutoscaling + if existingNodePool.Autoscaling != nil && existingNodePool.Autoscaling.Enabled { + existingAutoscaling = &containerpb.NodePoolAutoscaling{ + Enabled: true, + MinNodeCount: existingNodePool.Autoscaling.MinNodeCount, + MaxNodeCount: existingNodePool.Autoscaling.MaxNodeCount, + } + } + + setNodePoolAutoscalingRequest := containerpb.SetNodePoolAutoscalingRequest{ + Name: s.scope.NodePoolFullName(), + } + if 
!reflect.DeepEqual(desiredAutoscaling, existingAutoscaling) { + needUpdate = true + setNodePoolAutoscalingRequest.Autoscaling = desiredAutoscaling + } + return needUpdate, &setNodePoolAutoscalingRequest +} + +func (s *Service) checkDiffAndPrepareUpdateSize(existingNodePool *containerpb.NodePool) (bool, *containerpb.SetNodePoolSizeRequest) { + needUpdate := false + setNodePoolSizeRequest := containerpb.SetNodePoolSizeRequest{ + Name: s.scope.NodePoolFullName(), + } + if s.scope.GCPManagedMachinePool.Spec.InitialNodeCount != existingNodePool.InitialNodeCount { + needUpdate = true + setNodePoolSizeRequest.NodeCount = s.scope.GCPManagedMachinePool.Spec.InitialNodeCount + } + return needUpdate, &setNodePoolSizeRequest +} diff --git a/cloud/services/container/nodepools/service.go b/cloud/services/container/nodepools/service.go new file mode 100644 index 000000000..2f0909480 --- /dev/null +++ b/cloud/services/container/nodepools/service.go @@ -0,0 +1,36 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package nodepools + +import ( + "sigs.k8s.io/cluster-api-provider-gcp/cloud" + "sigs.k8s.io/cluster-api-provider-gcp/cloud/scope" +) + +// Service implements node pool reconciler. +type Service struct { + scope *scope.ManagedMachinePoolScope +} + +var _ cloud.ReconcilerWithResult = &Service{} + +// New returns Service from given scope. +func New(scope *scope.ManagedMachinePoolScope) *Service { + return &Service{ + scope: scope, + } +} diff --git a/config/crd/bases/infrastructure.cluster.x-k8s.io_gcpmanagedmachinepools.yaml b/config/crd/bases/infrastructure.cluster.x-k8s.io_gcpmanagedmachinepools.yaml index 5556a903b..5581257cb 100644 --- a/config/crd/bases/infrastructure.cluster.x-k8s.io_gcpmanagedmachinepools.yaml +++ b/config/crd/bases/infrastructure.cluster.x-k8s.io_gcpmanagedmachinepools.yaml @@ -88,10 +88,11 @@ spec: - value type: object type: array - nodeVersion: - description: NodeVersion represents the node version of the node pool. - If not specified, the GKE cluster control plane version will be - used. + nodePoolName: + description: NodePoolName specifies the name of the GKE node pool + corresponding to this MachinePool. If you don't specify a name then + a default name will be created based on the namespace and name of + the managed machine pool. 
type: string providerIDList: description: ProviderIDList are the provider IDs of instances in the @@ -100,6 +101,16 @@ spec: items: type: string type: array + scaling: + description: Scaling specifies scaling for the node pool + properties: + maxCount: + format: int32 + type: integer + minCount: + format: int32 + type: integer + type: object required: - initialNodeCount type: object diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index ab33c4c0f..fd27661a2 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -33,6 +33,15 @@ rules: - get - list - watch +- apiGroups: + - cluster.x-k8s.io + resources: + - machinepools + - machinepools/status + verbs: + - get + - list + - watch - apiGroups: - cluster.x-k8s.io resources: diff --git a/exp/api/v1beta1/conditions_consts.go b/exp/api/v1beta1/conditions_consts.go index e8febcfc4..2214f2897 100644 --- a/exp/api/v1beta1/conditions_consts.go +++ b/exp/api/v1beta1/conditions_consts.go @@ -42,4 +42,32 @@ const ( GKEControlPlaneErrorReason = "GKEControlPlaneError" // GKEControlPlaneReconciliationFailedReason used to report failures while reconciling GKE control plane. GKEControlPlaneReconciliationFailedReason = "GKEControlPlaneReconciliationFailed" + // GKEControlPlaneRequiresAtLeastOneNodePoolReason used to report that no node pool is specified for the GKE control plane. + GKEControlPlaneRequiresAtLeastOneNodePoolReason = "GKEControlPlaneRequiresAtLeastOneNodePool" + + // GKEMachinePoolReadyCondition condition reports on the successful reconciliation of GKE node pool. + GKEMachinePoolReadyCondition clusterv1.ConditionType = "GKEMachinePoolReady" + // GKEMachinePoolCreatingCondition condition reports on whether the GKE node pool is creating. + GKEMachinePoolCreatingCondition clusterv1.ConditionType = "GKEMachinePoolCreating" + // GKEMachinePoolUpdatingCondition condition reports on whether the GKE node pool is updating. + GKEMachinePoolUpdatingCondition clusterv1.ConditionType = "GKEMachinePoolUpdating" + // GKEMachinePoolDeletingCondition condition reports on whether the GKE node pool is deleting. + GKEMachinePoolDeletingCondition clusterv1.ConditionType = "GKEMachinePoolDeleting" + + // WaitingForGKEControlPlaneReason used when the machine pool is waiting for GKE control plane infrastructure to be ready before proceeding. + WaitingForGKEControlPlaneReason = "WaitingForGKEControlPlane" + // GKEMachinePoolCreatingReason used to report GKE node pool being created. + GKEMachinePoolCreatingReason = "GKEMachinePoolCreating" + // GKEMachinePoolCreatedReason used to report GKE node pool is created. + GKEMachinePoolCreatedReason = "GKEMachinePoolCreated" + // GKEMachinePoolUpdatedReason used to report GKE node pool is updated. + GKEMachinePoolUpdatedReason = "GKEMachinePoolUpdated" + // GKEMachinePoolDeletingReason used to report GKE node pool being deleted. + GKEMachinePoolDeletingReason = "GKEMachinePoolDeleting" + // GKEMachinePoolDeletedReason used to report GKE node pool is deleted. + GKEMachinePoolDeletedReason = "GKEMachinePoolDeleted" + // GKEMachinePoolErrorReason used to report GKE node pool is in error state. + GKEMachinePoolErrorReason = "GKEMachinePoolError" + // GKEMachinePoolReconciliationFailedReason used to report failures while reconciling GKE node pool. 
+ GKEMachinePoolReconciliationFailedReason = "GKEMachinePoolReconciliationFailed" ) diff --git a/exp/api/v1beta1/gcpmanagedcluster_types.go b/exp/api/v1beta1/gcpmanagedcluster_types.go index a78a14630..cc0921ff0 100644 --- a/exp/api/v1beta1/gcpmanagedcluster_types.go +++ b/exp/api/v1beta1/gcpmanagedcluster_types.go @@ -39,6 +39,7 @@ type GCPManagedClusterSpec struct { // ControlPlaneEndpoint represents the endpoint used to communicate with the control plane. // +optional ControlPlaneEndpoint clusterv1.APIEndpoint `json:"controlPlaneEndpoint"` + // NetworkSpec encapsulates all things related to the GCP network. // +optional Network infrav1.NetworkSpec `json:"network"` diff --git a/exp/api/v1beta1/gcpmanagedmachinepool_types.go b/exp/api/v1beta1/gcpmanagedmachinepool_types.go index 9f2c785f8..81e5037a3 100644 --- a/exp/api/v1beta1/gcpmanagedmachinepool_types.go +++ b/exp/api/v1beta1/gcpmanagedmachinepool_types.go @@ -30,13 +30,16 @@ const ( // GCPManagedMachinePoolSpec defines the desired state of GCPManagedMachinePool. type GCPManagedMachinePoolSpec struct { - // NodeVersion represents the node version of the node pool. - // If not specified, the GKE cluster control plane version will be used. + // NodePoolName specifies the name of the GKE node pool corresponding to this MachinePool. If you don't specify a name + // then a default name will be created based on the namespace and name of the managed machine pool. // +optional - NodeVersion *string `json:"nodeVersion,omitempty"` + NodePoolName string `json:"nodePoolName,omitempty"` // InitialNodeCount represents the initial number of nodes for the pool. // In regional or multi-zonal clusters, this is the number of nodes per zone. InitialNodeCount int32 `json:"initialNodeCount"` + // Scaling specifies scaling for the node pool + // +optional + Scaling *NodePoolAutoScaling `json:"scaling,omitempty"` // KubernetesLabels specifies the labels to apply to the nodes of the node pool. // +optional KubernetesLabels infrav1.Labels `json:"kubernetesLabels,omitempty"` @@ -88,6 +91,22 @@ type GCPManagedMachinePoolList struct { Items []GCPManagedMachinePool `json:"items"` } +// NodePoolAutoScaling specifies scaling options. +type NodePoolAutoScaling struct { + MinCount *int32 `json:"minCount,omitempty"` + MaxCount *int32 `json:"maxCount,omitempty"` +} + +// GetConditions returns the machine pool conditions. +func (r *GCPManagedMachinePool) GetConditions() clusterv1.Conditions { + return r.Status.Conditions +} + +// SetConditions sets the status conditions for the GCPManagedMachinePool. +func (r *GCPManagedMachinePool) SetConditions(conditions clusterv1.Conditions) { + r.Status.Conditions = conditions +} + func init() { SchemeBuilder.Register(&GCPManagedMachinePool{}, &GCPManagedMachinePoolList{}) } diff --git a/exp/api/v1beta1/types.go b/exp/api/v1beta1/types.go index c3ad10987..f4c87bb30 100644 --- a/exp/api/v1beta1/types.go +++ b/exp/api/v1beta1/types.go @@ -16,6 +16,8 @@ limitations under the License. package v1beta1 +import "cloud.google.com/go/container/apiv1/containerpb" + // TaintEffect is the effect for a Kubernetes taint. type TaintEffect string @@ -32,3 +34,32 @@ type Taint struct { // Taints is an array of Taints. 
type Taints []Taint + +func convertToSdkTaintEffect(effect TaintEffect) containerpb.NodeTaint_Effect { + switch effect { + case "NoSchedule": + return containerpb.NodeTaint_NO_SCHEDULE + case "NoExecute": + return containerpb.NodeTaint_NO_EXECUTE + case "PreferNoSchedule": + return containerpb.NodeTaint_PREFER_NO_SCHEDULE + default: + return containerpb.NodeTaint_EFFECT_UNSPECIFIED + } +} + +// ConvertToSdkTaint converts taints to format that is used by GCP SDK. +func ConvertToSdkTaint(taints Taints) []*containerpb.NodeTaint { + if taints == nil { + return nil + } + res := []*containerpb.NodeTaint{} + for _, taint := range taints { + res = append(res, &containerpb.NodeTaint{ + Key: taint.Key, + Value: taint.Value, + Effect: convertToSdkTaintEffect(taint.Effect), + }) + } + return res +} diff --git a/exp/api/v1beta1/webhook_suite_test.go b/exp/api/v1beta1/webhook_suite_test.go index 1b58cca1e..1dfa274b7 100644 --- a/exp/api/v1beta1/webhook_suite_test.go +++ b/exp/api/v1beta1/webhook_suite_test.go @@ -28,7 +28,6 @@ import ( . "github.com/onsi/ginkgo" . "github.com/onsi/gomega" - //+kubebuilder:scaffold:imports admissionv1beta1 "k8s.io/api/admission/v1beta1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/rest" diff --git a/exp/api/v1beta1/zz_generated.deepcopy.go b/exp/api/v1beta1/zz_generated.deepcopy.go index cef156b10..1e0e020be 100644 --- a/exp/api/v1beta1/zz_generated.deepcopy.go +++ b/exp/api/v1beta1/zz_generated.deepcopy.go @@ -314,10 +314,10 @@ func (in *GCPManagedMachinePoolList) DeepCopyObject() runtime.Object { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *GCPManagedMachinePoolSpec) DeepCopyInto(out *GCPManagedMachinePoolSpec) { *out = *in - if in.NodeVersion != nil { - in, out := &in.NodeVersion, &out.NodeVersion - *out = new(string) - **out = **in + if in.Scaling != nil { + in, out := &in.Scaling, &out.Scaling + *out = new(NodePoolAutoScaling) + (*in).DeepCopyInto(*out) } if in.KubernetesLabels != nil { in, out := &in.KubernetesLabels, &out.KubernetesLabels @@ -377,6 +377,31 @@ func (in *GCPManagedMachinePoolStatus) DeepCopy() *GCPManagedMachinePoolStatus { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *NodePoolAutoScaling) DeepCopyInto(out *NodePoolAutoScaling) { + *out = *in + if in.MinCount != nil { + in, out := &in.MinCount, &out.MinCount + *out = new(int32) + **out = **in + } + if in.MaxCount != nil { + in, out := &in.MaxCount, &out.MaxCount + *out = new(int32) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodePoolAutoScaling. +func (in *NodePoolAutoScaling) DeepCopy() *NodePoolAutoScaling { + if in == nil { + return nil + } + out := new(NodePoolAutoScaling) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *Taint) DeepCopyInto(out *Taint) { *out = *in diff --git a/exp/controllers/gcpmanagedcontrolplane_controller.go b/exp/controllers/gcpmanagedcontrolplane_controller.go index ee7f3389d..b15a8ac2c 100644 --- a/exp/controllers/gcpmanagedcontrolplane_controller.go +++ b/exp/controllers/gcpmanagedcontrolplane_controller.go @@ -57,6 +57,8 @@ type GCPManagedControlPlaneReconciler struct { //+kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=gcpmanagedcontrolplanes,verbs=get;list;watch;create;update;patch;delete //+kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=gcpmanagedcontrolplanes/status,verbs=get;update;patch //+kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=gcpmanagedcontrolplanes/finalizers,verbs=update +//+kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=gcpmanagedclusters,verbs=get;list;watch +//+kubebuilder:rbac:groups=cluster.x-k8s.io,resources=clusters;clusters/status,verbs=get;list;watch // SetupWithManager sets up the controller with the Manager. func (r *GCPManagedControlPlaneReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, options controller.Options) error { diff --git a/exp/controllers/gcpmanagedmachinepool_controller.go b/exp/controllers/gcpmanagedmachinepool_controller.go index e00378804..071ed17f3 100644 --- a/exp/controllers/gcpmanagedmachinepool_controller.go +++ b/exp/controllers/gcpmanagedmachinepool_controller.go @@ -18,9 +18,37 @@ package controllers import ( "context" + "fmt" + "time" + + "github.com/go-logr/logr" + "github.com/googleapis/gax-go/v2/apierror" + "github.com/pkg/errors" + "google.golang.org/grpc/codes" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime/schema" + "sigs.k8s.io/cluster-api-provider-gcp/cloud" + "sigs.k8s.io/cluster-api-provider-gcp/cloud/services/container/nodepools" + expclusterv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" + "sigs.k8s.io/cluster-api/util/annotations" + "sigs.k8s.io/cluster-api/util/conditions" + "sigs.k8s.io/cluster-api/util/record" + "sigs.k8s.io/controller-runtime/pkg/client/apiutil" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + + "sigs.k8s.io/cluster-api-provider-gcp/cloud/scope" + infrav1exp "sigs.k8s.io/cluster-api-provider-gcp/exp/api/v1beta1" + "sigs.k8s.io/cluster-api-provider-gcp/util/reconciler" + clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + "sigs.k8s.io/cluster-api/util" + "sigs.k8s.io/cluster-api/util/predicates" + "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/handler" + "sigs.k8s.io/controller-runtime/pkg/source" "k8s.io/apimachinery/pkg/runtime" - infrastructurev1beta1 "sigs.k8s.io/cluster-api-provider-gcp/exp/api/v1beta1" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/log" @@ -29,33 +57,325 @@ import ( // GCPManagedMachinePoolReconciler reconciles a GCPManagedMachinePool object. type GCPManagedMachinePoolReconciler struct { client.Client - Scheme *runtime.Scheme + ReconcileTimeout time.Duration + Scheme *runtime.Scheme + WatchFilterValue string +} + +// GetOwnerClusterKey returns only the Cluster name and namespace. 
+func GetOwnerClusterKey(obj metav1.ObjectMeta) (*client.ObjectKey, error) { + for _, ref := range obj.OwnerReferences { + if ref.Kind != "Cluster" { + continue + } + gv, err := schema.ParseGroupVersion(ref.APIVersion) + if err != nil { + return nil, errors.WithStack(err) + } + if gv.Group == clusterv1.GroupVersion.Group { + return &client.ObjectKey{ + Namespace: obj.Namespace, + Name: ref.Name, + }, nil + } + } + return nil, nil +} + +func machinePoolToInfrastructureMapFunc(gvk schema.GroupVersionKind) handler.MapFunc { + return func(o client.Object) []reconcile.Request { + m, ok := o.(*expclusterv1.MachinePool) + if !ok { + panic(fmt.Sprintf("Expected a MachinePool but got a %T", o)) + } + + gk := gvk.GroupKind() + // Return early if the GroupKind doesn't match what we expect + infraGK := m.Spec.Template.Spec.InfrastructureRef.GroupVersionKind().GroupKind() + if gk != infraGK { + return nil + } + + return []reconcile.Request{ + { + NamespacedName: client.ObjectKey{ + Namespace: m.Namespace, + Name: m.Spec.Template.Spec.InfrastructureRef.Name, + }, + }, + } + } +} + +func managedControlPlaneToManagedMachinePoolMapFunc(c client.Client, gvk schema.GroupVersionKind, log logr.Logger) handler.MapFunc { + return func(o client.Object) []reconcile.Request { + ctx := context.Background() + gcpManagedControlPlane, ok := o.(*infrav1exp.GCPManagedControlPlane) + if !ok { + panic(fmt.Sprintf("Expected a GCPManagedControlPlane but got a %T", o)) + } + + if !gcpManagedControlPlane.ObjectMeta.DeletionTimestamp.IsZero() { + return nil + } + + clusterKey, err := GetOwnerClusterKey(gcpManagedControlPlane.ObjectMeta) + if err != nil { + log.Error(err, "couldn't get GCPManagedControlPlane owner ObjectKey") + return nil + } + if clusterKey == nil { + return nil + } + + managedPoolForClusterList := expclusterv1.MachinePoolList{} + if err := c.List( + ctx, &managedPoolForClusterList, client.InNamespace(clusterKey.Namespace), client.MatchingLabels{clusterv1.ClusterLabelName: clusterKey.Name}, + ); err != nil { + log.Error(err, "couldn't list pools for cluster") + return nil + } + + mapFunc := machinePoolToInfrastructureMapFunc(gvk) + + var results []ctrl.Request + for i := range managedPoolForClusterList.Items { + managedPool := mapFunc(&managedPoolForClusterList.Items[i]) + results = append(results, managedPool...) + } + + return results + } +} + +// SetupWithManager sets up the controller with the Manager. +func (r *GCPManagedMachinePoolReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, options controller.Options) error { + log := log.FromContext(ctx).WithValues("controller", "GCPManagedMachinePool") + + gvk, err := apiutil.GVKForObject(new(infrav1exp.GCPManagedMachinePool), mgr.GetScheme()) + if err != nil { + return errors.Wrapf(err, "failed to find GVK for GCPManagedMachinePool") + } + + c, err := ctrl.NewControllerManagedBy(mgr). + WithOptions(options). + For(&infrav1exp.GCPManagedMachinePool{}). + WithEventFilter(predicates.ResourceNotPausedAndHasFilterLabel(log, r.WatchFilterValue)). + Watches( + &source.Kind{Type: &expclusterv1.MachinePool{}}, + handler.EnqueueRequestsFromMapFunc(machinePoolToInfrastructureMapFunc(gvk)), + ). + Watches( + &source.Kind{Type: &infrav1exp.GCPManagedControlPlane{}}, + handler.EnqueueRequestsFromMapFunc(managedControlPlaneToManagedMachinePoolMapFunc(r.Client, gvk, log)), + ). 
+ Build(r) + if err != nil { + return errors.Wrap(err, "error creating controller") + } + + clusterToObjectFunc, err := util.ClusterToObjectsMapper(r.Client, &infrav1exp.GCPManagedMachinePoolList{}, mgr.GetScheme()) + if err != nil { + return errors.Wrap(err, "failed to create mapper for Cluster to GCPManagedMachinePools") + } + + // Add a watch on clusterv1.Cluster object for unpause & ready notifications. + if err := c.Watch( + &source.Kind{Type: &clusterv1.Cluster{}}, + handler.EnqueueRequestsFromMapFunc(clusterToObjectFunc), + predicates.ClusterUnpausedAndInfrastructureReady(log), + ); err != nil { + return errors.Wrap(err, "failed adding a watch for ready clusters") + } + + return nil +} + +// getMachinePoolByName finds and return a Machine object using the specified params. +func getMachinePoolByName(ctx context.Context, c client.Client, namespace, name string) (*expclusterv1.MachinePool, error) { + m := &expclusterv1.MachinePool{} + key := client.ObjectKey{Name: name, Namespace: namespace} + if err := c.Get(ctx, key, m); err != nil { + return nil, err + } + return m, nil +} + +// getOwnerMachinePool returns the MachinePool object owning the current resource. +func getOwnerMachinePool(ctx context.Context, c client.Client, obj metav1.ObjectMeta) (*expclusterv1.MachinePool, error) { + for _, ref := range obj.OwnerReferences { + if ref.Kind != "MachinePool" { + continue + } + gv, err := schema.ParseGroupVersion(ref.APIVersion) + if err != nil { + return nil, errors.WithStack(err) + } + if gv.Group == expclusterv1.GroupVersion.Group { + return getMachinePoolByName(ctx, c, obj.Namespace, ref.Name) + } + } + return nil, nil } //+kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=gcpmanagedmachinepools,verbs=get;list;watch;create;update;patch;delete //+kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=gcpmanagedmachinepools/status,verbs=get;update;patch //+kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=gcpmanagedmachinepools/finalizers,verbs=update +//+kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=gcpmanagedcontrolplanes,verbs=get;list;watch +//+kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=gcpmanagedclusters,verbs=get;list;watch +//+kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinepools;machinepools/status,verbs=get;list;watch +//+kubebuilder:rbac:groups=cluster.x-k8s.io,resources=clusters;clusters/status,verbs=get;list;watch + +func (r *GCPManagedMachinePoolReconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Result, reterr error) { + ctx, cancel := context.WithTimeout(ctx, reconciler.DefaultedLoopTimeout(r.ReconcileTimeout)) + defer cancel() + + log := ctrl.LoggerFrom(ctx) + + // Get the managed machine pool + gcpManagedMachinePool := &infrav1exp.GCPManagedMachinePool{} + if err := r.Client.Get(ctx, req.NamespacedName, gcpManagedMachinePool); err != nil { + if apierrors.IsNotFound(err) { + return ctrl.Result{}, nil + } + return ctrl.Result{Requeue: true}, nil + } + + // Get the machine pool + machinePool, err := getOwnerMachinePool(ctx, r.Client, gcpManagedMachinePool.ObjectMeta) + if err != nil { + log.Error(err, "Failed to retrieve owner MachinePool from the API Server") + return ctrl.Result{}, err + } + if machinePool == nil { + log.Info("MachinePool Controller has not yet set OwnerRef") + return ctrl.Result{}, nil + } + + // Get the cluster + cluster, err := util.GetClusterFromMetadata(ctx, r.Client, machinePool.ObjectMeta) + if err != nil { + log.Info("Failed to 
retrieve Cluster from MachinePool")
+		return ctrl.Result{}, err
+	}
+	if annotations.IsPaused(cluster, gcpManagedMachinePool) {
+		log.Info("Reconciliation is paused for this object")
+		return ctrl.Result{}, nil
+	}
+
+	// Get the managed cluster
+	gcpManagedClusterKey := client.ObjectKey{
+		Namespace: gcpManagedMachinePool.Namespace,
+		Name:      cluster.Spec.InfrastructureRef.Name,
+	}
+	gcpManagedCluster := &infrav1exp.GCPManagedCluster{}
+	if err := r.Client.Get(ctx, gcpManagedClusterKey, gcpManagedCluster); err != nil {
+		log.Error(err, "Failed to retrieve GCPManagedCluster from the API Server")
+		return ctrl.Result{}, err
+	}
+
+	gcpManagedControlPlaneKey := client.ObjectKey{
+		Namespace: gcpManagedMachinePool.Namespace,
+		Name:      cluster.Spec.ControlPlaneRef.Name,
+	}
+	gcpManagedControlPlane := &infrav1exp.GCPManagedControlPlane{}
+	if err := r.Client.Get(ctx, gcpManagedControlPlaneKey, gcpManagedControlPlane); err != nil {
+		log.Error(err, "Failed to retrieve GCPManagedControlPlane from the API Server")
+		return ctrl.Result{}, err
+	}
+
+	if !gcpManagedControlPlane.Status.Ready {
+		log.Info("Control plane is not ready yet")
+		conditions.MarkFalse(gcpManagedMachinePool, infrav1exp.GKEMachinePoolReadyCondition, infrav1exp.WaitingForGKEControlPlaneReason, clusterv1.ConditionSeverityInfo, "")
+		return ctrl.Result{}, nil
+	}
 
-// Reconcile is part of the main kubernetes reconciliation loop which aims to
-// move the current state of the cluster closer to the desired state.
-// TODO(user): Modify the Reconcile function to compare the state specified by
-// the GCPManagedMachinePool object against the actual cluster state, and then
-// perform operations to make the cluster state reflect the state specified by
-// the user.
-//
-// For more details, check Reconcile and its Result here:
-// - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.12.2/pkg/reconcile
-func (r *GCPManagedMachinePoolReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
-	_ = log.FromContext(ctx)
+	managedMachinePoolScope, err := scope.NewManagedMachinePoolScope(ctx, scope.ManagedMachinePoolScopeParams{
+		Client:                 r.Client,
+		Cluster:                cluster,
+		MachinePool:            machinePool,
+		GCPManagedCluster:      gcpManagedCluster,
+		GCPManagedControlPlane: gcpManagedControlPlane,
+		GCPManagedMachinePool:  gcpManagedMachinePool,
+	})
+	if err != nil {
+		return ctrl.Result{}, errors.Errorf("failed to create scope: %+v", err)
+	}
 
-	// TODO(user): your logic here
+	// Always close the scope when exiting this function so we can persist any GCPManagedMachinePool changes.
+ defer func() { + if err := managedMachinePoolScope.Close(); err != nil && reterr == nil { + reterr = err + } + }() + + // Handle deleted machine pool + if !gcpManagedMachinePool.DeletionTimestamp.IsZero() { + return r.reconcileDelete(ctx, managedMachinePoolScope) + } + + // Handle non-deleted machine pool + return r.reconcile(ctx, managedMachinePoolScope) +} + +func (r *GCPManagedMachinePoolReconciler) reconcile(ctx context.Context, managedMachinePoolScope *scope.ManagedMachinePoolScope) (ctrl.Result, error) { + log := log.FromContext(ctx) + log.Info("Reconciling GCPManagedMachinePool") + + controllerutil.AddFinalizer(managedMachinePoolScope.GCPManagedMachinePool, infrav1exp.ManagedMachinePoolFinalizer) + if err := managedMachinePoolScope.PatchObject(); err != nil { + return ctrl.Result{}, err + } + + reconcilers := []cloud.ReconcilerWithResult{ + nodepools.New(managedMachinePoolScope), + } + + for _, r := range reconcilers { + res, err := r.Reconcile(ctx) + if err != nil { + var e *apierror.APIError + if ok := errors.As(err, &e); ok { + if e.GRPCStatus().Code() == codes.FailedPrecondition { + log.Info("Cannot perform update when there's other operation, retry later") + return ctrl.Result{RequeueAfter: reconciler.DefaultRetryTime}, nil + } + } + log.Error(err, "Reconcile error") + record.Warnf(managedMachinePoolScope.GCPManagedMachinePool, "GCPManagedMachinePoolReconcile", "Reconcile error - %v", err) + return ctrl.Result{}, err + } + if res.Requeue { + return res, nil + } + } return ctrl.Result{}, nil } -// SetupWithManager sets up the controller with the Manager. -func (r *GCPManagedMachinePoolReconciler) SetupWithManager(mgr ctrl.Manager) error { - return ctrl.NewControllerManagedBy(mgr). - For(&infrastructurev1beta1.GCPManagedMachinePool{}). - Complete(r) +func (r *GCPManagedMachinePoolReconciler) reconcileDelete(ctx context.Context, managedMachinePoolScope *scope.ManagedMachinePoolScope) (ctrl.Result, error) { + log := log.FromContext(ctx) + log.Info("Deleting GCPManagedMachinePool") + + reconcilers := []cloud.ReconcilerWithResult{ + nodepools.New(managedMachinePoolScope), + } + + for _, r := range reconcilers { + res, err := r.Delete(ctx) + if err != nil { + log.Error(err, "Reconcile error") + record.Warnf(managedMachinePoolScope.GCPManagedMachinePool, "GCPManagedMachinePoolReconcile", "Reconcile error - %v", err) + return ctrl.Result{}, err + } + if res.Requeue { + return res, nil + } + } + + if conditions.Get(managedMachinePoolScope.GCPManagedMachinePool, infrav1exp.GKEMachinePoolDeletingCondition).Reason == infrav1exp.GKEMachinePoolDeletedReason { + controllerutil.RemoveFinalizer(managedMachinePoolScope.GCPManagedMachinePool, infrav1exp.ManagedMachinePoolFinalizer) + } + + return ctrl.Result{RequeueAfter: reconciler.DefaultRetryTime}, nil } diff --git a/go.mod b/go.mod index 1082da4a0..27441ae0c 100644 --- a/go.mod +++ b/go.mod @@ -3,6 +3,7 @@ module sigs.k8s.io/cluster-api-provider-gcp go 1.19 require ( + cloud.google.com/go/compute v1.15.1 cloud.google.com/go/container v1.13.0 cloud.google.com/go/iam v0.10.0 github.com/GoogleCloudPlatform/k8s-cloud-provider v1.20.0 @@ -32,7 +33,6 @@ require ( ) require ( - cloud.google.com/go/compute v1.15.1 // indirect cloud.google.com/go/compute/metadata v0.2.3 // indirect github.com/BurntSushi/toml v1.0.0 // indirect github.com/MakeNowJust/heredoc v1.0.0 // indirect diff --git a/main.go b/main.go index 1291d7ec2..cbfc5a21c 100644 --- a/main.go +++ b/main.go @@ -18,6 +18,7 @@ limitations under the License. 
 package main
 
 import (
+	"context"
 	"flag"
 	"fmt"
 	"net/http"
@@ -110,6 +111,8 @@ func main() {
 
 	ctrl.SetLogger(klogr.New())
 
+	setupLog.Info(fmt.Sprintf("feature gates: %+v\n", feature.Gates))
+
 	// Machine and cluster operations can create enough events to trigger the event recorder spam filter
 	// Setting the burst size higher ensures all events will be recorded and submitted to the API
 	broadcaster := cgrecord.NewBroadcasterWithCorrelatorOptions(cgrecord.CorrelatorOptions{
@@ -143,56 +146,75 @@ func main() {
 	// Setup the context that's going to be used in controllers and for the manager.
 	ctx := ctrl.SetupSignalHandler()
 
-	if err = (&controllers.GCPMachineReconciler{
+	if setupErr := setupReconcilers(ctx, mgr); setupErr != nil {
+		setupLog.Error(setupErr, "unable to set up reconcilers")
+		os.Exit(1)
+	}
+
+	if setupErr := setupWebhooks(mgr); setupErr != nil {
+		setupLog.Error(setupErr, "unable to set up webhooks")
+		os.Exit(1)
+	}
+
+	if setupErr := setupProbes(mgr); setupErr != nil {
+		setupLog.Error(setupErr, "unable to set up probes")
+		os.Exit(1)
+	}
+
+	// +kubebuilder:scaffold:builder
+	setupLog.Info("starting manager", "version", version.Get().String(), "extended_info", version.Get())
+	if err := mgr.Start(ctx); err != nil {
+		setupLog.Error(err, "problem running manager")
+		os.Exit(1)
+	}
+}
+
+func setupReconcilers(ctx context.Context, mgr ctrl.Manager) error {
+	if err := (&controllers.GCPMachineReconciler{
 		Client:           mgr.GetClient(),
 		ReconcileTimeout: reconcileTimeout,
 		WatchFilterValue: watchFilterValue,
 	}).SetupWithManager(ctx, mgr, controller.Options{MaxConcurrentReconciles: gcpMachineConcurrency}); err != nil {
-		setupLog.Error(err, "unable to create controller", "controller", "GCPMachine")
-		os.Exit(1)
+		return fmt.Errorf("setting up GCPMachine controller: %w", err)
 	}
 
-	if err = (&controllers.GCPClusterReconciler{
+	if err := (&controllers.GCPClusterReconciler{
 		Client:           mgr.GetClient(),
 		ReconcileTimeout: reconcileTimeout,
 		WatchFilterValue: watchFilterValue,
 	}).SetupWithManager(ctx, mgr, controller.Options{MaxConcurrentReconciles: gcpClusterConcurrency}); err != nil {
-		setupLog.Error(err, "unable to create controller", "controller", "GCPCluster")
-		os.Exit(1)
+		return fmt.Errorf("setting up GCPCluster controller: %w", err)
 	}
 
-	if err = (&infrav1beta1.GCPCluster{}).SetupWebhookWithManager(mgr); err != nil {
-		setupLog.Error(err, "unable to create webhook", "webhook", "GCPCluster")
-		os.Exit(1)
+	return nil
+}
+
+func setupWebhooks(mgr ctrl.Manager) error {
+	if err := (&infrav1beta1.GCPCluster{}).SetupWebhookWithManager(mgr); err != nil {
+		return fmt.Errorf("setting up GCPCluster webhook: %w", err)
 	}
 
-	if err = (&infrav1beta1.GCPClusterTemplate{}).SetupWebhookWithManager(mgr); err != nil {
-		setupLog.Error(err, "unable to create webhook", "webhook", "GCPClusterTemplate")
-		os.Exit(1)
+	if err := (&infrav1beta1.GCPClusterTemplate{}).SetupWebhookWithManager(mgr); err != nil {
+		return fmt.Errorf("setting up GCPClusterTemplate webhook: %w", err)
 	}
 
-	if err = (&infrav1beta1.GCPMachine{}).SetupWebhookWithManager(mgr); err != nil {
-		setupLog.Error(err, "unable to create webhook", "webhook", "GCPMachine")
-		os.Exit(1)
+	if err := (&infrav1beta1.GCPMachine{}).SetupWebhookWithManager(mgr); err != nil {
+		return fmt.Errorf("setting up GCPMachine webhook: %w", err)
 	}
 
-	if err = (&infrav1beta1.GCPMachineTemplate{}).SetupWebhookWithManager(mgr); err != nil {
-		setupLog.Error(err, "unable to create webhook", "webhook", "GCPMachineTemplate")
-		os.Exit(1)
+	if err :=
(&infrav1beta1.GCPMachineTemplate{}).SetupWebhookWithManager(mgr); err != nil { + return fmt.Errorf("setting up GCPMachineTemplate webhook: %w", err) } + return nil +} + +func setupProbes(mgr ctrl.Manager) error { if err := mgr.AddReadyzCheck("webhook", mgr.GetWebhookServer().StartedChecker()); err != nil { - setupLog.Error(err, "unable to create ready check") - os.Exit(1) + return fmt.Errorf("creating ready check: %w", err) } if err := mgr.AddHealthzCheck("webhook", mgr.GetWebhookServer().StartedChecker()); err != nil { - setupLog.Error(err, "unable to create health check") - os.Exit(1) + return fmt.Errorf("creating health check: %w", err) } - // +kubebuilder:scaffold:builder - setupLog.Info("starting manager", "version", version.Get().String(), "extended_info", version.Get()) - if err := mgr.Start(ctx); err != nil { - setupLog.Error(err, "problem running manager") - os.Exit(1) - } + return nil } func initFlags(fs *pflag.FlagSet) { diff --git a/util/location/location.go b/util/location/location.go new file mode 100644 index 000000000..dd50b3034 --- /dev/null +++ b/util/location/location.go @@ -0,0 +1,50 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package location implements location parsing utilities. +package location + +import ( + "strings" + + "github.com/pkg/errors" +) + +// Location captures the region and zone of a GCP location. +// Examples of GCP location: +// us-central1 (region). +// us-central1-c (region with zone). +type Location struct { + Region string + Zone *string +} + +// Parse parses a location string. +func Parse(location string) (Location, error) { + parts := strings.Split(location, "-") + if len(parts) < 2 { + return Location{}, errors.New("invalid location") + } + region := strings.Join(parts[:2], "-") + var zone *string + if len(parts) == 3 { + zone = &parts[2] + } + return Location{ + Region: region, + Zone: zone, + }, nil +} diff --git a/util/resourceurl/parse.go b/util/resourceurl/parse.go new file mode 100644 index 000000000..b65d23fc8 --- /dev/null +++ b/util/resourceurl/parse.go @@ -0,0 +1,75 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package resourceurl implements resource url parsing utilities. +package resourceurl + +import ( + "strings" + + "github.com/pkg/errors" +) + +const ( + // ResourcePrefix is the prefix of a resource URL. + ResourcePrefix = "https://www.googleapis.com/" + // NumParts is the number of parts in a resource URL. 
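+	// (e.g. "compute/v1/projects/my-project/zones/us-central1-b/instanceGroupManagers/<name>", once the prefix is stripped, splits into 8 parts).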
+ NumParts = 8 + // ResourceCategoryIndex is the index of the resource category in resource URL parts. + ResourceCategoryIndex = 0 + // ProjectIndex is the index of the project in resource URL parts. + ProjectIndex = 3 + // LocationIndex is the index of the location in resource URL parts. + LocationIndex = 5 + // SubResourceIndex is the index of the sub resource in resource URL parts. + SubResourceIndex = 6 + // NameIndex is the index of the name in resource URL parts. + NameIndex = 7 +) + +// ResourceURL captures the individual fields of a GCP resource URL. +// An example of GCP resource URL: +// https://www.googleapis.com/compute/v1/projects/my-project/zones/us-central1-b/instanceGroupManagers/gke-capg-gke-demo-mypool-aa1282e0-grp +type ResourceURL struct { + // The resource category (e.g. compute) + ResourceCategory string + // The project where the resource lives in (e.g. my-project) + Project string + // The location where the resource lives in (e.g. us-central1-b) + Location string + // The sub-type of the resource (e.g. instanceGroupManagers) + SubResource string + // The name of the resource (e.g. gke-capg-gke-demo-mypool-aa1282e0-grp) + Name string +} + +// Parse parses a resource url. +func Parse(url string) (ResourceURL, error) { + if !strings.HasPrefix(url, ResourcePrefix) { + return ResourceURL{}, errors.New("invalid resource url") + } + parts := strings.Split(url[len(ResourcePrefix):], "/") + if len(parts) != NumParts { + return ResourceURL{}, errors.New("invalid resource url") + } + return ResourceURL{ + ResourceCategory: parts[ResourceCategoryIndex], + Project: parts[ProjectIndex], + Location: parts[LocationIndex], + SubResource: parts[SubResourceIndex], + Name: parts[NameIndex], + }, nil +}
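
As a quick illustration of the new util/location and util/resourceurl packages, here is a minimal sketch of how they are meant to be consumed. The sample program, its hard-coded project name and the printed values are illustrative only and are not part of the change:

package main

import (
	"fmt"
	"log"

	"sigs.k8s.io/cluster-api-provider-gcp/util/location"
	"sigs.k8s.io/cluster-api-provider-gcp/util/resourceurl"
)

func main() {
	// A zonal GKE location parses into its region plus an optional zone suffix.
	loc, err := location.Parse("us-central1-c")
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(loc.Region) // "us-central1"
	if loc.Zone != nil {
		fmt.Println(*loc.Zone) // "c"
	}

	// An instance group manager URL (as reported for a GKE node pool) splits into
	// its resource category, project, location, sub-resource and name.
	res, err := resourceurl.Parse("https://www.googleapis.com/compute/v1/projects/my-project/zones/us-central1-b/instanceGroupManagers/gke-capg-gke-demo-mypool-aa1282e0-grp")
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(res.ResourceCategory) // "compute"
	fmt.Println(res.Project)          // "my-project"
	fmt.Println(res.Location)         // "us-central1-b"
	fmt.Println(res.Name)             // "gke-capg-gke-demo-mypool-aa1282e0-grp"
}

Both parsers return an error on malformed input rather than panicking.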